diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000..e69de29bb2 diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000..c6fcc550fc --- /dev/null +++ b/.travis.yml @@ -0,0 +1,10 @@ +language: nix +sudo: false +script: + - nix-build -A test-O3 + - nix-build -A test-O2 + - nix-build -A test-O1 + - nix-build -A test-nojit + - nix-build -A test-capi + - nix-build -A test-libraptorjit + - nix-build -A check-generated-code --arg check true diff --git a/CONDUCT.md b/CONDUCT.md new file mode 100644 index 0000000000..38f2cb8f96 --- /dev/null +++ b/CONDUCT.md @@ -0,0 +1,7 @@ +### Code of conduct + +Follow the [Contributor Covenant](http://contributor-covenant.org/) and report problems to `lukego@gmail.com`. + +Help people achieve their goals. Don't stand between people who want to cooperate. + +Keep people in the loop. Use Github Issues for important discussions. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000..5d56fb834e --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,21 @@ +Thank you for your contribution! + +Please be aware that RaptorJIT is [Apache 2.0 licensed](https://www.apache.org/licenses/LICENSE-2.0) +and when you contribute code you are also releasing it under the same +license. If this is a problem, for example because you are unwilling +or unable to release your code under this license, then please explain +the situation in your pull request so that we can find an appropriate +solution. + +Please do not put individual copyright notices into source files. This +leads to clutter over time and complicates refactoring. If you own the +copyright of your contribution then this will be implicit in the Git +history (as clarified in the COPYING/COPYRIGHT files) and no notice is +needed. 
If you are not the copyright owner, perhaps due to a copyright +assignment agreement with your employer, then please mention this in +your Pull Request and we will add an appropriate clarification to the +COPYRIGHT file. + +Please also respect the +[Contributor Covenant Code of Conduct](http://contributor-covenant.org/version/1/4/) +in your interactions with the RaptorJIT community. diff --git a/COPYING b/COPYING new file mode 100644 index 0000000000..e1d5871f5c --- /dev/null +++ b/COPYING @@ -0,0 +1,217 @@ +RaptorJIT is a fork of LuaJIT. Code merged from LuaJIT is copyright +Mike Pall and released under the MIT license. + +New code contributed independently to RaptorJIT is licensed under the +Apache License 2.0 and its copyright is retained by the original +authors (except as otherwise noted in this file.) + +Here are the copyright and license terms for the code from LuaJIT: + + LuaJIT is Copyright (C) 2005-2017 Mike Pall. + LuaJIT is free software, released under the MIT license. + See full Copyright Notice in the COPYRIGHT file or in luajit.h. + +Here is the Apache License that applies to all other code (apart from +exceptions noted in this file): + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/COPYRIGHT b/COPYRIGHT index 6ed40025ae..17a037c14e 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,3 +1,10 @@ +RaptorJIT -- a LuaJIT fork for Linux/x86-64. + +Copyright is retained by the individual authors and contributors. + +[ Apache 2.0 license - see the file COPYING. ] + +[ RaptorJIT includes code from LuaJIT 2.1 which has the following statements: ] =============================================================================== LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/ diff --git a/Makefile b/Makefile index 489d7e754c..8a0d5b0253 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ ############################################################################## -# LuaJIT top level Makefile for installation. Requires GNU Make. +# RaptorJIT top level Makefile for installation. Requires GNU Make. # # Please read doc/install.html before changing any variables! # @@ -13,10 +13,10 @@ # Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h ############################################################################## -MAJVER= 2 -MINVER= 1 +MAJVER= 1 +MINVER= 0 RELVER= 0 -PREREL= -beta2 +PREREL= VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL) ABIVER= 5.1 @@ -33,27 +33,22 @@ DPREFIX= $(DESTDIR)$(PREFIX) INSTALL_BIN= $(DPREFIX)/bin INSTALL_LIB= $(DPREFIX)/$(MULTILIB) INSTALL_SHARE= $(DPREFIX)/share -INSTALL_INC= $(DPREFIX)/include/luajit-$(MAJVER).$(MINVER) +INSTALL_INC= $(DPREFIX)/include/raptorjit-$(MAJVER).$(MINVER) -INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(VERSION) +INSTALL_LJLIBD= $(INSTALL_SHARE)/raptorjit-$(VERSION) INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit -INSTALL_LMODD= $(INSTALL_SHARE)/lua -INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER) -INSTALL_CMODD= $(INSTALL_LIB)/lua -INSTALL_CMOD= $(INSTALL_CMODD)/$(ABIVER) -INSTALL_MAN= $(INSTALL_SHARE)/man/man1 INSTALL_PKGCONFIG= $(INSTALL_LIB)/pkgconfig -INSTALL_TNAME= luajit-$(VERSION) -INSTALL_TSYMNAME= luajit -INSTALL_ANAME= libluajit-$(ABIVER).a -INSTALL_SOSHORT1= libluajit-$(ABIVER).so -INSTALL_SOSHORT2= libluajit-$(ABIVER).so.$(MAJVER) +INSTALL_TNAME= raptorjit-$(VERSION) +INSTALL_TSYMNAME= raptorjit +INSTALL_ANAME= libraptorjit-$(ABIVER).a +INSTALL_SOSHORT1= libraptorjit-$(ABIVER).so +INSTALL_SOSHORT2= libraptorjit-$(ABIVER).so.$(MAJVER) INSTALL_SONAME= $(INSTALL_SOSHORT2).$(MINVER).$(RELVER) -INSTALL_DYLIBSHORT1= libluajit-$(ABIVER).dylib -INSTALL_DYLIBSHORT2= libluajit-$(ABIVER).$(MAJVER).dylib -INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).$(MINVER).$(RELVER).dylib -INSTALL_PCNAME= luajit.pc +INSTALL_DYLIBSHORT1= libraptorjit-$(ABIVER).dylib +INSTALL_DYLIBSHORT2= libraptorjit-$(ABIVER).$(MAJVER).dylib +INSTALL_DYLIBNAME= libraptorjit-$(ABIVER).$(MAJVER).$(MINVER).$(RELVER).dylib +INSTALL_PCNAME= raptorjit.pc INSTALL_STATIC= $(INSTALL_LIB)/$(INSTALL_ANAME) INSTALL_DYN= $(INSTALL_LIB)/$(INSTALL_SONAME) @@ -63,10 +58,9 @@ INSTALL_T= $(INSTALL_BIN)/$(INSTALL_TNAME) INSTALL_TSYM= $(INSTALL_BIN)/$(INSTALL_TSYMNAME) 
INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME) -INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \ - $(INSTALL_PKGCONFIG) $(INSTALL_JITLIB) $(INSTALL_LMOD) $(INSTALL_CMOD) -UNINSTALL_DIRS= $(INSTALL_JITLIB) $(INSTALL_LJLIBD) $(INSTALL_INC) \ - $(INSTALL_LMOD) $(INSTALL_LMODD) $(INSTALL_CMOD) $(INSTALL_CMODD) +INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) \ + $(INSTALL_JITLIB) +UNINSTALL_DIRS= $(INSTALL_JITLIB) $(INSTALL_LJLIBD) $(INSTALL_INC) RM= rm -f MKDIR= mkdir -p @@ -79,16 +73,12 @@ LDCONFIG= ldconfig -n SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \ -e "s|^multilib=.*|multilib=$(MULTILIB)|" -FILE_T= luajit -FILE_A= libluajit.a -FILE_SO= libluajit.so -FILE_MAN= luajit.1 -FILE_PC= luajit.pc +FILE_T= raptorjit +FILE_A= libraptorjit.a +FILE_SO= libraptorjit.so +FILE_PC= raptorjit.pc FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h -FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ - dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ - dis_ppc.lua dis_mips.lua dis_mipsel.lua dis_mips64.lua \ - dis_mips64el.lua vmdef.lua +FILES_JITLIB= bc.lua bcsave.lua vmdef.lua ifeq (,$(findstring Windows,$(OS))) HOST_SYS:= $(shell uname -s) @@ -106,15 +96,18 @@ endif ############################################################################## -INSTALL_DEP= src/luajit +INSTALL_DEP= src/raptorjit default all $(INSTALL_DEP): - @echo "==== Building LuaJIT $(VERSION) ====" + @echo "==== Building RaptorJIT $(VERSION) ====" $(MAKE) -C src - @echo "==== Successfully built LuaJIT $(VERSION) ====" + @echo "==== Successfully built RaptorJIT $(VERSION) ====" + +reusevm: + $(MAKE) -C src reusevm install: $(INSTALL_DEP) - @echo "==== Installing LuaJIT $(VERSION) to $(PREFIX) ====" + @echo "==== Installing RaptorJIT $(VERSION) to $(PREFIX) ====" $(MKDIR) $(INSTALL_DIRS) cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T) cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || : @@ -124,24 +117,18 @@ 
install: $(INSTALL_DEP) $(LDCONFIG) $(INSTALL_LIB) && \ $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT1) && \ $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT2) || : - cd etc && $(INSTALL_F) $(FILE_MAN) $(INSTALL_MAN) cd etc && $(SED_PC) $(FILE_PC) > $(FILE_PC).tmp && \ - $(INSTALL_F) $(FILE_PC).tmp $(INSTALL_PC) && \ + $(INSTALL_F) -D $(FILE_PC).tmp $(INSTALL_PC) && \ $(RM) $(FILE_PC).tmp cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) - @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" - @echo "" - @echo "Note: the development releases deliberately do NOT install a symlink for luajit" - @echo "You can do this now by running this command (with sudo):" - @echo "" - @echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)" + $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM) @echo "" uninstall: - @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" - $(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) + @echo "==== Uninstalling RaptorJIT $(VERSION) from $(PREFIX) ====" + $(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) for file in $(FILES_JITLIB); do \ $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ done @@ -150,17 +137,16 @@ uninstall: done $(LDCONFIG) $(INSTALL_LIB) $(RMDIR) $(UNINSTALL_DIRS) || : - @echo "==== Successfully uninstalled LuaJIT $(VERSION) from $(PREFIX) ====" + @echo "==== Successfully uninstalled RaptorJIT $(VERSION) from $(PREFIX) ====" ############################################################################## -amalg: - @echo "Building LuaJIT $(VERSION)" - $(MAKE) -C src amalg - clean: $(MAKE) -C src clean -.PHONY: all install amalg clean +bootstrapclean: + $(MAKE) -C src bootstrapclean + +.PHONY: all install clean ############################################################################## diff --git a/PROFESSIONAL-SUPPORT.md 
b/PROFESSIONAL-SUPPORT.md new file mode 100644 index 0000000000..30dcd0bccf --- /dev/null +++ b/PROFESSIONAL-SUPPORT.md @@ -0,0 +1,10 @@ +Professional support is available to help you use RaptorJIT +successfully. You can contact the parties listed below if you need +help to design your application; to understand the compiler and the +tools; to fix bugs and optimize code; to maintain a port to a new +platform; etc. + +Links in alphabetical order with one-sentence summaries: + +- [Snabb Solutions](https://snabb.solutions/) offers consulting and standard support agreements for RaptorJIT and related projects. + diff --git a/README b/README deleted file mode 100644 index 719e6118ab..0000000000 --- a/README +++ /dev/null @@ -1,16 +0,0 @@ -README for LuaJIT 2.1.0-beta2 ------------------------------ - -LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. - -Project Homepage: http://luajit.org/ - -LuaJIT is Copyright (C) 2005-2017 Mike Pall. -LuaJIT is free software, released under the MIT license. -See full Copyright Notice in the COPYRIGHT file or in luajit.h. - -Documentation for LuaJIT is available in HTML format. -Please point your favorite browser to: - - doc/luajit.html - diff --git a/README.md b/README.md new file mode 100644 index 0000000000..d7b0247db5 --- /dev/null +++ b/README.md @@ -0,0 +1,205 @@ +

RaptorJIT

+ +[![Build Status](https://travis-ci.org/raptorjit/raptorjit.svg?branch=master)](https://travis-ci.org/raptorjit/raptorjit) + +**RaptorJIT** is a Lua implementation suitable for high-performance +low-level system programming. If you want to use a simple dynamic +language to write a network stack; a hypervisor; a unikernel; a +database; etc., then you have come to the right place. + +RaptorJIT is a fork of [LuaJIT](https://luajit.org/) where we aim to +provide: + +- Ubiquitous tracing and profiling to make application + performance and compiler behaviour transparent to programmers. +- Interactive tools for inspecting and cross-referencing + trace and profiler data ([Studio](https://github.com/studio/studio/)). +- Collaborative and distributed development based on the Linux kernel + fork-and-merge model. + +The most notable technical changes since forking LuaJIT are: + +- Added `auditlog` and `vmprofile` low-overhead ("always on") binary + tracing and profiler logging features. Removed obsolete tracing + based on introspection including `jit.v`, `jit.dump`, and `jit.p`. +- Reduced code maintenance footprint ~50% by removing `#ifdef` + features that are not required for Linux/x86-64, e.g. Windows + support, 32-bit heap support, and non-x86 backends. This is a + necessary short-term expedient to make the code maintainable while + we bootstrap the project. +- Compiler heuristics tightened to reduce the risk of bytecode + blacklisting causing catastrophic performance drops. +- Started using `git merge` to accept contributions of both code and + development history from other forks. + +RaptorJIT is used successfully by +the [Snabb](https://github.com/snabbco/snabb) community to develop +high-performance production network equipment. Join us! 
+ +### RaptorJIT compilation for users + +Build using LuaJIT to bootstrap the VM: + +```shell +$ make # requires LuaJIT (2.0 or 2.1) to run DynASM +``` + +Build without bootstrapping, when not hacking the VM: + +```shell +$ make reusevm # Reuse reference copy of the generated VM code +$ make # Does not require LuaJIT now +``` + +### Inspecting trace and profiler data interactively + +To understand how your program executes you first produce diagnostic data (*auditlog* and *vmprofile* files) and then you inspect them interactively with [Studio](https://github.com/studio/studio). + +You can produce diagnostic data on the command line: + +```shell +$ raptorjit -a audit.log -p default.vmprofile ... +``` + +Or within your Lua code: + +```lua +jit.auditlog("audit.log") +local vmprofile = require("jit.vmprofile") +vmprofile.open("default.vmprofile") +``` + +Then you can copy the files `audit.log` and `*.vmprofile` into a +directory `/somepath` and inspect that with the Studio script: + +``` +with import <studio>; +raptorjit.inspect /somepath +``` + +Studio will then parse, analyze, cross-reference, etc, the diagnostic +data and present an interactive user-interface for browsing how the +program ran. + +Here are tutorial videos for Studio: + +- [How to load Snabb diagnostic data into Studio](https://www.youtube.com/watch?v=x6e1vFFpq5Q). Covers installing Studio and running a script. (Uses a Snabb-specific mechanism for producing diagnostic data which is implemented in Lua.) +- [Inspecting RaptorJIT IR code with Studio](https://www.youtube.com/watch?v=MQyxXSPXcwg). Covers profiling and inspecting small Lua scripts. Runs Lua code directly from the Studio UI. + +### RaptorJIT compilation for VM hackers + +RaptorJIT uses [Nix](http://nixos.org/nix/) to provide a reference +build environment. You can use Nix to build/test/benchmark RaptorJIT +with suitable versions of all dependencies provided. 
+ +Note: Building with nix will be slow the first time because it +downloads the exact reference versions of the toolchain (gcc, etc) +and all dependencies (glibc, etc). This is all cached for future +builds. + +#### Build with nix + +Install nix: + +``` +$ curl https://nixos.org/nix/install | sh +``` + +Build in batch-mode and run the test suite (option 1a): + +```shell +$ nix-build # produces result/bin/raptorjit +``` + +Build in batch-mode without the test suite (option 1b): + +```shell +$ nix-build -A raptorjit +``` + +Build interactively (option 2): + +```shell +$ nix-shell # start sub-shell with pristine build environment in $PATH +[nix-shell]$ make -j # build manually as many times as you like +[nix-shell]$ exit # quit when done +``` + +#### Build without nix + +```shell +$ make +``` + +... but make sure you have at least `make`, `gcc`, and `luajit` in your `$PATH`. + +### Run the benchmarks + +Nix can also run the full benchmark suite and generate visualizations +with R/ggplot2. + +The simplest incantation tests one branch: + +```shell +$ nix-build testsuite/bench --arg Asrc ./. # note: ./. means ./ +``` + +You can also test several branches (A-E), give them names, specify +command-line arguments, say how many tests to run, and allow parallel +execution: + +```shell +# Run the benchmarks and create result visualizations result/ +$ nix-build testsuite/bench \ + --arg Asrc ~/git/raptorjit \ + --argstr Aname master \ + --arg Bsrc ~/git/raptorjit-hack \ + --argstr Bname hacked \ + --arg Csrc ~/git/raptorjit-hack2 \ + --argstr Cname hacked-O1 \ + --argstr Cargs -O1 \ + --arg runs 100 \ + -j 5 # Run up to 5 tests in parallel +``` + +If you are using a distributed nix environment such +as [Hydra](https://nixos.org/hydra/) then the tests can be +automatically parallelized and distributed across a suitable build +farm. + +### Optimization resources + +These are the authoritative optimization resources for processors +supported by RaptorJIT. 
If you are confused by references to CPU +details in discussions then these are the places to look for answers. + +- [Computer Architecture: A Quantitative Approach](https://www.amazon.com/Computer-Architecture-Fifth-Quantitative-Approach/dp/012383872X) by Hennessy and Patterson. +- [Intel Architectures Optimization Reference Manual](http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-optimization-manual.html). +- Agner Fog's [software optimization resources](http://www.agner.org/optimize/): + - [Instruction latency and throughput tables](http://www.agner.org/optimize/instruction_tables.pdf). + - [Microarchitecture of Intel, AMD, and VIA CPUs](http://www.agner.org/optimize/microarchitecture.pdf). + - [Optimizing subroutines in assembly language for x86](http://www.agner.org/optimize/optimizing_assembly.pdf). + +The [AnandTech review of the Haswell microarchitecture](http://www.anandtech.com/show/6355/intels-haswell-architecture) is also excellent lighter reading. + +### Quotes + +Here are some borrowed words to put this branch into context: + +> I'm outta here in a couple of days. Good luck. You'll need it. +> _[Mike Pall](http://www.freelists.org/post/luajit/Turning-Lua-into-C-was-alleviate-the-load-of-the-GC)_ + +> Optimal code is not optimal to maintain. _[Vyacheslav Egorov](https://www.youtube.com/watch?v=EaLboOUG9VQ)_ + +> If a programmer is indispensable, get rid of him as quickly as possible. _[Gerald M. Weinberg](https://www.amazon.com/Psychology-Computer-Programming-Silver-Anniversary/dp/0932633420)_ + +> If a system is to serve the creative spirit, it +> must be entirely comprehensible to a single individual. _[Dan +> Ingalls](https://www.cs.virginia.edu/~evans/cs655/readings/smalltalk.html)_ + +> The competent programmer is fully aware of the strictly limited size of his own skull; therefore he approaches the programming task in full humility, and among other things he avoids clever tricks like the plague. _[E.W. 
Dijkstra](https://www.cs.utexas.edu/~EWD/transcriptions/EWD03xx/EWD340.html)_ + +> There are two ways of constructing a software design: One way is to make it so simple that there are obviously no deficiencies, and the other way is to make it so complicated that there are no obvious deficiencies. The first method is far more difficult. _[C.A.R. Hoare](http://zoo.cs.yale.edu/classes/cs422/2014/bib/hoare81emperor.pdf)_ + +> Everyone knows that debugging is twice as hard as writing a program in the first place. So if you're as clever as you can be when you write it, how will you ever debug it? _[Brian Kernighan](http://www2.ing.unipi.it/~a009435/issw/extra/kp_elems_of_pgmng_sty.pdf)_ + diff --git a/check-generated-code.nix b/check-generated-code.nix new file mode 100644 index 0000000000..78ff51b2f8 --- /dev/null +++ b/check-generated-code.nix @@ -0,0 +1,20 @@ +# Check that generated sources match the repo version. +{ pkgs, raptorjit }: +with pkgs; with lib; + +# Generated files that are kept in tree. +let generatedFiles = + "lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h host/buildvm_arch.h"; +in + +overrideDerivation raptorjit (as: + { + checkPhase = '' + for f in ${generatedFiles}; do + echo "checking $f.." + diff -u src/reusevm/$f src/$f + done + echo "all files ok" + ''; + doCheck = true; + }) diff --git a/default.nix b/default.nix new file mode 100644 index 0000000000..59500d4725 --- /dev/null +++ b/default.nix @@ -0,0 +1,48 @@ +# default.nix - define the build environment for RaptorJIT +# +# This file can be used by 'nix-build' or 'nix-shell' to create a +# pristine build environment with precisely the expected software in +# $PATH. This makes it possible to build raptorjit in the same way on +# any machine. +# +# See README.md for usage instructions. + +{ pkgs ? (import ./pkgs.nix) {} +, source ? pkgs.lib.cleanSource ./. +, version ? "dev" +, check ? 
false }: + +let + callPackage = (pkgs.lib.callPackageWith { inherit pkgs source version; }); + raptorjit = (callPackage ./raptorjit.nix {}); + raptorjit-assert = raptorjit.overrideAttrs( + old: { NIX_CFLAGS_COMPILE = " -DLUA_USE_ASSERT"; }); + test = name: args: (callPackage ./test.nix { inherit name args; raptorjit = raptorjit-assert; }); + test-capi = callPackage ./test-capi.nix { inherit pkgs raptorjit; }; + test-libraptorjit = callPackage ./test-libraptorjit.nix { inherit pkgs raptorjit; }; + check-generated-code = (callPackage ./check-generated-code.nix { inherit raptorjit; }); + nowarnings = raptorjit.overrideAttrs( + old: { NIX_CFLAGS_COMPILE = "-Werror"; }); +in + +# Build RaptorJIT and run multiple test suites. +{ + raptorjit = raptorjit; + test-O3 = test "O3" "-O3"; + test-O2 = test "O2" "-O2"; + test-O1 = test "O1" "-O1"; + test-nojit = test "nojit" "-joff"; + test-capi = test-capi; + test-libraptorjit = test-libraptorjit; + # Test that generated bytecode is compatible + test-bytecode-compat = pkgs.runCommand "test-bytecode-compat" + { buildInputs = [ raptorjit ]; } + '' + echo "print('ok')" > test.lua + raptorjit -bg test.lua test.raw + raptorjit test.raw | grep ok + touch $out + ''; +} // +(if check then { inherit nowarnings check-generated-code; } else {}) + diff --git a/doc/changes.html b/doc/changes.html index 426b18f783..44e03293c3 100644 --- a/doc/changes.html +++ b/doc/changes.html @@ -44,8 +44,6 @@

LuaJIT Change History

jit.* Library
  • Lua/C API -
  • -Profiler
  • Status diff --git a/doc/contact.html b/doc/contact.html index fe4751c0ea..5e07bded58 100644 --- a/doc/contact.html +++ b/doc/contact.html @@ -41,8 +41,6 @@

    Contact

    jit.* Library
  • Lua/C API -
  • -Profiler
  • Status diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html index ad462c63e5..041a722009 100644 --- a/doc/ext_c_api.html +++ b/doc/ext_c_api.html @@ -41,8 +41,6 @@

    Lua/C API Extensions

    jit.* Library
  • Lua/C API -
  • -Profiler
  • Status diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html index 5e1daaf54b..d48d77fae3 100644 --- a/doc/ext_ffi.html +++ b/doc/ext_ffi.html @@ -41,8 +41,6 @@

    FFI Library

    jit.* Library
  • Lua/C API -
  • -Profiler
  • Status diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html index 91af2e1d43..5cedcf0e7e 100644 --- a/doc/ext_ffi_api.html +++ b/doc/ext_ffi_api.html @@ -46,8 +46,6 @@

    ffi.* API Functions

    jit.* Library
  • Lua/C API -
  • -Profiler
  • Status diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html index bba03b7b43..f7480be0dd 100644 --- a/doc/ext_ffi_semantics.html +++ b/doc/ext_ffi_semantics.html @@ -46,8 +46,6 @@

    FFI Semantics

    jit.* Library
  • Lua/C API -
  • -Profiler
  • Status @@ -864,7 +862,7 @@

    Parameterized Types

    The main use for parameterized types are libraries implementing abstract data types -(» example), +(example), similar to what can be achieved with C++ template metaprogramming. Another use case are derived types of anonymous structs, which avoids pollution of the global struct namespace. @@ -1215,19 +1213,17 @@

    Current Status

    The JIT compiler already handles a large subset of all FFI operations. It automatically falls back to the interpreter for unimplemented -operations (you can check for this with the --jv command line option). +operations. The following operations are currently not compiled and may exhibit suboptimal performance, especially when used in inner loops:

  • Status diff --git a/doc/ext_jit.html b/doc/ext_jit.html index e4088bcbd2..0b0924a896 100644 --- a/doc/ext_jit.html +++ b/doc/ext_jit.html @@ -41,8 +41,6 @@

    jit.* Library

    jit.* Library
  • Lua/C API -
  • -Profiler
  • Status @@ -176,17 +174,6 @@

    jit.opt.* — JIT compiler optimization control -

    jit.util.* — JIT compiler introspection

    -

    -This sub-module holds functions to introspect the bytecode, generated -traces, the IR and the generated machine code. The functionality -provided by this module is still in flux and therefore undocumented. -

    -

    -The debug modules -jbc, -jv and -jdump make -extensive use of these functions. Please check out their source code, -if you want to know more. -


  • Lua/C API -
  • -Profiler
  • Status @@ -151,11 +149,6 @@

    C API extensions

    extra functions to the Lua/C API.

    -

    Profiler

    -

    -LuaJIT has an integrated profiler. -

    -

    Enhanced Standard Library Functions

    xpcall(f, err [,args...]) passes arguments

    @@ -312,6 +305,26 @@

    Extensions from Lua 5.2

  • debug.getupvalue() and debug.setupvalue() handle C functions.
  • debug.upvalueid() and debug.upvaluejoin().
  • +
  • Lua/C API extensions: +lua_version() +lua_upvalueid() +lua_upvaluejoin() +lua_loadx() +lua_copy() +lua_tonumberx() +lua_tointegerx() +luaL_fileresult() +luaL_execresult() +luaL_loadfilex() +luaL_loadbufferx() +luaL_traceback() +luaL_setfuncs() +luaL_pushmodule() +luaL_newlibtable() +luaL_newlib() +luaL_testudata() +luaL_setmetatable() +
  • Command line option -E.
  • Command line checks __tostring for errors.
  • @@ -338,6 +351,7 @@

    Extensions from Lua 5.2

  • debug.getuservalue() and debug.setuservalue().
  • Remove math.mod(), string.gfind().
  • package.searchers.
  • +
  • module() returns the module table.
  • Note: this provides only partial compatibility with Lua 5.2 at the @@ -355,6 +369,9 @@

    Extensions from Lua 5.3

  • io.read() and file:read() accept formats with or without a leading *.
  • table.move(a1, f, e, t [,a2]).
  • coroutine.isyieldable().
  • +
  • Lua/C API extensions: +lua_isyieldable() +
  • C++ Exception Interoperability

    diff --git a/doc/faq.html b/doc/faq.html index 2c930743dd..afeff940c4 100644 --- a/doc/faq.html +++ b/doc/faq.html @@ -44,8 +44,6 @@

    Frequently Asked Questions (FAQ)

    jit.* Library
  • Lua/C API -
  • -Profiler
  • Status diff --git a/doc/install.html b/doc/install.html index 851f910afa..97c5f27925 100644 --- a/doc/install.html +++ b/doc/install.html @@ -69,8 +69,6 @@

    Installation

    jit.* Library
  • Lua/C API -
  • -Profiler
  • Status diff --git a/doc/luajit.html b/doc/luajit.html index ef5b824c1b..7202f73f2b 100644 --- a/doc/luajit.html +++ b/doc/luajit.html @@ -126,8 +126,6 @@

    LuaJIT

    jit.* Library
  • Lua/C API -
  • -Profiler
  • Status diff --git a/doc/raptorjit.eps.bz2 b/doc/raptorjit.eps.bz2 new file mode 100644 index 0000000000..a006db84ce Binary files /dev/null and b/doc/raptorjit.eps.bz2 differ diff --git a/doc/raptorjit.png b/doc/raptorjit.png new file mode 100644 index 0000000000..2d5fc52c5c Binary files /dev/null and b/doc/raptorjit.png differ diff --git a/doc/running.html b/doc/running.html index 64f0491647..eb43075a4d 100644 --- a/doc/running.html +++ b/doc/running.html @@ -63,8 +63,6 @@

    Running LuaJIT

    jit.* Library
  • Lua/C API -
  • -Profiler
  • Status @@ -162,14 +160,10 @@

    -b[options] input output

    # Link test.obj with your application and load it with require("test") -

    -j cmd[=arg[,arg...]]

    +

    -j cmd

    -This option performs a LuaJIT control command or activates one of the -loadable extension modules. The command is first looked up in the -jit.* library. If no matching function is found, a module -named jit.<cmd> is loaded and the start() -function of the module is called with the specified arguments (if -any). The space between -j and cmd is optional. +This option performs a LuaJIT control command from the jit.* +library.

    Here are the available LuaJIT control commands: @@ -178,20 +172,7 @@

    -j cmd[=arg[,arg...]]

  • -jon — Turns the JIT compiler on (default).
  • -joff — Turns the JIT compiler off (only use the interpreter).
  • -jflush — Flushes the whole cache of compiled code.
  • -
  • -jv — Shows verbose information about the progress of the JIT compiler.
  • -
  • -jdump — Dumps the code and structures used in various compiler stages.
  • -
  • -jp — Start the integrated profiler.
  • -

    -The -jv and -jdump commands are extension modules -written in Lua. They are mainly used for debugging the JIT compiler -itself. For a description of their options and output format, please -read the comment block at the start of their source. -They can be found in the lib directory of the source -distribution or installed under the jit directory. By default -this is /usr/local/share/luajit-2.0.4/jit on POSIX -systems. -

    -O[level]
    -O[+]flag   -O-flag
    diff --git a/doc/status.html b/doc/status.html index cad6ca6546..6619ebb7db 100644 --- a/doc/status.html +++ b/doc/status.html @@ -44,8 +44,6 @@

    Status

    jit.* Library
  • Lua/C API -
  • -Profiler
  • Status diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h deleted file mode 100644 index a43f7c6645..0000000000 --- a/dynasm/dasm_arm.h +++ /dev/null @@ -1,456 +0,0 @@ -/* -** DynASM ARM encoding engine. -** Copyright (C) 2005-2017 Mike Pall. All rights reserved. -** Released under the MIT license. See dynasm.lua for full copyright notice. -*/ - -#include -#include -#include -#include - -#define DASM_ARCH "arm" - -#ifndef DASM_EXTERN -#define DASM_EXTERN(a,b,c,d) 0 -#endif - -/* Action definitions. */ -enum { - DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, - /* The following actions need a buffer position. */ - DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, - /* The following actions also have an argument. */ - DASM_REL_PC, DASM_LABEL_PC, - DASM_IMM, DASM_IMM12, DASM_IMM16, DASM_IMML8, DASM_IMML12, DASM_IMMV8, - DASM__MAX -}; - -/* Maximum number of section buffer positions for a single dasm_put() call. */ -#define DASM_MAXSECPOS 25 - -/* DynASM encoder status codes. Action list offset or number are or'ed in. */ -#define DASM_S_OK 0x00000000 -#define DASM_S_NOMEM 0x01000000 -#define DASM_S_PHASE 0x02000000 -#define DASM_S_MATCH_SEC 0x03000000 -#define DASM_S_RANGE_I 0x11000000 -#define DASM_S_RANGE_SEC 0x12000000 -#define DASM_S_RANGE_LG 0x13000000 -#define DASM_S_RANGE_PC 0x14000000 -#define DASM_S_RANGE_REL 0x15000000 -#define DASM_S_UNDEF_LG 0x21000000 -#define DASM_S_UNDEF_PC 0x22000000 - -/* Macros to convert positions (8 bit section + 24 bit index). */ -#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) -#define DASM_POS2BIAS(pos) ((pos)&0xff000000) -#define DASM_SEC2POS(sec) ((sec)<<24) -#define DASM_POS2SEC(pos) ((pos)>>24) -#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) - -/* Action list type. */ -typedef const unsigned int *dasm_ActList; - -/* Per-section structure. */ -typedef struct dasm_Section { - int *rbuf; /* Biased buffer pointer (negative section bias). */ - int *buf; /* True buffer pointer. 
*/ - size_t bsize; /* Buffer size in bytes. */ - int pos; /* Biased buffer position. */ - int epos; /* End of biased buffer position - max single put. */ - int ofs; /* Byte offset into section. */ -} dasm_Section; - -/* Core structure holding the DynASM encoding state. */ -struct dasm_State { - size_t psize; /* Allocated size of this structure. */ - dasm_ActList actionlist; /* Current actionlist pointer. */ - int *lglabels; /* Local/global chain/pos ptrs. */ - size_t lgsize; - int *pclabels; /* PC label chains/pos ptrs. */ - size_t pcsize; - void **globals; /* Array of globals (bias -10). */ - dasm_Section *section; /* Pointer to active section. */ - size_t codesize; /* Total size of all code sections. */ - int maxsection; /* 0 <= sectionidx < maxsection. */ - int status; /* Status code. */ - dasm_Section sections[1]; /* All sections. Alloc-extended. */ -}; - -/* The size of the core structure depends on the max. number of sections. */ -#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) - - -/* Initialize DynASM state. */ -void dasm_init(Dst_DECL, int maxsection) -{ - dasm_State *D; - size_t psz = 0; - int i; - Dst_REF = NULL; - DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); - D = Dst_REF; - D->psize = psz; - D->lglabels = NULL; - D->lgsize = 0; - D->pclabels = NULL; - D->pcsize = 0; - D->globals = NULL; - D->maxsection = maxsection; - for (i = 0; i < maxsection; i++) { - D->sections[i].buf = NULL; /* Need this for pass3. */ - D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); - D->sections[i].bsize = 0; - D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ - } -} - -/* Free DynASM state. 
*/ -void dasm_free(Dst_DECL) -{ - dasm_State *D = Dst_REF; - int i; - for (i = 0; i < D->maxsection; i++) - if (D->sections[i].buf) - DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); - if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); - if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); - DASM_M_FREE(Dst, D, D->psize); -} - -/* Setup global label array. Must be called before dasm_setup(). */ -void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) -{ - dasm_State *D = Dst_REF; - D->globals = gl - 10; /* Negative bias to compensate for locals. */ - DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); -} - -/* Grow PC label array. Can be called after dasm_setup(), too. */ -void dasm_growpc(Dst_DECL, unsigned int maxpc) -{ - dasm_State *D = Dst_REF; - size_t osz = D->pcsize; - DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); - memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); -} - -/* Setup encoder. 
*/ -void dasm_setup(Dst_DECL, const void *actionlist) -{ - dasm_State *D = Dst_REF; - int i; - D->actionlist = (dasm_ActList)actionlist; - D->status = DASM_S_OK; - D->section = &D->sections[0]; - memset((void *)D->lglabels, 0, D->lgsize); - if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); - for (i = 0; i < D->maxsection; i++) { - D->sections[i].pos = DASM_SEC2POS(i); - D->sections[i].ofs = 0; - } -} - - -#ifdef DASM_CHECKS -#define CK(x, st) \ - do { if (!(x)) { \ - D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) -#define CKPL(kind, st) \ - do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ - D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) -#else -#define CK(x, st) ((void)0) -#define CKPL(kind, st) ((void)0) -#endif - -static int dasm_imm12(unsigned int n) -{ - int i; - for (i = 0; i < 16; i++, n = (n << 2) | (n >> 30)) - if (n <= 255) return (int)(n + (i << 8)); - return -1; -} - -/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ -void dasm_put(Dst_DECL, int start, ...) -{ - va_list ap; - dasm_State *D = Dst_REF; - dasm_ActList p = D->actionlist + start; - dasm_Section *sec = D->section; - int pos = sec->pos, ofs = sec->ofs; - int *b; - - if (pos >= sec->epos) { - DASM_M_GROW(Dst, int, sec->buf, sec->bsize, - sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); - sec->rbuf = sec->buf - DASM_POS2BIAS(pos); - sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); - } - - b = sec->rbuf; - b[pos++] = start; - - va_start(ap, start); - while (1) { - unsigned int ins = *p++; - unsigned int action = (ins >> 16); - if (action >= DASM__MAX) { - ofs += 4; - } else { - int *pl, n = action >= DASM_REL_PC ? 
va_arg(ap, int) : 0; - switch (action) { - case DASM_STOP: goto stop; - case DASM_SECTION: - n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); - D->section = &D->sections[n]; goto stop; - case DASM_ESC: p++; ofs += 4; break; - case DASM_REL_EXT: break; - case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; - case DASM_REL_LG: - n = (ins & 2047) - 10; pl = D->lglabels + n; - /* Bkwd rel or global. */ - if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } - pl += 10; n = *pl; - if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ - goto linkrel; - case DASM_REL_PC: - pl = D->pclabels + n; CKPL(pc, PC); - putrel: - n = *pl; - if (n < 0) { /* Label exists. Get label pos and store it. */ - b[pos] = -n; - } else { - linkrel: - b[pos] = n; /* Else link to rel chain, anchored at label. */ - *pl = pos; - } - pos++; - break; - case DASM_LABEL_LG: - pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; - case DASM_LABEL_PC: - pl = D->pclabels + n; CKPL(pc, PC); - putlabel: - n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ - while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; - } - *pl = -pos; /* Label exists now. */ - b[pos++] = ofs; /* Store pass1 offset estimate. */ - break; - case DASM_IMM: - case DASM_IMM16: -#ifdef DASM_CHECKS - CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); - if ((ins & 0x8000)) - CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); - else - CK((n>>((ins>>5)&31)) == 0, RANGE_I); -#endif - b[pos++] = n; - break; - case DASM_IMMV8: - CK((n & 3) == 0, RANGE_I); - n >>= 2; - case DASM_IMML8: - case DASM_IMML12: - CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) : - (((-n)>>((ins>>5)&31)) == 0), RANGE_I); - b[pos++] = n; - break; - case DASM_IMM12: - CK(dasm_imm12((unsigned int)n) != -1, RANGE_I); - b[pos++] = n; - break; - } - } - } -stop: - va_end(ap); - sec->pos = pos; - sec->ofs = ofs; -} -#undef CK - -/* Pass 2: Link sections, shrink aligns, fix label offsets. 
*/ -int dasm_link(Dst_DECL, size_t *szp) -{ - dasm_State *D = Dst_REF; - int secnum; - int ofs = 0; - -#ifdef DASM_CHECKS - *szp = 0; - if (D->status != DASM_S_OK) return D->status; - { - int pc; - for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) - if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; - } -#endif - - { /* Handle globals not defined in this translation unit. */ - int idx; - for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { - int n = D->lglabels[idx]; - /* Undefined label: Collapse rel chain and replace with marker (< 0). */ - while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } - } - } - - /* Combine all code sections. No support for data sections (yet). */ - for (secnum = 0; secnum < D->maxsection; secnum++) { - dasm_Section *sec = D->sections + secnum; - int *b = sec->rbuf; - int pos = DASM_SEC2POS(secnum); - int lastpos = sec->pos; - - while (pos != lastpos) { - dasm_ActList p = D->actionlist + b[pos++]; - while (1) { - unsigned int ins = *p++; - unsigned int action = (ins >> 16); - switch (action) { - case DASM_STOP: case DASM_SECTION: goto stop; - case DASM_ESC: p++; break; - case DASM_REL_EXT: break; - case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; - case DASM_REL_LG: case DASM_REL_PC: pos++; break; - case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; - case DASM_IMM: case DASM_IMM12: case DASM_IMM16: - case DASM_IMML8: case DASM_IMML12: case DASM_IMMV8: pos++; break; - } - } - stop: (void)0; - } - ofs += sec->ofs; /* Next section starts right after current section. */ - } - - D->codesize = ofs; /* Total size of all code sections */ - *szp = ofs; - return DASM_S_OK; -} - -#ifdef DASM_CHECKS -#define CK(x, st) \ - do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) -#else -#define CK(x, st) ((void)0) -#endif - -/* Pass 3: Encode sections. 
*/ -int dasm_encode(Dst_DECL, void *buffer) -{ - dasm_State *D = Dst_REF; - char *base = (char *)buffer; - unsigned int *cp = (unsigned int *)buffer; - int secnum; - - /* Encode all code sections. No support for data sections (yet). */ - for (secnum = 0; secnum < D->maxsection; secnum++) { - dasm_Section *sec = D->sections + secnum; - int *b = sec->buf; - int *endb = sec->rbuf + sec->pos; - - while (b != endb) { - dasm_ActList p = D->actionlist + *b++; - while (1) { - unsigned int ins = *p++; - unsigned int action = (ins >> 16); - int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; - switch (action) { - case DASM_STOP: case DASM_SECTION: goto stop; - case DASM_ESC: *cp++ = *p++; break; - case DASM_REL_EXT: - n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048)); - goto patchrel; - case DASM_ALIGN: - ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; - break; - case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); - case DASM_REL_PC: - CK(n >= 0, UNDEF_PC); - n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4; - patchrel: - if ((ins & 0x800) == 0) { - CK((n & 3) == 0 && ((n+0x02000000) >> 26) == 0, RANGE_REL); - cp[-1] |= ((n >> 2) & 0x00ffffff); - } else if ((ins & 0x1000)) { - CK((n & 3) == 0 && -256 <= n && n <= 256, RANGE_REL); - goto patchimml8; - } else if ((ins & 0x2000) == 0) { - CK((n & 3) == 0 && -4096 <= n && n <= 4096, RANGE_REL); - goto patchimml; - } else { - CK((n & 3) == 0 && -1020 <= n && n <= 1020, RANGE_REL); - n >>= 2; - goto patchimml; - } - break; - case DASM_LABEL_LG: - ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); - break; - case DASM_LABEL_PC: break; - case DASM_IMM: - cp[-1] |= ((n>>((ins>>10)&31)) & ((1<<((ins>>5)&31))-1)) << (ins&31); - break; - case DASM_IMM12: - cp[-1] |= dasm_imm12((unsigned int)n); - break; - case DASM_IMM16: - cp[-1] |= ((n & 0xf000) << 4) | (n & 0x0fff); - break; - case DASM_IMML8: patchimml8: - cp[-1] |= n >= 0 ? 
(0x00800000 | (n & 0x0f) | ((n & 0xf0) << 4)) : - ((-n & 0x0f) | ((-n & 0xf0) << 4)); - break; - case DASM_IMML12: case DASM_IMMV8: patchimml: - cp[-1] |= n >= 0 ? (0x00800000 | n) : (-n); - break; - default: *cp++ = ins; break; - } - } - stop: (void)0; - } - } - - if (base + D->codesize != (char *)cp) /* Check for phase errors. */ - return DASM_S_PHASE; - return DASM_S_OK; -} -#undef CK - -/* Get PC label offset. */ -int dasm_getpclabel(Dst_DECL, unsigned int pc) -{ - dasm_State *D = Dst_REF; - if (pc*sizeof(int) < D->pcsize) { - int pos = D->pclabels[pc]; - if (pos < 0) return *DASM_POS2PTR(D, -pos); - if (pos > 0) return -1; /* Undefined. */ - } - return -2; /* Unused or out of range. */ -} - -#ifdef DASM_CHECKS -/* Optional sanity checker to call between isolated encoding steps. */ -int dasm_checkstep(Dst_DECL, int secmatch) -{ - dasm_State *D = Dst_REF; - if (D->status == DASM_S_OK) { - int i; - for (i = 1; i <= 9; i++) { - if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } - D->lglabels[i] = 0; - } - } - if (D->status == DASM_S_OK && secmatch >= 0 && - D->section != &D->sections[secmatch]) - D->status = DASM_S_MATCH_SEC|(D->section-D->sections); - return D->status; -} -#endif - diff --git a/dynasm/dasm_arm.lua b/dynasm/dasm_arm.lua deleted file mode 100644 index 32f595afff..0000000000 --- a/dynasm/dasm_arm.lua +++ /dev/null @@ -1,1125 +0,0 @@ ------------------------------------------------------------------------------- --- DynASM ARM module. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------- - --- Module information: -local _info = { - arch = "arm", - description = "DynASM ARM module", - version = "1.4.0", - vernum = 10400, - release = "2015-10-18", - author = "Mike Pall", - license = "MIT", -} - --- Exported glue functions for the arch-specific module. 
-local _M = { _info = _info } - --- Cache library functions. -local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs -local assert, setmetatable, rawget = assert, setmetatable, rawget -local _s = string -local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char -local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub -local concat, sort, insert = table.concat, table.sort, table.insert -local bit = bit or require("bit") -local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift -local ror, tohex = bit.ror, bit.tohex - --- Inherited tables and callbacks. -local g_opt, g_arch -local wline, werror, wfatal, wwarn - --- Action name list. --- CHECK: Keep this in sync with the C code! -local action_names = { - "STOP", "SECTION", "ESC", "REL_EXT", - "ALIGN", "REL_LG", "LABEL_LG", - "REL_PC", "LABEL_PC", "IMM", "IMM12", "IMM16", "IMML8", "IMML12", "IMMV8", -} - --- Maximum number of section buffer positions for dasm_put(). --- CHECK: Keep this in sync with the C code! -local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. - --- Action name -> action number. -local map_action = {} -for n,name in ipairs(action_names) do - map_action[name] = n-1 -end - --- Action list buffer. -local actlist = {} - --- Argument list for next dasm_put(). Start with offset 0 into action list. -local actargs = { 0 } - --- Current number of section buffer positions for dasm_put(). -local secpos = 1 - ------------------------------------------------------------------------------- - --- Dump action names and numbers. -local function dumpactions(out) - out:write("DynASM encoding engine action codes:\n") - for n,name in ipairs(action_names) do - local num = map_action[name] - out:write(format(" %-10s %02X %d\n", name, num, num)) - end - out:write("\n") -end - --- Write action list buffer as a huge static C array. 
-local function writeactions(out, name) - local nn = #actlist - if nn == 0 then nn = 1; actlist[0] = map_action.STOP end - out:write("static const unsigned int ", name, "[", nn, "] = {\n") - for i = 1,nn-1 do - assert(out:write("0x", tohex(actlist[i]), ",\n")) - end - assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) -end - ------------------------------------------------------------------------------- - --- Add word to action list. -local function wputxw(n) - assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") - actlist[#actlist+1] = n -end - --- Add action to list with optional arg. Advance buffer pos, too. -local function waction(action, val, a, num) - local w = assert(map_action[action], "bad action name `"..action.."'") - wputxw(w * 0x10000 + (val or 0)) - if a then actargs[#actargs+1] = a end - if a or num then secpos = secpos + (num or 1) end -end - --- Flush action list (intervening C code or buffer pos overflow). -local function wflush(term) - if #actlist == actargs[1] then return end -- Nothing to flush. - if not term then waction("STOP") end -- Terminate action list. - wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) - actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). - secpos = 1 -- The actionlist offset occupies a buffer position, too. -end - --- Put escaped word. -local function wputw(n) - if n <= 0x000fffff then waction("ESC") end - wputxw(n) -end - --- Reserve position for word. -local function wpos() - local pos = #actlist+1 - actlist[pos] = "" - return pos -end - --- Store word to reserved position. -local function wputpos(pos, n) - assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") - if n <= 0x000fffff then - insert(actlist, pos+1, n) - n = map_action.ESC * 0x10000 - end - actlist[pos] = n -end - ------------------------------------------------------------------------------- - --- Global label name -> global label number. With auto assignment on 1st use. 
-local next_global = 20 -local map_global = setmetatable({}, { __index = function(t, name) - if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end - local n = next_global - if n > 2047 then werror("too many global labels") end - next_global = n + 1 - t[name] = n - return n -end}) - --- Dump global labels. -local function dumpglobals(out, lvl) - local t = {} - for name, n in pairs(map_global) do t[n] = name end - out:write("Global labels:\n") - for i=20,next_global-1 do - out:write(format(" %s\n", t[i])) - end - out:write("\n") -end - --- Write global label enum. -local function writeglobals(out, prefix) - local t = {} - for name, n in pairs(map_global) do t[n] = name end - out:write("enum {\n") - for i=20,next_global-1 do - out:write(" ", prefix, t[i], ",\n") - end - out:write(" ", prefix, "_MAX\n};\n") -end - --- Write global label names. -local function writeglobalnames(out, name) - local t = {} - for name, n in pairs(map_global) do t[n] = name end - out:write("static const char *const ", name, "[] = {\n") - for i=20,next_global-1 do - out:write(" \"", t[i], "\",\n") - end - out:write(" (const char *)0\n};\n") -end - ------------------------------------------------------------------------------- - --- Extern label name -> extern label number. With auto assignment on 1st use. -local next_extern = 0 -local map_extern_ = {} -local map_extern = setmetatable({}, { __index = function(t, name) - -- No restrictions on the name for now. - local n = next_extern - if n > 2047 then werror("too many extern labels") end - next_extern = n + 1 - t[name] = n - map_extern_[n] = name - return n -end}) - --- Dump extern labels. -local function dumpexterns(out, lvl) - out:write("Extern labels:\n") - for i=0,next_extern-1 do - out:write(format(" %s\n", map_extern_[i])) - end - out:write("\n") -end - --- Write extern label names. 
-local function writeexternnames(out, name) - out:write("static const char *const ", name, "[] = {\n") - for i=0,next_extern-1 do - out:write(" \"", map_extern_[i], "\",\n") - end - out:write(" (const char *)0\n};\n") -end - ------------------------------------------------------------------------------- - --- Arch-specific maps. - --- Ext. register name -> int. name. -local map_archdef = { sp = "r13", lr = "r14", pc = "r15", } - --- Int. register name -> ext. name. -local map_reg_rev = { r13 = "sp", r14 = "lr", r15 = "pc", } - -local map_type = {} -- Type name -> { ctype, reg } -local ctypenum = 0 -- Type number (for Dt... macros). - --- Reverse defines for registers. -function _M.revdef(s) - return map_reg_rev[s] or s -end - -local map_shift = { lsl = 0, lsr = 1, asr = 2, ror = 3, } - -local map_cond = { - eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7, - hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14, - hs = 2, lo = 3, -} - ------------------------------------------------------------------------------- - --- Template strings for ARM instructions. -local map_op = { - -- Basic data processing instructions. 
- and_3 = "e0000000DNPs", - eor_3 = "e0200000DNPs", - sub_3 = "e0400000DNPs", - rsb_3 = "e0600000DNPs", - add_3 = "e0800000DNPs", - adc_3 = "e0a00000DNPs", - sbc_3 = "e0c00000DNPs", - rsc_3 = "e0e00000DNPs", - tst_2 = "e1100000NP", - teq_2 = "e1300000NP", - cmp_2 = "e1500000NP", - cmn_2 = "e1700000NP", - orr_3 = "e1800000DNPs", - mov_2 = "e1a00000DPs", - bic_3 = "e1c00000DNPs", - mvn_2 = "e1e00000DPs", - - and_4 = "e0000000DNMps", - eor_4 = "e0200000DNMps", - sub_4 = "e0400000DNMps", - rsb_4 = "e0600000DNMps", - add_4 = "e0800000DNMps", - adc_4 = "e0a00000DNMps", - sbc_4 = "e0c00000DNMps", - rsc_4 = "e0e00000DNMps", - tst_3 = "e1100000NMp", - teq_3 = "e1300000NMp", - cmp_3 = "e1500000NMp", - cmn_3 = "e1700000NMp", - orr_4 = "e1800000DNMps", - mov_3 = "e1a00000DMps", - bic_4 = "e1c00000DNMps", - mvn_3 = "e1e00000DMps", - - lsl_3 = "e1a00000DMws", - lsr_3 = "e1a00020DMws", - asr_3 = "e1a00040DMws", - ror_3 = "e1a00060DMws", - rrx_2 = "e1a00060DMs", - - -- Multiply and multiply-accumulate. - mul_3 = "e0000090NMSs", - mla_4 = "e0200090NMSDs", - umaal_4 = "e0400090DNMSs", -- v6 - mls_4 = "e0600090DNMSs", -- v6T2 - umull_4 = "e0800090DNMSs", - umlal_4 = "e0a00090DNMSs", - smull_4 = "e0c00090DNMSs", - smlal_4 = "e0e00090DNMSs", - - -- Halfword multiply and multiply-accumulate. - smlabb_4 = "e1000080NMSD", -- v5TE - smlatb_4 = "e10000a0NMSD", -- v5TE - smlabt_4 = "e10000c0NMSD", -- v5TE - smlatt_4 = "e10000e0NMSD", -- v5TE - smlawb_4 = "e1200080NMSD", -- v5TE - smulwb_3 = "e12000a0NMS", -- v5TE - smlawt_4 = "e12000c0NMSD", -- v5TE - smulwt_3 = "e12000e0NMS", -- v5TE - smlalbb_4 = "e1400080NMSD", -- v5TE - smlaltb_4 = "e14000a0NMSD", -- v5TE - smlalbt_4 = "e14000c0NMSD", -- v5TE - smlaltt_4 = "e14000e0NMSD", -- v5TE - smulbb_3 = "e1600080NMS", -- v5TE - smultb_3 = "e16000a0NMS", -- v5TE - smulbt_3 = "e16000c0NMS", -- v5TE - smultt_3 = "e16000e0NMS", -- v5TE - - -- Miscellaneous data processing instructions. 
- clz_2 = "e16f0f10DM", -- v5T - rev_2 = "e6bf0f30DM", -- v6 - rev16_2 = "e6bf0fb0DM", -- v6 - revsh_2 = "e6ff0fb0DM", -- v6 - sel_3 = "e6800fb0DNM", -- v6 - usad8_3 = "e780f010NMS", -- v6 - usada8_4 = "e7800010NMSD", -- v6 - rbit_2 = "e6ff0f30DM", -- v6T2 - movw_2 = "e3000000DW", -- v6T2 - movt_2 = "e3400000DW", -- v6T2 - -- Note: the X encodes width-1, not width. - sbfx_4 = "e7a00050DMvX", -- v6T2 - ubfx_4 = "e7e00050DMvX", -- v6T2 - -- Note: the X encodes the msb field, not the width. - bfc_3 = "e7c0001fDvX", -- v6T2 - bfi_4 = "e7c00010DMvX", -- v6T2 - - -- Packing and unpacking instructions. - pkhbt_3 = "e6800010DNM", pkhbt_4 = "e6800010DNMv", -- v6 - pkhtb_3 = "e6800050DNM", pkhtb_4 = "e6800050DNMv", -- v6 - sxtab_3 = "e6a00070DNM", sxtab_4 = "e6a00070DNMv", -- v6 - sxtab16_3 = "e6800070DNM", sxtab16_4 = "e6800070DNMv", -- v6 - sxtah_3 = "e6b00070DNM", sxtah_4 = "e6b00070DNMv", -- v6 - sxtb_2 = "e6af0070DM", sxtb_3 = "e6af0070DMv", -- v6 - sxtb16_2 = "e68f0070DM", sxtb16_3 = "e68f0070DMv", -- v6 - sxth_2 = "e6bf0070DM", sxth_3 = "e6bf0070DMv", -- v6 - uxtab_3 = "e6e00070DNM", uxtab_4 = "e6e00070DNMv", -- v6 - uxtab16_3 = "e6c00070DNM", uxtab16_4 = "e6c00070DNMv", -- v6 - uxtah_3 = "e6f00070DNM", uxtah_4 = "e6f00070DNMv", -- v6 - uxtb_2 = "e6ef0070DM", uxtb_3 = "e6ef0070DMv", -- v6 - uxtb16_2 = "e6cf0070DM", uxtb16_3 = "e6cf0070DMv", -- v6 - uxth_2 = "e6ff0070DM", uxth_3 = "e6ff0070DMv", -- v6 - - -- Saturating instructions. - qadd_3 = "e1000050DMN", -- v5TE - qsub_3 = "e1200050DMN", -- v5TE - qdadd_3 = "e1400050DMN", -- v5TE - qdsub_3 = "e1600050DMN", -- v5TE - -- Note: the X for ssat* encodes sat_imm-1, not sat_imm. - ssat_3 = "e6a00010DXM", ssat_4 = "e6a00010DXMp", -- v6 - usat_3 = "e6e00010DXM", usat_4 = "e6e00010DXMp", -- v6 - ssat16_3 = "e6a00f30DXM", -- v6 - usat16_3 = "e6e00f30DXM", -- v6 - - -- Parallel addition and subtraction. 
- sadd16_3 = "e6100f10DNM", -- v6 - sasx_3 = "e6100f30DNM", -- v6 - ssax_3 = "e6100f50DNM", -- v6 - ssub16_3 = "e6100f70DNM", -- v6 - sadd8_3 = "e6100f90DNM", -- v6 - ssub8_3 = "e6100ff0DNM", -- v6 - qadd16_3 = "e6200f10DNM", -- v6 - qasx_3 = "e6200f30DNM", -- v6 - qsax_3 = "e6200f50DNM", -- v6 - qsub16_3 = "e6200f70DNM", -- v6 - qadd8_3 = "e6200f90DNM", -- v6 - qsub8_3 = "e6200ff0DNM", -- v6 - shadd16_3 = "e6300f10DNM", -- v6 - shasx_3 = "e6300f30DNM", -- v6 - shsax_3 = "e6300f50DNM", -- v6 - shsub16_3 = "e6300f70DNM", -- v6 - shadd8_3 = "e6300f90DNM", -- v6 - shsub8_3 = "e6300ff0DNM", -- v6 - uadd16_3 = "e6500f10DNM", -- v6 - uasx_3 = "e6500f30DNM", -- v6 - usax_3 = "e6500f50DNM", -- v6 - usub16_3 = "e6500f70DNM", -- v6 - uadd8_3 = "e6500f90DNM", -- v6 - usub8_3 = "e6500ff0DNM", -- v6 - uqadd16_3 = "e6600f10DNM", -- v6 - uqasx_3 = "e6600f30DNM", -- v6 - uqsax_3 = "e6600f50DNM", -- v6 - uqsub16_3 = "e6600f70DNM", -- v6 - uqadd8_3 = "e6600f90DNM", -- v6 - uqsub8_3 = "e6600ff0DNM", -- v6 - uhadd16_3 = "e6700f10DNM", -- v6 - uhasx_3 = "e6700f30DNM", -- v6 - uhsax_3 = "e6700f50DNM", -- v6 - uhsub16_3 = "e6700f70DNM", -- v6 - uhadd8_3 = "e6700f90DNM", -- v6 - uhsub8_3 = "e6700ff0DNM", -- v6 - - -- Load/store instructions. 
- str_2 = "e4000000DL", str_3 = "e4000000DL", str_4 = "e4000000DL", - strb_2 = "e4400000DL", strb_3 = "e4400000DL", strb_4 = "e4400000DL", - ldr_2 = "e4100000DL", ldr_3 = "e4100000DL", ldr_4 = "e4100000DL", - ldrb_2 = "e4500000DL", ldrb_3 = "e4500000DL", ldrb_4 = "e4500000DL", - strh_2 = "e00000b0DL", strh_3 = "e00000b0DL", - ldrh_2 = "e01000b0DL", ldrh_3 = "e01000b0DL", - ldrd_2 = "e00000d0DL", ldrd_3 = "e00000d0DL", -- v5TE - ldrsb_2 = "e01000d0DL", ldrsb_3 = "e01000d0DL", - strd_2 = "e00000f0DL", strd_3 = "e00000f0DL", -- v5TE - ldrsh_2 = "e01000f0DL", ldrsh_3 = "e01000f0DL", - - ldm_2 = "e8900000oR", ldmia_2 = "e8900000oR", ldmfd_2 = "e8900000oR", - ldmda_2 = "e8100000oR", ldmfa_2 = "e8100000oR", - ldmdb_2 = "e9100000oR", ldmea_2 = "e9100000oR", - ldmib_2 = "e9900000oR", ldmed_2 = "e9900000oR", - stm_2 = "e8800000oR", stmia_2 = "e8800000oR", stmfd_2 = "e8800000oR", - stmda_2 = "e8000000oR", stmfa_2 = "e8000000oR", - stmdb_2 = "e9000000oR", stmea_2 = "e9000000oR", - stmib_2 = "e9800000oR", stmed_2 = "e9800000oR", - pop_1 = "e8bd0000R", push_1 = "e92d0000R", - - -- Branch instructions. - b_1 = "ea000000B", - bl_1 = "eb000000B", - blx_1 = "e12fff30C", - bx_1 = "e12fff10M", - - -- Miscellaneous instructions. - nop_0 = "e1a00000", - mrs_1 = "e10f0000D", - bkpt_1 = "e1200070K", -- v5T - svc_1 = "ef000000T", swi_1 = "ef000000T", - ud_0 = "e7f001f0", - - -- VFP instructions. 
- ["vadd.f32_3"] = "ee300a00dnm", - ["vadd.f64_3"] = "ee300b00Gdnm", - ["vsub.f32_3"] = "ee300a40dnm", - ["vsub.f64_3"] = "ee300b40Gdnm", - ["vmul.f32_3"] = "ee200a00dnm", - ["vmul.f64_3"] = "ee200b00Gdnm", - ["vnmul.f32_3"] = "ee200a40dnm", - ["vnmul.f64_3"] = "ee200b40Gdnm", - ["vmla.f32_3"] = "ee000a00dnm", - ["vmla.f64_3"] = "ee000b00Gdnm", - ["vmls.f32_3"] = "ee000a40dnm", - ["vmls.f64_3"] = "ee000b40Gdnm", - ["vnmla.f32_3"] = "ee100a40dnm", - ["vnmla.f64_3"] = "ee100b40Gdnm", - ["vnmls.f32_3"] = "ee100a00dnm", - ["vnmls.f64_3"] = "ee100b00Gdnm", - ["vdiv.f32_3"] = "ee800a00dnm", - ["vdiv.f64_3"] = "ee800b00Gdnm", - - ["vabs.f32_2"] = "eeb00ac0dm", - ["vabs.f64_2"] = "eeb00bc0Gdm", - ["vneg.f32_2"] = "eeb10a40dm", - ["vneg.f64_2"] = "eeb10b40Gdm", - ["vsqrt.f32_2"] = "eeb10ac0dm", - ["vsqrt.f64_2"] = "eeb10bc0Gdm", - ["vcmp.f32_2"] = "eeb40a40dm", - ["vcmp.f64_2"] = "eeb40b40Gdm", - ["vcmpe.f32_2"] = "eeb40ac0dm", - ["vcmpe.f64_2"] = "eeb40bc0Gdm", - ["vcmpz.f32_1"] = "eeb50a40d", - ["vcmpz.f64_1"] = "eeb50b40Gd", - ["vcmpze.f32_1"] = "eeb50ac0d", - ["vcmpze.f64_1"] = "eeb50bc0Gd", - - vldr_2 = "ed100a00dl|ed100b00Gdl", - vstr_2 = "ed000a00dl|ed000b00Gdl", - vldm_2 = "ec900a00or", - vldmia_2 = "ec900a00or", - vldmdb_2 = "ed100a00or", - vpop_1 = "ecbd0a00r", - vstm_2 = "ec800a00or", - vstmia_2 = "ec800a00or", - vstmdb_2 = "ed000a00or", - vpush_1 = "ed2d0a00r", - - ["vmov.f32_2"] = "eeb00a40dm|eeb00a00dY", -- #imm is VFPv3 only - ["vmov.f64_2"] = "eeb00b40Gdm|eeb00b00GdY", -- #imm is VFPv3 only - vmov_2 = "ee100a10Dn|ee000a10nD", - vmov_3 = "ec500a10DNm|ec400a10mDN|ec500b10GDNm|ec400b10GmDN", - - vmrs_0 = "eef1fa10", - vmrs_1 = "eef10a10D", - vmsr_1 = "eee10a10D", - - ["vcvt.s32.f32_2"] = "eebd0ac0dm", - ["vcvt.s32.f64_2"] = "eebd0bc0dGm", - ["vcvt.u32.f32_2"] = "eebc0ac0dm", - ["vcvt.u32.f64_2"] = "eebc0bc0dGm", - ["vcvtr.s32.f32_2"] = "eebd0a40dm", - ["vcvtr.s32.f64_2"] = "eebd0b40dGm", - ["vcvtr.u32.f32_2"] = "eebc0a40dm", - ["vcvtr.u32.f64_2"] = 
"eebc0b40dGm", - ["vcvt.f32.s32_2"] = "eeb80ac0dm", - ["vcvt.f64.s32_2"] = "eeb80bc0GdFm", - ["vcvt.f32.u32_2"] = "eeb80a40dm", - ["vcvt.f64.u32_2"] = "eeb80b40GdFm", - ["vcvt.f32.f64_2"] = "eeb70bc0dGm", - ["vcvt.f64.f32_2"] = "eeb70ac0GdFm", - - -- VFPv4 only: - ["vfma.f32_3"] = "eea00a00dnm", - ["vfma.f64_3"] = "eea00b00Gdnm", - ["vfms.f32_3"] = "eea00a40dnm", - ["vfms.f64_3"] = "eea00b40Gdnm", - ["vfnma.f32_3"] = "ee900a40dnm", - ["vfnma.f64_3"] = "ee900b40Gdnm", - ["vfnms.f32_3"] = "ee900a00dnm", - ["vfnms.f64_3"] = "ee900b00Gdnm", - - -- NYI: Advanced SIMD instructions. - - -- NYI: I have no need for these instructions right now: - -- swp, swpb, strex, ldrex, strexd, ldrexd, strexb, ldrexb, strexh, ldrexh - -- msr, nopv6, yield, wfe, wfi, sev, dbg, bxj, smc, srs, rfe - -- cps, setend, pli, pld, pldw, clrex, dsb, dmb, isb - -- stc, ldc, mcr, mcr2, mrc, mrc2, mcrr, mcrr2, mrrc, mrrc2, cdp, cdp2 -} - --- Add mnemonics for "s" variants. -do - local t = {} - for k,v in pairs(map_op) do - if sub(v, -1) == "s" then - local v2 = sub(v, 1, 2)..char(byte(v, 3)+1)..sub(v, 4, -2) - t[sub(k, 1, -3).."s"..sub(k, -2)] = v2 - end - end - for k,v in pairs(t) do - map_op[k] = v - end -end - ------------------------------------------------------------------------------- - -local function parse_gpr(expr) - local tname, ovreg = match(expr, "^([%w_]+):(r1?[0-9])$") - local tp = map_type[tname or expr] - if tp then - local reg = ovreg or tp.reg - if not reg then - werror("type `"..(tname or expr).."' needs a register override") - end - expr = reg - end - local r = match(expr, "^r(1?[0-9])$") - if r then - r = tonumber(r) - if r <= 15 then return r, tp end - end - werror("bad register name `"..expr.."'") -end - -local function parse_gpr_pm(expr) - local pm, expr2 = match(expr, "^([+-]?)(.*)$") - return parse_gpr(expr2), (pm == "-") -end - -local function parse_vr(expr, tp) - local t, r = match(expr, "^([sd])([0-9]+)$") - if t == tp then - r = tonumber(r) - if r <= 31 then - if t == 
"s" then return shr(r, 1), band(r, 1) end - return band(r, 15), shr(r, 4) - end - end - werror("bad register name `"..expr.."'") -end - -local function parse_reglist(reglist) - reglist = match(reglist, "^{%s*([^}]*)}$") - if not reglist then werror("register list expected") end - local rr = 0 - for p in gmatch(reglist..",", "%s*([^,]*),") do - local rbit = shl(1, parse_gpr(gsub(p, "%s+$", ""))) - if band(rr, rbit) ~= 0 then - werror("duplicate register `"..p.."'") - end - rr = rr + rbit - end - return rr -end - -local function parse_vrlist(reglist) - local ta, ra, tb, rb = match(reglist, - "^{%s*([sd])([0-9]+)%s*%-%s*([sd])([0-9]+)%s*}$") - ra, rb = tonumber(ra), tonumber(rb) - if ta and ta == tb and ra and rb and ra <= 31 and rb <= 31 and ra <= rb then - local nr = rb+1 - ra - if ta == "s" then - return shl(shr(ra,1),12)+shl(band(ra,1),22) + nr - else - return shl(band(ra,15),12)+shl(shr(ra,4),22) + nr*2 + 0x100 - end - end - werror("register list expected") -end - -local function parse_imm(imm, bits, shift, scale, signed) - imm = match(imm, "^#(.*)$") - if not imm then werror("expected immediate operand") end - local n = tonumber(imm) - if n then - local m = sar(n, scale) - if shl(m, scale) == n then - if signed then - local s = sar(m, bits-1) - if s == 0 then return shl(m, shift) - elseif s == -1 then return shl(m + shl(1, bits), shift) end - else - if sar(m, bits) == 0 then return shl(m, shift) end - end - end - werror("out of range immediate `"..imm.."'") - else - waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) - return 0 - end -end - -local function parse_imm12(imm) - local n = tonumber(imm) - if n then - local m = band(n) - for i=0,-15,-1 do - if shr(m, 8) == 0 then return m + shl(band(i, 15), 8) end - m = ror(m, 2) - end - werror("out of range immediate `"..imm.."'") - else - waction("IMM12", 0, imm) - return 0 - end -end - -local function parse_imm16(imm) - imm = match(imm, "^#(.*)$") - if not imm then werror("expected immediate 
operand") end - local n = tonumber(imm) - if n then - if shr(n, 16) == 0 then return band(n, 0x0fff) + shl(band(n, 0xf000), 4) end - werror("out of range immediate `"..imm.."'") - else - waction("IMM16", 32*16, imm) - return 0 - end -end - -local function parse_imm_load(imm, ext) - local n = tonumber(imm) - if n then - if ext then - if n >= -255 and n <= 255 then - local up = 0x00800000 - if n < 0 then n = -n; up = 0 end - return shl(band(n, 0xf0), 4) + band(n, 0x0f) + up - end - else - if n >= -4095 and n <= 4095 then - if n >= 0 then return n+0x00800000 end - return -n - end - end - werror("out of range immediate `"..imm.."'") - else - waction(ext and "IMML8" or "IMML12", 32768 + shl(ext and 8 or 12, 5), imm) - return 0 - end -end - -local function parse_shift(shift, gprok) - if shift == "rrx" then - return 3 * 32 - else - local s, s2 = match(shift, "^(%S+)%s*(.*)$") - s = map_shift[s] - if not s then werror("expected shift operand") end - if sub(s2, 1, 1) == "#" then - return parse_imm(s2, 5, 7, 0, false) + shl(s, 5) - else - if not gprok then werror("expected immediate shift operand") end - return shl(parse_gpr(s2), 8) + shl(s, 5) + 16 - end - end -end - -local function parse_label(label, def) - local prefix = sub(label, 1, 2) - -- =>label (pc label reference) - if prefix == "=>" then - return "PC", 0, sub(label, 3) - end - -- ->name (global label reference) - if prefix == "->" then - return "LG", map_global[sub(label, 3)] - end - if def then - -- [1-9] (local label definition) - if match(label, "^[1-9]$") then - return "LG", 10+tonumber(label) - end - else - -- [<>][1-9] (local label reference) - local dir, lnum = match(label, "^([<>])([1-9])$") - if dir then -- Fwd: 1-9, Bkwd: 11-19. 
- return "LG", lnum + (dir == ">" and 0 or 10) - end - -- extern label (extern label reference) - local extname = match(label, "^extern%s+(%S+)$") - if extname then - return "EXT", map_extern[extname] - end - end - werror("bad label `"..label.."'") -end - -local function parse_load(params, nparams, n, op) - local oplo = band(op, 255) - local ext, ldrd = (oplo ~= 0), (oplo == 208) - local d - if (ldrd or oplo == 240) then - d = band(shr(op, 12), 15) - if band(d, 1) ~= 0 then werror("odd destination register") end - end - local pn = params[n] - local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") - local p2 = params[n+1] - if not p1 then - if not p2 then - if match(pn, "^[<>=%-]") or match(pn, "^extern%s+") then - local mode, n, s = parse_label(pn, false) - waction("REL_"..mode, n + (ext and 0x1800 or 0x0800), s, 1) - return op + 15 * 65536 + 0x01000000 + (ext and 0x00400000 or 0) - end - local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") - if reg and tailr ~= "" then - local d, tp = parse_gpr(reg) - if tp then - waction(ext and "IMML8" or "IMML12", 32768 + 32*(ext and 8 or 12), - format(tp.ctypefmt, tailr)) - return op + shl(d, 16) + 0x01000000 + (ext and 0x00400000 or 0) - end - end - end - werror("expected address operand") - end - if wb == "!" then op = op + 0x00200000 end - if p2 then - if wb == "!" 
then werror("bad use of '!'") end - local p3 = params[n+2] - op = op + shl(parse_gpr(p1), 16) - local imm = match(p2, "^#(.*)$") - if imm then - local m = parse_imm_load(imm, ext) - if p3 then werror("too many parameters") end - op = op + m + (ext and 0x00400000 or 0) - else - local m, neg = parse_gpr_pm(p2) - if ldrd and (m == d or m-1 == d) then werror("register conflict") end - op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000) - if p3 then op = op + parse_shift(p3) end - end - else - local p1a, p2 = match(p1, "^([^,%s]*)%s*(.*)$") - op = op + shl(parse_gpr(p1a), 16) + 0x01000000 - if p2 ~= "" then - local imm = match(p2, "^,%s*#(.*)$") - if imm then - local m = parse_imm_load(imm, ext) - op = op + m + (ext and 0x00400000 or 0) - else - local p2a, p3 = match(p2, "^,%s*([^,%s]*)%s*,?%s*(.*)$") - local m, neg = parse_gpr_pm(p2a) - if ldrd and (m == d or m-1 == d) then werror("register conflict") end - op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000) - if p3 ~= "" then - if ext then werror("too many parameters") end - op = op + parse_shift(p3) - end - end - else - if wb == "!" 
then werror("bad use of '!'") end - op = op + (ext and 0x00c00000 or 0x00800000) - end - end - return op -end - -local function parse_vload(q) - local reg, imm = match(q, "^%[%s*([^,%s]*)%s*(.*)%]$") - if reg then - local d = shl(parse_gpr(reg), 16) - if imm == "" then return d end - imm = match(imm, "^,%s*#(.*)$") - if imm then - local n = tonumber(imm) - if n then - if n >= -1020 and n <= 1020 and n%4 == 0 then - return d + (n >= 0 and n/4+0x00800000 or -n/4) - end - werror("out of range immediate `"..imm.."'") - else - waction("IMMV8", 32768 + 32*8, imm) - return d - end - end - else - if match(q, "^[<>=%-]") or match(q, "^extern%s+") then - local mode, n, s = parse_label(q, false) - waction("REL_"..mode, n + 0x2800, s, 1) - return 15 * 65536 - end - local reg, tailr = match(q, "^([%w_:]+)%s*(.*)$") - if reg and tailr ~= "" then - local d, tp = parse_gpr(reg) - if tp then - waction("IMMV8", 32768 + 32*8, format(tp.ctypefmt, tailr)) - return shl(d, 16) - end - end - end - werror("expected address operand") -end - ------------------------------------------------------------------------------- - --- Handle opcodes defined with template strings. -local function parse_template(params, template, nparams, pos) - local op = tonumber(sub(template, 1, 8), 16) - local n = 1 - local vr = "s" - - -- Process each character. 
- for p in gmatch(sub(template, 9), ".") do - local q = params[n] - if p == "D" then - op = op + shl(parse_gpr(q), 12); n = n + 1 - elseif p == "N" then - op = op + shl(parse_gpr(q), 16); n = n + 1 - elseif p == "S" then - op = op + shl(parse_gpr(q), 8); n = n + 1 - elseif p == "M" then - op = op + parse_gpr(q); n = n + 1 - elseif p == "d" then - local r,h = parse_vr(q, vr); op = op+shl(r,12)+shl(h,22); n = n + 1 - elseif p == "n" then - local r,h = parse_vr(q, vr); op = op+shl(r,16)+shl(h,7); n = n + 1 - elseif p == "m" then - local r,h = parse_vr(q, vr); op = op+r+shl(h,5); n = n + 1 - elseif p == "P" then - local imm = match(q, "^#(.*)$") - if imm then - op = op + parse_imm12(imm) + 0x02000000 - else - op = op + parse_gpr(q) - end - n = n + 1 - elseif p == "p" then - op = op + parse_shift(q, true); n = n + 1 - elseif p == "L" then - op = parse_load(params, nparams, n, op) - elseif p == "l" then - op = op + parse_vload(q) - elseif p == "B" then - local mode, n, s = parse_label(q, false) - waction("REL_"..mode, n, s, 1) - elseif p == "C" then -- blx gpr vs. blx label. - if match(q, "^([%w_]+):(r1?[0-9])$") or match(q, "^r(1?[0-9])$") then - op = op + parse_gpr(q) - else - if op < 0xe0000000 then werror("unconditional instruction") end - local mode, n, s = parse_label(q, false) - waction("REL_"..mode, n, s, 1) - op = 0xfa000000 - end - elseif p == "F" then - vr = "s" - elseif p == "G" then - vr = "d" - elseif p == "o" then - local r, wb = match(q, "^([^!]*)(!?)$") - op = op + shl(parse_gpr(r), 16) + (wb == "!" 
and 0x00200000 or 0) - n = n + 1 - elseif p == "R" then - op = op + parse_reglist(q); n = n + 1 - elseif p == "r" then - op = op + parse_vrlist(q); n = n + 1 - elseif p == "W" then - op = op + parse_imm16(q); n = n + 1 - elseif p == "v" then - op = op + parse_imm(q, 5, 7, 0, false); n = n + 1 - elseif p == "w" then - local imm = match(q, "^#(.*)$") - if imm then - op = op + parse_imm(q, 5, 7, 0, false); n = n + 1 - else - op = op + shl(parse_gpr(q), 8) + 16 - end - elseif p == "X" then - op = op + parse_imm(q, 5, 16, 0, false); n = n + 1 - elseif p == "Y" then - local imm = tonumber(match(q, "^#(.*)$")); n = n + 1 - if not imm or shr(imm, 8) ~= 0 then - werror("bad immediate operand") - end - op = op + shl(band(imm, 0xf0), 12) + band(imm, 0x0f) - elseif p == "K" then - local imm = tonumber(match(q, "^#(.*)$")); n = n + 1 - if not imm or shr(imm, 16) ~= 0 then - werror("bad immediate operand") - end - op = op + shl(band(imm, 0xfff0), 4) + band(imm, 0x000f) - elseif p == "T" then - op = op + parse_imm(q, 24, 0, 0, false); n = n + 1 - elseif p == "s" then - -- Ignored. - else - assert(false) - end - end - wputpos(pos, op) -end - -map_op[".template__"] = function(params, template, nparams) - if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end - - -- Limit number of section buffer positions used by a single dasm_put(). - -- A single opcode needs a maximum of 3 positions. 
- if secpos+3 > maxsecpos then wflush() end - local pos = wpos() - local lpos, apos, spos = #actlist, #actargs, secpos - - local ok, err - for t in gmatch(template, "[^|]+") do - ok, err = pcall(parse_template, params, t, nparams, pos) - if ok then return end - secpos = spos - actlist[lpos+1] = nil - actlist[lpos+2] = nil - actlist[lpos+3] = nil - actargs[apos+1] = nil - actargs[apos+2] = nil - actargs[apos+3] = nil - end - error(err, 0) -end - ------------------------------------------------------------------------------- - --- Pseudo-opcode to mark the position where the action list is to be emitted. -map_op[".actionlist_1"] = function(params) - if not params then return "cvar" end - local name = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeactions(out, name) end) -end - --- Pseudo-opcode to mark the position where the global enum is to be emitted. -map_op[".globals_1"] = function(params) - if not params then return "prefix" end - local prefix = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeglobals(out, prefix) end) -end - --- Pseudo-opcode to mark the position where the global names are to be emitted. -map_op[".globalnames_1"] = function(params) - if not params then return "cvar" end - local name = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeglobalnames(out, name) end) -end - --- Pseudo-opcode to mark the position where the extern names are to be emitted. -map_op[".externnames_1"] = function(params) - if not params then return "cvar" end - local name = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeexternnames(out, name) end) -end - ------------------------------------------------------------------------------- - --- Label pseudo-opcode (converted from trailing colon form). 
-map_op[".label_1"] = function(params) - if not params then return "[1-9] | ->global | =>pcexpr" end - if secpos+1 > maxsecpos then wflush() end - local mode, n, s = parse_label(params[1], true) - if mode == "EXT" then werror("bad label definition") end - waction("LABEL_"..mode, n, s, 1) -end - ------------------------------------------------------------------------------- - --- Pseudo-opcodes for data storage. -map_op[".long_*"] = function(params) - if not params then return "imm..." end - for _,p in ipairs(params) do - local n = tonumber(p) - if not n then werror("bad immediate `"..p.."'") end - if n < 0 then n = n + 2^32 end - wputw(n) - if secpos+2 > maxsecpos then wflush() end - end -end - --- Alignment pseudo-opcode. -map_op[".align_1"] = function(params) - if not params then return "numpow2" end - if secpos+1 > maxsecpos then wflush() end - local align = tonumber(params[1]) - if align then - local x = align - -- Must be a power of 2 in the range (2 ... 256). - for i=1,8 do - x = x / 2 - if x == 1 then - waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. - return - end - end - end - werror("bad alignment") -end - ------------------------------------------------------------------------------- - --- Pseudo-opcode for (primitive) type definitions (map to C types). -map_op[".type_3"] = function(params, nparams) - if not params then - return nparams == 2 and "name, ctype" or "name, ctype, reg" - end - local name, ctype, reg = params[1], params[2], params[3] - if not match(name, "^[%a_][%w_]*$") then - werror("bad type name `"..name.."'") - end - local tp = map_type[name] - if tp then - werror("duplicate type `"..name.."'") - end - -- Add #type to defines. A bit unclean to put it in map_archdef. - map_archdef["#"..name] = "sizeof("..ctype..")" - -- Add new type and emit shortcut define. 
- local num = ctypenum + 1 - map_type[name] = { - ctype = ctype, - ctypefmt = format("Dt%X(%%s)", num), - reg = reg, - } - wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) - ctypenum = num -end -map_op[".type_2"] = map_op[".type_3"] - --- Dump type definitions. -local function dumptypes(out, lvl) - local t = {} - for name in pairs(map_type) do t[#t+1] = name end - sort(t) - out:write("Type definitions:\n") - for _,name in ipairs(t) do - local tp = map_type[name] - local reg = tp.reg or "" - out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) - end - out:write("\n") -end - ------------------------------------------------------------------------------- - --- Set the current section. -function _M.section(num) - waction("SECTION", num) - wflush(true) -- SECTION is a terminal action. -end - ------------------------------------------------------------------------------- - --- Dump architecture description. -function _M.dumparch(out) - out:write(format("DynASM %s version %s, released %s\n\n", - _info.arch, _info.version, _info.release)) - dumpactions(out) -end - --- Dump all user defined elements. -function _M.dumpdef(out, lvl) - dumptypes(out, lvl) - dumpglobals(out, lvl) - dumpexterns(out, lvl) -end - ------------------------------------------------------------------------------- - --- Pass callbacks from/to the DynASM core. -function _M.passcb(wl, we, wf, ww) - wline, werror, wfatal, wwarn = wl, we, wf, ww - return wflush -end - --- Setup the arch-specific module. -function _M.setup(arch, opt) - g_arch, g_opt = arch, opt -end - --- Merge the core maps and the arch-specific maps. 
-function _M.mergemaps(map_coreop, map_def) - setmetatable(map_op, { __index = function(t, k) - local v = map_coreop[k] - if v then return v end - local k1, cc, k2 = match(k, "^(.-)(..)([._].*)$") - local cv = map_cond[cc] - if cv then - local v = rawget(t, k1..k2) - if type(v) == "string" then - local scv = format("%x", cv) - return gsub(scv..sub(v, 2), "|e", "|"..scv) - end - end - end }) - setmetatable(map_def, { __index = map_archdef }) - return map_op, map_def -end - -return _M - ------------------------------------------------------------------------------- - diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h deleted file mode 100644 index 47e1e07493..0000000000 --- a/dynasm/dasm_arm64.h +++ /dev/null @@ -1,518 +0,0 @@ -/* -** DynASM ARM64 encoding engine. -** Copyright (C) 2005-2017 Mike Pall. All rights reserved. -** Released under the MIT license. See dynasm.lua for full copyright notice. -*/ - -#include -#include -#include -#include - -#define DASM_ARCH "arm64" - -#ifndef DASM_EXTERN -#define DASM_EXTERN(a,b,c,d) 0 -#endif - -/* Action definitions. */ -enum { - DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, - /* The following actions need a buffer position. */ - DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, - /* The following actions also have an argument. */ - DASM_REL_PC, DASM_LABEL_PC, - DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML, - DASM__MAX -}; - -/* Maximum number of section buffer positions for a single dasm_put() call. */ -#define DASM_MAXSECPOS 25 - -/* DynASM encoder status codes. Action list offset or number are or'ed in. 
*/ -#define DASM_S_OK 0x00000000 -#define DASM_S_NOMEM 0x01000000 -#define DASM_S_PHASE 0x02000000 -#define DASM_S_MATCH_SEC 0x03000000 -#define DASM_S_RANGE_I 0x11000000 -#define DASM_S_RANGE_SEC 0x12000000 -#define DASM_S_RANGE_LG 0x13000000 -#define DASM_S_RANGE_PC 0x14000000 -#define DASM_S_RANGE_REL 0x15000000 -#define DASM_S_UNDEF_LG 0x21000000 -#define DASM_S_UNDEF_PC 0x22000000 - -/* Macros to convert positions (8 bit section + 24 bit index). */ -#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) -#define DASM_POS2BIAS(pos) ((pos)&0xff000000) -#define DASM_SEC2POS(sec) ((sec)<<24) -#define DASM_POS2SEC(pos) ((pos)>>24) -#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) - -/* Action list type. */ -typedef const unsigned int *dasm_ActList; - -/* Per-section structure. */ -typedef struct dasm_Section { - int *rbuf; /* Biased buffer pointer (negative section bias). */ - int *buf; /* True buffer pointer. */ - size_t bsize; /* Buffer size in bytes. */ - int pos; /* Biased buffer position. */ - int epos; /* End of biased buffer position - max single put. */ - int ofs; /* Byte offset into section. */ -} dasm_Section; - -/* Core structure holding the DynASM encoding state. */ -struct dasm_State { - size_t psize; /* Allocated size of this structure. */ - dasm_ActList actionlist; /* Current actionlist pointer. */ - int *lglabels; /* Local/global chain/pos ptrs. */ - size_t lgsize; - int *pclabels; /* PC label chains/pos ptrs. */ - size_t pcsize; - void **globals; /* Array of globals (bias -10). */ - dasm_Section *section; /* Pointer to active section. */ - size_t codesize; /* Total size of all code sections. */ - int maxsection; /* 0 <= sectionidx < maxsection. */ - int status; /* Status code. */ - dasm_Section sections[1]; /* All sections. Alloc-extended. */ -}; - -/* The size of the core structure depends on the max. number of sections. */ -#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) - - -/* Initialize DynASM state. 
*/ -void dasm_init(Dst_DECL, int maxsection) -{ - dasm_State *D; - size_t psz = 0; - int i; - Dst_REF = NULL; - DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); - D = Dst_REF; - D->psize = psz; - D->lglabels = NULL; - D->lgsize = 0; - D->pclabels = NULL; - D->pcsize = 0; - D->globals = NULL; - D->maxsection = maxsection; - for (i = 0; i < maxsection; i++) { - D->sections[i].buf = NULL; /* Need this for pass3. */ - D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); - D->sections[i].bsize = 0; - D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ - } -} - -/* Free DynASM state. */ -void dasm_free(Dst_DECL) -{ - dasm_State *D = Dst_REF; - int i; - for (i = 0; i < D->maxsection; i++) - if (D->sections[i].buf) - DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); - if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); - if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); - DASM_M_FREE(Dst, D, D->psize); -} - -/* Setup global label array. Must be called before dasm_setup(). */ -void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) -{ - dasm_State *D = Dst_REF; - D->globals = gl - 10; /* Negative bias to compensate for locals. */ - DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); -} - -/* Grow PC label array. Can be called after dasm_setup(), too. */ -void dasm_growpc(Dst_DECL, unsigned int maxpc) -{ - dasm_State *D = Dst_REF; - size_t osz = D->pcsize; - DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); - memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); -} - -/* Setup encoder. 
*/ -void dasm_setup(Dst_DECL, const void *actionlist) -{ - dasm_State *D = Dst_REF; - int i; - D->actionlist = (dasm_ActList)actionlist; - D->status = DASM_S_OK; - D->section = &D->sections[0]; - memset((void *)D->lglabels, 0, D->lgsize); - if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); - for (i = 0; i < D->maxsection; i++) { - D->sections[i].pos = DASM_SEC2POS(i); - D->sections[i].ofs = 0; - } -} - - -#ifdef DASM_CHECKS -#define CK(x, st) \ - do { if (!(x)) { \ - D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) -#define CKPL(kind, st) \ - do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ - D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) -#else -#define CK(x, st) ((void)0) -#define CKPL(kind, st) ((void)0) -#endif - -static int dasm_imm12(unsigned int n) -{ - if ((n >> 12) == 0) - return n; - else if ((n & 0xff000fff) == 0) - return (n >> 12) | 0x1000; - else - return -1; -} - -static int dasm_ffs(unsigned long long x) -{ - int n = -1; - while (x) { x >>= 1; n++; } - return n; -} - -static int dasm_imm13(int lo, int hi) -{ - int inv = 0, w = 64, s = 0xfff, xa, xb; - unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo; - unsigned long long m = 1ULL, a, b, c; - if (n & 1) { n = ~n; inv = 1; } - a = n & -n; b = (n+a)&-(n+a); c = (n+a-b)&-(n+a-b); - xa = dasm_ffs(a); xb = dasm_ffs(b); - if (c) { - w = dasm_ffs(c) - xa; - if (w == 32) m = 0x0000000100000001UL; - else if (w == 16) m = 0x0001000100010001UL; - else if (w == 8) m = 0x0101010101010101UL; - else if (w == 4) m = 0x1111111111111111UL; - else if (w == 2) m = 0x5555555555555555UL; - else return -1; - s = (-2*w & 0x3f) - 1; - } else if (!a) { - return -1; - } else if (xb == -1) { - xb = 64; - } - if ((b-a) * m != n) return -1; - if (inv) { - return ((w - xb) << 6) | (s+w+xa-xb); - } else { - return ((w - xa) << 6) | (s+xb-xa); - } - return -1; -} - -/* Pass 1: Store actions and args, link branches/labels, 
estimate offsets. */ -void dasm_put(Dst_DECL, int start, ...) -{ - va_list ap; - dasm_State *D = Dst_REF; - dasm_ActList p = D->actionlist + start; - dasm_Section *sec = D->section; - int pos = sec->pos, ofs = sec->ofs; - int *b; - - if (pos >= sec->epos) { - DASM_M_GROW(Dst, int, sec->buf, sec->bsize, - sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); - sec->rbuf = sec->buf - DASM_POS2BIAS(pos); - sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); - } - - b = sec->rbuf; - b[pos++] = start; - - va_start(ap, start); - while (1) { - unsigned int ins = *p++; - unsigned int action = (ins >> 16); - if (action >= DASM__MAX) { - ofs += 4; - } else { - int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; - switch (action) { - case DASM_STOP: goto stop; - case DASM_SECTION: - n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); - D->section = &D->sections[n]; goto stop; - case DASM_ESC: p++; ofs += 4; break; - case DASM_REL_EXT: break; - case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; - case DASM_REL_LG: - n = (ins & 2047) - 10; pl = D->lglabels + n; - /* Bkwd rel or global. */ - if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } - pl += 10; n = *pl; - if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ - goto linkrel; - case DASM_REL_PC: - pl = D->pclabels + n; CKPL(pc, PC); - putrel: - n = *pl; - if (n < 0) { /* Label exists. Get label pos and store it. */ - b[pos] = -n; - } else { - linkrel: - b[pos] = n; /* Else link to rel chain, anchored at label. */ - *pl = pos; - } - pos++; - break; - case DASM_LABEL_LG: - pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; - case DASM_LABEL_PC: - pl = D->pclabels + n; CKPL(pc, PC); - putlabel: - n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ - while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; - } - *pl = -pos; /* Label exists now. */ - b[pos++] = ofs; /* Store pass1 offset estimate. 
*/ - break; - case DASM_IMM: - CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); - n >>= ((ins>>10)&31); -#ifdef DASM_CHECKS - if ((ins & 0x8000)) - CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); - else - CK((n>>((ins>>5)&31)) == 0, RANGE_I); -#endif - b[pos++] = n; - break; - case DASM_IMM6: - CK((n >> 6) == 0, RANGE_I); - b[pos++] = n; - break; - case DASM_IMM12: - CK(dasm_imm12((unsigned int)n) != -1, RANGE_I); - b[pos++] = n; - break; - case DASM_IMM13W: - CK(dasm_imm13(n, n) != -1, RANGE_I); - b[pos++] = n; - break; - case DASM_IMM13X: { - int m = va_arg(ap, int); - CK(dasm_imm13(n, m) != -1, RANGE_I); - b[pos++] = n; - b[pos++] = m; - break; - } - case DASM_IMML: { -#ifdef DASM_CHECKS - int scale = (p[-2] >> 30); - CK((!(n & ((1<>scale) < 4096) || - (unsigned int)(n+256) < 512, RANGE_I); -#endif - b[pos++] = n; - break; - } - } - } - } -stop: - va_end(ap); - sec->pos = pos; - sec->ofs = ofs; -} -#undef CK - -/* Pass 2: Link sections, shrink aligns, fix label offsets. */ -int dasm_link(Dst_DECL, size_t *szp) -{ - dasm_State *D = Dst_REF; - int secnum; - int ofs = 0; - -#ifdef DASM_CHECKS - *szp = 0; - if (D->status != DASM_S_OK) return D->status; - { - int pc; - for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) - if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; - } -#endif - - { /* Handle globals not defined in this translation unit. */ - int idx; - for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { - int n = D->lglabels[idx]; - /* Undefined label: Collapse rel chain and replace with marker (< 0). */ - while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } - } - } - - /* Combine all code sections. No support for data sections (yet). 
*/ - for (secnum = 0; secnum < D->maxsection; secnum++) { - dasm_Section *sec = D->sections + secnum; - int *b = sec->rbuf; - int pos = DASM_SEC2POS(secnum); - int lastpos = sec->pos; - - while (pos != lastpos) { - dasm_ActList p = D->actionlist + b[pos++]; - while (1) { - unsigned int ins = *p++; - unsigned int action = (ins >> 16); - switch (action) { - case DASM_STOP: case DASM_SECTION: goto stop; - case DASM_ESC: p++; break; - case DASM_REL_EXT: break; - case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; - case DASM_REL_LG: case DASM_REL_PC: pos++; break; - case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; - case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W: - case DASM_IMML: pos++; break; - case DASM_IMM13X: pos += 2; break; - } - } - stop: (void)0; - } - ofs += sec->ofs; /* Next section starts right after current section. */ - } - - D->codesize = ofs; /* Total size of all code sections */ - *szp = ofs; - return DASM_S_OK; -} - -#ifdef DASM_CHECKS -#define CK(x, st) \ - do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) -#else -#define CK(x, st) ((void)0) -#endif - -/* Pass 3: Encode sections. */ -int dasm_encode(Dst_DECL, void *buffer) -{ - dasm_State *D = Dst_REF; - char *base = (char *)buffer; - unsigned int *cp = (unsigned int *)buffer; - int secnum; - - /* Encode all code sections. No support for data sections (yet). */ - for (secnum = 0; secnum < D->maxsection; secnum++) { - dasm_Section *sec = D->sections + secnum; - int *b = sec->buf; - int *endb = sec->rbuf + sec->pos; - - while (b != endb) { - dasm_ActList p = D->actionlist + *b++; - while (1) { - unsigned int ins = *p++; - unsigned int action = (ins >> 16); - int n = (action >= DASM_ALIGN && action < DASM__MAX) ? 
*b++ : 0; - switch (action) { - case DASM_STOP: case DASM_SECTION: goto stop; - case DASM_ESC: *cp++ = *p++; break; - case DASM_REL_EXT: - n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048)); - goto patchrel; - case DASM_ALIGN: - ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; - break; - case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); - case DASM_REL_PC: - CK(n >= 0, UNDEF_PC); - n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4; - patchrel: - if (!(ins & 0xf800)) { /* B, BL */ - CK((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, RANGE_REL); - cp[-1] |= ((n >> 2) & 0x03ffffff); - } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */ - CK((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL); - cp[-1] |= ((n << 3) & 0x00ffffe0); - } else if ((ins & 0x3000) == 0x2000) { /* ADR */ - CK(((n+0x00100000) >> 21) == 0, RANGE_REL); - cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29); - } else if ((ins & 0x3000) == 0x3000) { /* ADRP */ - cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29); - } else if ((ins & 0x1000)) { /* TBZ, TBNZ */ - CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL); - cp[-1] |= ((n << 3) & 0x0007ffe0); - } - break; - case DASM_LABEL_LG: - ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); - break; - case DASM_LABEL_PC: break; - case DASM_IMM: - cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); - break; - case DASM_IMM6: - cp[-1] |= ((n&31) << 19) | ((n&32) << 26); - break; - case DASM_IMM12: - cp[-1] |= (dasm_imm12((unsigned int)n) << 10); - break; - case DASM_IMM13W: - cp[-1] |= (dasm_imm13(n, n) << 10); - break; - case DASM_IMM13X: - cp[-1] |= (dasm_imm13(n, *b++) << 10); - break; - case DASM_IMML: { - int scale = (p[-2] >> 30); - cp[-1] |= (!(n & ((1<>scale) < 4096) ? - ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12); - break; - } - default: *cp++ = ins; break; - } - } - stop: (void)0; - } - } - - if (base + D->codesize != (char *)cp) /* Check for phase errors. 
*/ - return DASM_S_PHASE; - return DASM_S_OK; -} -#undef CK - -/* Get PC label offset. */ -int dasm_getpclabel(Dst_DECL, unsigned int pc) -{ - dasm_State *D = Dst_REF; - if (pc*sizeof(int) < D->pcsize) { - int pos = D->pclabels[pc]; - if (pos < 0) return *DASM_POS2PTR(D, -pos); - if (pos > 0) return -1; /* Undefined. */ - } - return -2; /* Unused or out of range. */ -} - -#ifdef DASM_CHECKS -/* Optional sanity checker to call between isolated encoding steps. */ -int dasm_checkstep(Dst_DECL, int secmatch) -{ - dasm_State *D = Dst_REF; - if (D->status == DASM_S_OK) { - int i; - for (i = 1; i <= 9; i++) { - if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } - D->lglabels[i] = 0; - } - } - if (D->status == DASM_S_OK && secmatch >= 0 && - D->section != &D->sections[secmatch]) - D->status = DASM_S_MATCH_SEC|(D->section-D->sections); - return D->status; -} -#endif - diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua deleted file mode 100644 index 8a5f735d7c..0000000000 --- a/dynasm/dasm_arm64.lua +++ /dev/null @@ -1,1166 +0,0 @@ ------------------------------------------------------------------------------- --- DynASM ARM64 module. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------- - --- Module information: -local _info = { - arch = "arm", - description = "DynASM ARM64 module", - version = "1.4.0", - vernum = 10400, - release = "2015-10-18", - author = "Mike Pall", - license = "MIT", -} - --- Exported glue functions for the arch-specific module. -local _M = { _info = _info } - --- Cache library functions. 
-local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs -local assert, setmetatable, rawget = assert, setmetatable, rawget -local _s = string -local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char -local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub -local concat, sort, insert = table.concat, table.sort, table.insert -local bit = bit or require("bit") -local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift -local ror, tohex = bit.ror, bit.tohex - --- Inherited tables and callbacks. -local g_opt, g_arch -local wline, werror, wfatal, wwarn - --- Action name list. --- CHECK: Keep this in sync with the C code! -local action_names = { - "STOP", "SECTION", "ESC", "REL_EXT", - "ALIGN", "REL_LG", "LABEL_LG", - "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", -} - --- Maximum number of section buffer positions for dasm_put(). --- CHECK: Keep this in sync with the C code! -local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. - --- Action name -> action number. -local map_action = {} -for n,name in ipairs(action_names) do - map_action[name] = n-1 -end - --- Action list buffer. -local actlist = {} - --- Argument list for next dasm_put(). Start with offset 0 into action list. -local actargs = { 0 } - --- Current number of section buffer positions for dasm_put(). -local secpos = 1 - ------------------------------------------------------------------------------- - --- Dump action names and numbers. -local function dumpactions(out) - out:write("DynASM encoding engine action codes:\n") - for n,name in ipairs(action_names) do - local num = map_action[name] - out:write(format(" %-10s %02X %d\n", name, num, num)) - end - out:write("\n") -end - --- Write action list buffer as a huge static C array. 
-local function writeactions(out, name) - local nn = #actlist - if nn == 0 then nn = 1; actlist[0] = map_action.STOP end - out:write("static const unsigned int ", name, "[", nn, "] = {\n") - for i = 1,nn-1 do - assert(out:write("0x", tohex(actlist[i]), ",\n")) - end - assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) -end - ------------------------------------------------------------------------------- - --- Add word to action list. -local function wputxw(n) - assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") - actlist[#actlist+1] = n -end - --- Add action to list with optional arg. Advance buffer pos, too. -local function waction(action, val, a, num) - local w = assert(map_action[action], "bad action name `"..action.."'") - wputxw(w * 0x10000 + (val or 0)) - if a then actargs[#actargs+1] = a end - if a or num then secpos = secpos + (num or 1) end -end - --- Flush action list (intervening C code or buffer pos overflow). -local function wflush(term) - if #actlist == actargs[1] then return end -- Nothing to flush. - if not term then waction("STOP") end -- Terminate action list. - wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) - actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). - secpos = 1 -- The actionlist offset occupies a buffer position, too. -end - --- Put escaped word. -local function wputw(n) - if n <= 0x000fffff then waction("ESC") end - wputxw(n) -end - --- Reserve position for word. -local function wpos() - local pos = #actlist+1 - actlist[pos] = "" - return pos -end - --- Store word to reserved position. -local function wputpos(pos, n) - assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") - if n <= 0x000fffff then - insert(actlist, pos+1, n) - n = map_action.ESC * 0x10000 - end - actlist[pos] = n -end - ------------------------------------------------------------------------------- - --- Global label name -> global label number. With auto assignment on 1st use. 
-local next_global = 20 -local map_global = setmetatable({}, { __index = function(t, name) - if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end - local n = next_global - if n > 2047 then werror("too many global labels") end - next_global = n + 1 - t[name] = n - return n -end}) - --- Dump global labels. -local function dumpglobals(out, lvl) - local t = {} - for name, n in pairs(map_global) do t[n] = name end - out:write("Global labels:\n") - for i=20,next_global-1 do - out:write(format(" %s\n", t[i])) - end - out:write("\n") -end - --- Write global label enum. -local function writeglobals(out, prefix) - local t = {} - for name, n in pairs(map_global) do t[n] = name end - out:write("enum {\n") - for i=20,next_global-1 do - out:write(" ", prefix, t[i], ",\n") - end - out:write(" ", prefix, "_MAX\n};\n") -end - --- Write global label names. -local function writeglobalnames(out, name) - local t = {} - for name, n in pairs(map_global) do t[n] = name end - out:write("static const char *const ", name, "[] = {\n") - for i=20,next_global-1 do - out:write(" \"", t[i], "\",\n") - end - out:write(" (const char *)0\n};\n") -end - ------------------------------------------------------------------------------- - --- Extern label name -> extern label number. With auto assignment on 1st use. -local next_extern = 0 -local map_extern_ = {} -local map_extern = setmetatable({}, { __index = function(t, name) - -- No restrictions on the name for now. - local n = next_extern - if n > 2047 then werror("too many extern labels") end - next_extern = n + 1 - t[name] = n - map_extern_[n] = name - return n -end}) - --- Dump extern labels. -local function dumpexterns(out, lvl) - out:write("Extern labels:\n") - for i=0,next_extern-1 do - out:write(format(" %s\n", map_extern_[i])) - end - out:write("\n") -end - --- Write extern label names. 
-local function writeexternnames(out, name) - out:write("static const char *const ", name, "[] = {\n") - for i=0,next_extern-1 do - out:write(" \"", map_extern_[i], "\",\n") - end - out:write(" (const char *)0\n};\n") -end - ------------------------------------------------------------------------------- - --- Arch-specific maps. - --- Ext. register name -> int. name. -local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", } - --- Int. register name -> ext. name. -local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", } - -local map_type = {} -- Type name -> { ctype, reg } -local ctypenum = 0 -- Type number (for Dt... macros). - --- Reverse defines for registers. -function _M.revdef(s) - return map_reg_rev[s] or s -end - -local map_shift = { lsl = 0, lsr = 1, asr = 2, } - -local map_extend = { - uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3, - sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7, -} - -local map_cond = { - eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7, - hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14, - hs = 2, lo = 3, -} - ------------------------------------------------------------------------------- - -local parse_reg_type - -local function parse_reg(expr) - if not expr then werror("expected register name") end - local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$") - local tp = map_type[tname or expr] - if tp then - local reg = ovreg or tp.reg - if not reg then - werror("type `"..(tname or expr).."' needs a register override") - end - expr = reg - end - local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$") - if r then - r = tonumber(r) - if r <= 30 or (r == 31 and ok31 ~= "" or (rt ~= "w" and rt ~= "x")) then - if not parse_reg_type then - parse_reg_type = rt - elseif parse_reg_type ~= rt then - werror("register size mismatch") - end - return r, tp - end - end - werror("bad register name `"..expr.."'") -end - -local function parse_reg_base(expr) - if expr == "sp" then return 0x3e0 end - local base, 
tp = parse_reg(expr) - if parse_reg_type ~= "x" then werror("bad register type") end - parse_reg_type = false - return shl(base, 5), tp -end - -local parse_ctx = {} - -local loadenv = setfenv and function(s) - local code = loadstring(s, "") - if code then setfenv(code, parse_ctx) end - return code -end or function(s) - return load(s, "", nil, parse_ctx) -end - --- Try to parse simple arithmetic, too, since some basic ops are aliases. -local function parse_number(n) - local x = tonumber(n) - if x then return x end - local code = loadenv("return "..n) - if code then - local ok, y = pcall(code) - if ok then return y end - end - return nil -end - -local function parse_imm(imm, bits, shift, scale, signed) - imm = match(imm, "^#(.*)$") - if not imm then werror("expected immediate operand") end - local n = parse_number(imm) - if n then - local m = sar(n, scale) - if shl(m, scale) == n then - if signed then - local s = sar(m, bits-1) - if s == 0 then return shl(m, shift) - elseif s == -1 then return shl(m + shl(1, bits), shift) end - else - if sar(m, bits) == 0 then return shl(m, shift) end - end - end - werror("out of range immediate `"..imm.."'") - else - waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) - return 0 - end -end - -local function parse_imm12(imm) - imm = match(imm, "^#(.*)$") - if not imm then werror("expected immediate operand") end - local n = parse_number(imm) - if n then - if shr(n, 12) == 0 then - return shl(n, 10) - elseif band(n, 0xff000fff) == 0 then - return shr(n, 2) + 0x00400000 - end - werror("out of range immediate `"..imm.."'") - else - waction("IMM12", 0, imm) - return 0 - end -end - -local function parse_imm13(imm) - imm = match(imm, "^#(.*)$") - if not imm then werror("expected immediate operand") end - local n = parse_number(imm) - local r64 = parse_reg_type == "x" - if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then - local inv = false - if band(n, 1) == 1 then n = bit.bnot(n); inv = true end - local t = {} - for 
i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end - local b = table.concat(t) - b = b..(r64 and (inv and "1" or "0"):rep(32) or b) - local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)") - if p0 then - local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a - if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then - local s = band(-2*w, 0x3f) - 1 - if w == 64 then s = s + 0x1000 end - if inv then - return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10) - else - return shl(w-#p0, 16) + shl(s+#p1, 10) - end - end - end - werror("out of range immediate `"..imm.."'") - elseif r64 then - waction("IMM13X", 0, format("(unsigned int)(%s)", imm)) - actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm) - return 0 - else - waction("IMM13W", 0, imm) - return 0 - end -end - -local function parse_imm6(imm) - imm = match(imm, "^#(.*)$") - if not imm then werror("expected immediate operand") end - local n = parse_number(imm) - if n then - if n >= 0 and n <= 63 then - return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0) - end - werror("out of range immediate `"..imm.."'") - else - waction("IMM6", 0, imm) - return 0 - end -end - -local function parse_imm_load(imm, scale) - local n = parse_number(imm) - if n then - local m = sar(n, scale) - if shl(m, scale) == n and m >= 0 and m < 0x1000 then - return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset. - elseif n >= -256 and n < 256 then - return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset. 
- end - werror("out of range immediate `"..imm.."'") - else - waction("IMML", 0, imm) - return 0 - end -end - -local function parse_fpimm(imm) - imm = match(imm, "^#(.*)$") - if not imm then werror("expected immediate operand") end - local n = parse_number(imm) - if n then - local m, e = math.frexp(n) - local s, e2 = 0, band(e-2, 7) - if m < 0 then m = -m; s = 0x00100000 end - m = m*32-16 - if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then - return s + shl(e2, 17) + shl(m, 13) - end - werror("out of range immediate `"..imm.."'") - else - werror("NYI fpimm action") - end -end - -local function parse_shift(expr) - local s, s2 = match(expr, "^(%S+)%s*(.*)$") - s = map_shift[s] - if not s then werror("expected shift operand") end - return parse_imm(s2, 6, 10, 0, false) + shl(s, 22) -end - -local function parse_lslx16(expr) - local n = match(expr, "^lsl%s*#(%d+)$") - n = tonumber(n) - if not n then werror("expected shift operand") end - if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then - werror("bad shift amount") - end - return shl(n, 17) -end - -local function parse_extend(expr) - local s, s2 = match(expr, "^(%S+)%s*(.*)$") - if s == "lsl" then - s = parse_reg_type == "x" and 3 or 2 - else - s = map_extend[s] - end - if not s then werror("expected extend operand") end - return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13) -end - -local function parse_cond(expr, inv) - local c = map_cond[expr] - if not c then werror("expected condition operand") end - return shl(bit.bxor(c, inv), 12) -end - -local function parse_load(params, nparams, n, op) - if params[n+2] then werror("too many operands") end - local pn, p2 = params[n], params[n+1] - local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") - if not p1 then - if not p2 then - local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") - if reg and tailr ~= "" then - local base, tp = parse_reg_base(reg) - if tp then - waction("IMML", 0, format(tp.ctypefmt, tailr)) - return 
op + base - end - end - end - werror("expected address operand") - end - local scale = shr(op, 30) - if p2 then - if wb == "!" then werror("bad use of '!'") end - op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400 - elseif wb == "!" then - local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$") - if not p1a then werror("bad use of '!'") end - op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00 - else - local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$") - op = op + parse_reg_base(p1a) - if p2a ~= "" then - local imm = match(p2a, "^,%s*#(.*)$") - if imm then - op = op + parse_imm_load(imm, scale) - else - local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$") - op = op + shl(parse_reg(p2b), 16) + 0x00200800 - if parse_reg_type ~= "x" and parse_reg_type ~= "w" then - werror("bad index register type") - end - if p3b == "" then - if parse_reg_type ~= "x" then werror("bad index register type") end - op = op + 0x6000 - else - if p3s == "" or p3s == "#0" then - elseif p3s == "#"..scale then - op = op + 0x1000 - else - werror("bad scale") - end - if parse_reg_type == "x" then - if p3b == "lsl" and p3s ~= "" then op = op + 0x6000 - elseif p3b == "sxtx" then op = op + 0xe000 - else - werror("bad extend/shift specifier") - end - else - if p3b == "uxtw" then op = op + 0x4000 - elseif p3b == "sxtw" then op = op + 0xc000 - else - werror("bad extend/shift specifier") - end - end - end - end - else - if wb == "!" 
then werror("bad use of '!'") end - op = op + 0x01000000 - end - end - return op -end - -local function parse_load_pair(params, nparams, n, op) - if params[n+2] then werror("too many operands") end - local pn, p2 = params[n], params[n+1] - local scale = shr(op, 30) == 0 and 2 or 3 - local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") - if not p1 then - if not p2 then - local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") - if reg and tailr ~= "" then - local base, tp = parse_reg_base(reg) - if tp then - waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr)) - return op + base + 0x01000000 - end - end - end - werror("expected address operand") - end - if p2 then - if wb == "!" then werror("bad use of '!'") end - op = op + 0x00800000 - else - local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$") - if p1a then p1, p2 = p1a, p2a else p2 = "#0" end - op = op + (wb == "!" and 0x01800000 or 0x01000000) - end - return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true) -end - -local function parse_label(label, def) - local prefix = sub(label, 1, 2) - -- =>label (pc label reference) - if prefix == "=>" then - return "PC", 0, sub(label, 3) - end - -- ->name (global label reference) - if prefix == "->" then - return "LG", map_global[sub(label, 3)] - end - if def then - -- [1-9] (local label definition) - if match(label, "^[1-9]$") then - return "LG", 10+tonumber(label) - end - else - -- [<>][1-9] (local label reference) - local dir, lnum = match(label, "^([<>])([1-9])$") - if dir then -- Fwd: 1-9, Bkwd: 11-19. 
- return "LG", lnum + (dir == ">" and 0 or 10) - end - -- extern label (extern label reference) - local extname = match(label, "^extern%s+(%S+)$") - if extname then - return "EXT", map_extern[extname] - end - end - werror("bad label `"..label.."'") -end - -local function branch_type(op) - if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL - elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or - band(op, 0x3b000000) == 0x18000000 then - return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal - elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ - elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR - elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP - else - assert(false, "unknown branch type") - end -end - ------------------------------------------------------------------------------- - -local map_op, op_template - -local function op_alias(opname, f) - return function(params, nparams) - if not params then return "-> "..opname:sub(1, -3) end - f(params, nparams) - op_template(params, map_op[opname], nparams) - end -end - -local function alias_bfx(p) - p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1" -end - -local function alias_bfiz(p) - parse_reg(p[1]) - if parse_reg_type == "w" then - p[3] = "#-("..p[3]:sub(2)..")%32" - p[4] = "#("..p[4]:sub(2)..")-1" - else - p[3] = "#-("..p[3]:sub(2)..")%64" - p[4] = "#("..p[4]:sub(2)..")-1" - end -end - -local alias_lslimm = op_alias("ubfm_4", function(p) - parse_reg(p[1]) - local sh = p[3]:sub(2) - if parse_reg_type == "w" then - p[3] = "#-("..sh..")%32" - p[4] = "#31-("..sh..")" - else - p[3] = "#-("..sh..")%64" - p[4] = "#63-("..sh..")" - end -end) - --- Template strings for ARM instructions. -map_op = { - -- Basic data processing instructions. 
- add_3 = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx", - add_4 = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX", - adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx", - adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX", - cmn_2 = "2b00001fNMg|3100001fpNIg|ab20601fpNMx", - cmn_3 = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX", - - sub_3 = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx", - sub_4 = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX", - subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx", - subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX", - cmp_2 = "6b00001fNMg|7100001fpNIg|eb20601fpNMx", - cmp_3 = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX", - - neg_2 = "4b0003e0DMg", - neg_3 = "4b0003e0DMSg", - negs_2 = "6b0003e0DMg", - negs_3 = "6b0003e0DMSg", - - adc_3 = "1a000000DNMg", - adcs_3 = "3a000000DNMg", - sbc_3 = "5a000000DNMg", - sbcs_3 = "7a000000DNMg", - ngc_2 = "5a0003e0DMg", - ngcs_2 = "7a0003e0DMg", - - and_3 = "0a000000DNMg|12000000pDNig", - and_4 = "0a000000DNMSg", - orr_3 = "2a000000DNMg|32000000pDNig", - orr_4 = "2a000000DNMSg", - eor_3 = "4a000000DNMg|52000000pDNig", - eor_4 = "4a000000DNMSg", - ands_3 = "6a000000DNMg|72000000DNig", - ands_4 = "6a000000DNMSg", - tst_2 = "6a00001fNMg|7200001fNig", - tst_3 = "6a00001fNMSg", - - bic_3 = "0a200000DNMg", - bic_4 = "0a200000DNMSg", - orn_3 = "2a200000DNMg", - orn_4 = "2a200000DNMSg", - eon_3 = "4a200000DNMg", - eon_4 = "4a200000DNMSg", - bics_3 = "6a200000DNMg", - bics_4 = "6a200000DNMSg", - - movn_2 = "12800000DWg", - movn_3 = "12800000DWRg", - movz_2 = "52800000DWg", - movz_3 = "52800000DWRg", - movk_2 = "72800000DWg", - movk_3 = "72800000DWRg", - - -- TODO: this doesn't cover all valid immediates for mov reg, #imm. 
- mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg", - mov_3 = "2a0003e0DMSg", - mvn_2 = "2a2003e0DMg", - mvn_3 = "2a2003e0DMSg", - - adr_2 = "10000000DBx", - adrp_2 = "90000000DBx", - - csel_4 = "1a800000DNMCg", - csinc_4 = "1a800400DNMCg", - csinv_4 = "5a800000DNMCg", - csneg_4 = "5a800400DNMCg", - cset_2 = "1a9f07e0Dcg", - csetm_2 = "5a9f03e0Dcg", - cinc_3 = "1a800400DNmcg", - cinv_3 = "5a800000DNmcg", - cneg_3 = "5a800400DNmcg", - - ccmn_4 = "3a400000NMVCg|3a400800N5VCg", - ccmp_4 = "7a400000NMVCg|7a400800N5VCg", - - madd_4 = "1b000000DNMAg", - msub_4 = "1b008000DNMAg", - mul_3 = "1b007c00DNMg", - mneg_3 = "1b00fc00DNMg", - - smaddl_4 = "9b200000DxNMwAx", - smsubl_4 = "9b208000DxNMwAx", - smull_3 = "9b207c00DxNMw", - smnegl_3 = "9b20fc00DxNMw", - smulh_3 = "9b407c00DNMx", - umaddl_4 = "9ba00000DxNMwAx", - umsubl_4 = "9ba08000DxNMwAx", - umull_3 = "9ba07c00DxNMw", - umnegl_3 = "9ba0fc00DxNMw", - umulh_3 = "9bc07c00DNMx", - - udiv_3 = "1ac00800DNMg", - sdiv_3 = "1ac00c00DNMg", - - -- Bit operations. 
- sbfm_4 = "13000000DN12w|93400000DN12x", - bfm_4 = "33000000DN12w|b3400000DN12x", - ubfm_4 = "53000000DN12w|d3400000DN12x", - extr_4 = "13800000DNM2w|93c00000DNM2x", - - sxtb_2 = "13001c00DNw|93401c00DNx", - sxth_2 = "13003c00DNw|93403c00DNx", - sxtw_2 = "93407c00DxNw", - uxtb_2 = "53001c00DNw", - uxth_2 = "53003c00DNw", - - sbfx_4 = op_alias("sbfm_4", alias_bfx), - bfxil_4 = op_alias("bfm_4", alias_bfx), - ubfx_4 = op_alias("ubfm_4", alias_bfx), - sbfiz_4 = op_alias("sbfm_4", alias_bfiz), - bfi_4 = op_alias("bfm_4", alias_bfiz), - ubfiz_4 = op_alias("ubfm_4", alias_bfiz), - - lsl_3 = function(params, nparams) - if params and params[3]:byte() == 35 then - return alias_lslimm(params, nparams) - else - return op_template(params, "1ac02000DNMg", nparams) - end - end, - lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x", - asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x", - ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x", - - clz_2 = "5ac01000DNg", - cls_2 = "5ac01400DNg", - rbit_2 = "5ac00000DNg", - rev_2 = "5ac00800DNw|dac00c00DNx", - rev16_2 = "5ac00400DNg", - rev32_2 = "dac00800DNx", - - -- Loads and stores. - ["strb_*"] = "38000000DwL", - ["ldrb_*"] = "38400000DwL", - ["ldrsb_*"] = "38c00000DwL|38800000DxL", - ["strh_*"] = "78000000DwL", - ["ldrh_*"] = "78400000DwL", - ["ldrsh_*"] = "78c00000DwL|78800000DxL", - ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL", - ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL", - ["ldrsw_*"] = "98000000DxB|b8800000DxL", - -- NOTE: ldur etc. are handled by ldr et al. - - ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP", - ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP", - ["ldpsw_*"] = "68400000DAxP", - - -- Branches. - b_1 = "14000000B", - bl_1 = "94000000B", - blr_1 = "d63f0000Nx", - br_1 = "d61f0000Nx", - ret_0 = "d65f03c0", - ret_1 = "d65f0000Nx", - -- b.cond is added below. 
- cbz_2 = "34000000DBg", - cbnz_2 = "35000000DBg", - tbz_3 = "36000000DTBw|36000000DTBx", - tbnz_3 = "37000000DTBw|37000000DTBx", - - -- Miscellaneous instructions. - -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr - -- TODO: sys, sysl, ic, dc, at, tlbi - -- TODO: hint, yield, wfe, wfi, sev, sevl - -- TODO: clrex, dsb, dmb, isb - nop_0 = "d503201f", - brk_0 = "d4200000", - brk_1 = "d4200000W", - - -- Floating point instructions. - fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf", - fabs_2 = "1e20c000DNf", - fneg_2 = "1e214000DNf", - fsqrt_2 = "1e21c000DNf", - - fcvt_2 = "1e22c000DdNs|1e624000DsNd", - - -- TODO: half-precision and fixed-point conversions. - fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd", - fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd", - fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd", - fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd", - fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd", - fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd", - fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd", - fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd", - fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd", - fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd", - - scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx", - ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx", - - frintn_2 = "1e244000DNf", - frintp_2 = "1e24c000DNf", - frintm_2 = "1e254000DNf", - frintz_2 = "1e25c000DNf", - frinta_2 = "1e264000DNf", - frintx_2 = "1e274000DNf", - frinti_2 = "1e27c000DNf", - - fadd_3 = "1e202800DNMf", - fsub_3 = "1e203800DNMf", - fmul_3 = "1e200800DNMf", - fnmul_3 = "1e208800DNMf", - fdiv_3 = "1e201800DNMf", - - fmadd_4 = "1f000000DNMAf", - fmsub_4 = "1f008000DNMAf", - fnmadd_4 = "1f200000DNMAf", - fnmsub_4 = "1f208000DNMAf", - - fmax_3 = 
"1e204800DNMf", - fmaxnm_3 = "1e206800DNMf", - fmin_3 = "1e205800DNMf", - fminnm_3 = "1e207800DNMf", - - fcmp_2 = "1e202000NMf|1e202008NZf", - fcmpe_2 = "1e202010NMf|1e202018NZf", - - fccmp_4 = "1e200400NMVCf", - fccmpe_4 = "1e200410NMVCf", - - fcsel_4 = "1e200c00DNMCf", - - -- TODO: crc32*, aes*, sha*, pmull - -- TODO: SIMD instructions. -} - -for cond,c in pairs(map_cond) do - map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B" -end - ------------------------------------------------------------------------------- - --- Handle opcodes defined with template strings. -local function parse_template(params, template, nparams, pos) - local op = tonumber(sub(template, 1, 8), 16) - local n = 1 - local rtt = {} - - parse_reg_type = false - - -- Process each character. - for p in gmatch(sub(template, 9), ".") do - local q = params[n] - if p == "D" then - op = op + parse_reg(q); n = n + 1 - elseif p == "N" then - op = op + shl(parse_reg(q), 5); n = n + 1 - elseif p == "M" then - op = op + shl(parse_reg(q), 16); n = n + 1 - elseif p == "A" then - op = op + shl(parse_reg(q), 10); n = n + 1 - elseif p == "m" then - op = op + shl(parse_reg(params[n-1]), 16) - - elseif p == "p" then - if q == "sp" then params[n] = "@x31" end - elseif p == "g" then - if parse_reg_type == "x" then - op = op + 0x80000000 - elseif parse_reg_type ~= "w" then - werror("bad register type") - end - parse_reg_type = false - elseif p == "f" then - if parse_reg_type == "d" then - op = op + 0x00400000 - elseif parse_reg_type ~= "s" then - werror("bad register type") - end - parse_reg_type = false - elseif p == "x" or p == "w" or p == "d" or p == "s" then - if parse_reg_type ~= p then - werror("register size mismatch") - end - parse_reg_type = false - - elseif p == "L" then - op = parse_load(params, nparams, n, op) - elseif p == "P" then - op = parse_load_pair(params, nparams, n, op) - - elseif p == "B" then - local mode, v, s = parse_label(q, false); n = n + 1 - local m = branch_type(op) - 
waction("REL_"..mode, v+m, s, 1) - - elseif p == "I" then - op = op + parse_imm12(q); n = n + 1 - elseif p == "i" then - op = op + parse_imm13(q); n = n + 1 - elseif p == "W" then - op = op + parse_imm(q, 16, 5, 0, false); n = n + 1 - elseif p == "T" then - op = op + parse_imm6(q); n = n + 1 - elseif p == "1" then - op = op + parse_imm(q, 6, 16, 0, false); n = n + 1 - elseif p == "2" then - op = op + parse_imm(q, 6, 10, 0, false); n = n + 1 - elseif p == "5" then - op = op + parse_imm(q, 5, 16, 0, false); n = n + 1 - elseif p == "V" then - op = op + parse_imm(q, 4, 0, 0, false); n = n + 1 - elseif p == "F" then - op = op + parse_fpimm(q); n = n + 1 - elseif p == "Z" then - if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end - n = n + 1 - - elseif p == "S" then - op = op + parse_shift(q); n = n + 1 - elseif p == "X" then - op = op + parse_extend(q); n = n + 1 - elseif p == "R" then - op = op + parse_lslx16(q); n = n + 1 - elseif p == "C" then - op = op + parse_cond(q, 0); n = n + 1 - elseif p == "c" then - op = op + parse_cond(q, 1); n = n + 1 - - else - assert(false) - end - end - wputpos(pos, op) -end - -function op_template(params, template, nparams) - if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end - - -- Limit number of section buffer positions used by a single dasm_put(). - -- A single opcode needs a maximum of 3 positions. 
- if secpos+3 > maxsecpos then wflush() end - local pos = wpos() - local lpos, apos, spos = #actlist, #actargs, secpos - - local ok, err - for t in gmatch(template, "[^|]+") do - ok, err = pcall(parse_template, params, t, nparams, pos) - if ok then return end - secpos = spos - actlist[lpos+1] = nil - actlist[lpos+2] = nil - actlist[lpos+3] = nil - actargs[apos+1] = nil - actargs[apos+2] = nil - actargs[apos+3] = nil - end - error(err, 0) -end - -map_op[".template__"] = op_template - ------------------------------------------------------------------------------- - --- Pseudo-opcode to mark the position where the action list is to be emitted. -map_op[".actionlist_1"] = function(params) - if not params then return "cvar" end - local name = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeactions(out, name) end) -end - --- Pseudo-opcode to mark the position where the global enum is to be emitted. -map_op[".globals_1"] = function(params) - if not params then return "prefix" end - local prefix = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeglobals(out, prefix) end) -end - --- Pseudo-opcode to mark the position where the global names are to be emitted. -map_op[".globalnames_1"] = function(params) - if not params then return "cvar" end - local name = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeglobalnames(out, name) end) -end - --- Pseudo-opcode to mark the position where the extern names are to be emitted. -map_op[".externnames_1"] = function(params) - if not params then return "cvar" end - local name = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeexternnames(out, name) end) -end - ------------------------------------------------------------------------------- - --- Label pseudo-opcode (converted from trailing colon form). 
-map_op[".label_1"] = function(params) - if not params then return "[1-9] | ->global | =>pcexpr" end - if secpos+1 > maxsecpos then wflush() end - local mode, n, s = parse_label(params[1], true) - if mode == "EXT" then werror("bad label definition") end - waction("LABEL_"..mode, n, s, 1) -end - ------------------------------------------------------------------------------- - --- Pseudo-opcodes for data storage. -map_op[".long_*"] = function(params) - if not params then return "imm..." end - for _,p in ipairs(params) do - local n = tonumber(p) - if not n then werror("bad immediate `"..p.."'") end - if n < 0 then n = n + 2^32 end - wputw(n) - if secpos+2 > maxsecpos then wflush() end - end -end - --- Alignment pseudo-opcode. -map_op[".align_1"] = function(params) - if not params then return "numpow2" end - if secpos+1 > maxsecpos then wflush() end - local align = tonumber(params[1]) - if align then - local x = align - -- Must be a power of 2 in the range (2 ... 256). - for i=1,8 do - x = x / 2 - if x == 1 then - waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. - return - end - end - end - werror("bad alignment") -end - ------------------------------------------------------------------------------- - --- Pseudo-opcode for (primitive) type definitions (map to C types). -map_op[".type_3"] = function(params, nparams) - if not params then - return nparams == 2 and "name, ctype" or "name, ctype, reg" - end - local name, ctype, reg = params[1], params[2], params[3] - if not match(name, "^[%a_][%w_]*$") then - werror("bad type name `"..name.."'") - end - local tp = map_type[name] - if tp then - werror("duplicate type `"..name.."'") - end - -- Add #type to defines. A bit unclean to put it in map_archdef. - map_archdef["#"..name] = "sizeof("..ctype..")" - -- Add new type and emit shortcut define. 
- local num = ctypenum + 1 - map_type[name] = { - ctype = ctype, - ctypefmt = format("Dt%X(%%s)", num), - reg = reg, - } - wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) - ctypenum = num -end -map_op[".type_2"] = map_op[".type_3"] - --- Dump type definitions. -local function dumptypes(out, lvl) - local t = {} - for name in pairs(map_type) do t[#t+1] = name end - sort(t) - out:write("Type definitions:\n") - for _,name in ipairs(t) do - local tp = map_type[name] - local reg = tp.reg or "" - out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) - end - out:write("\n") -end - ------------------------------------------------------------------------------- - --- Set the current section. -function _M.section(num) - waction("SECTION", num) - wflush(true) -- SECTION is a terminal action. -end - ------------------------------------------------------------------------------- - --- Dump architecture description. -function _M.dumparch(out) - out:write(format("DynASM %s version %s, released %s\n\n", - _info.arch, _info.version, _info.release)) - dumpactions(out) -end - --- Dump all user defined elements. -function _M.dumpdef(out, lvl) - dumptypes(out, lvl) - dumpglobals(out, lvl) - dumpexterns(out, lvl) -end - ------------------------------------------------------------------------------- - --- Pass callbacks from/to the DynASM core. -function _M.passcb(wl, we, wf, ww) - wline, werror, wfatal, wwarn = wl, we, wf, ww - return wflush -end - --- Setup the arch-specific module. -function _M.setup(arch, opt) - g_arch, g_opt = arch, opt -end - --- Merge the core maps and the arch-specific maps. 
-function _M.mergemaps(map_coreop, map_def) - setmetatable(map_op, { __index = map_coreop }) - setmetatable(map_def, { __index = map_archdef }) - return map_op, map_def -end - -return _M - ------------------------------------------------------------------------------- - diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h deleted file mode 100644 index 4b49fd8c7c..0000000000 --- a/dynasm/dasm_mips.h +++ /dev/null @@ -1,419 +0,0 @@ -/* -** DynASM MIPS encoding engine. -** Copyright (C) 2005-2017 Mike Pall. All rights reserved. -** Released under the MIT license. See dynasm.lua for full copyright notice. -*/ - -#include -#include -#include -#include - -#define DASM_ARCH "mips" - -#ifndef DASM_EXTERN -#define DASM_EXTERN(a,b,c,d) 0 -#endif - -/* Action definitions. */ -enum { - DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, - /* The following actions need a buffer position. */ - DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, - /* The following actions also have an argument. */ - DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS, - DASM__MAX -}; - -/* Maximum number of section buffer positions for a single dasm_put() call. */ -#define DASM_MAXSECPOS 25 - -/* DynASM encoder status codes. Action list offset or number are or'ed in. */ -#define DASM_S_OK 0x00000000 -#define DASM_S_NOMEM 0x01000000 -#define DASM_S_PHASE 0x02000000 -#define DASM_S_MATCH_SEC 0x03000000 -#define DASM_S_RANGE_I 0x11000000 -#define DASM_S_RANGE_SEC 0x12000000 -#define DASM_S_RANGE_LG 0x13000000 -#define DASM_S_RANGE_PC 0x14000000 -#define DASM_S_RANGE_REL 0x15000000 -#define DASM_S_UNDEF_LG 0x21000000 -#define DASM_S_UNDEF_PC 0x22000000 - -/* Macros to convert positions (8 bit section + 24 bit index). */ -#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) -#define DASM_POS2BIAS(pos) ((pos)&0xff000000) -#define DASM_SEC2POS(sec) ((sec)<<24) -#define DASM_POS2SEC(pos) ((pos)>>24) -#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) - -/* Action list type. 
*/ -typedef const unsigned int *dasm_ActList; - -/* Per-section structure. */ -typedef struct dasm_Section { - int *rbuf; /* Biased buffer pointer (negative section bias). */ - int *buf; /* True buffer pointer. */ - size_t bsize; /* Buffer size in bytes. */ - int pos; /* Biased buffer position. */ - int epos; /* End of biased buffer position - max single put. */ - int ofs; /* Byte offset into section. */ -} dasm_Section; - -/* Core structure holding the DynASM encoding state. */ -struct dasm_State { - size_t psize; /* Allocated size of this structure. */ - dasm_ActList actionlist; /* Current actionlist pointer. */ - int *lglabels; /* Local/global chain/pos ptrs. */ - size_t lgsize; - int *pclabels; /* PC label chains/pos ptrs. */ - size_t pcsize; - void **globals; /* Array of globals (bias -10). */ - dasm_Section *section; /* Pointer to active section. */ - size_t codesize; /* Total size of all code sections. */ - int maxsection; /* 0 <= sectionidx < maxsection. */ - int status; /* Status code. */ - dasm_Section sections[1]; /* All sections. Alloc-extended. */ -}; - -/* The size of the core structure depends on the max. number of sections. */ -#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) - - -/* Initialize DynASM state. */ -void dasm_init(Dst_DECL, int maxsection) -{ - dasm_State *D; - size_t psz = 0; - int i; - Dst_REF = NULL; - DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); - D = Dst_REF; - D->psize = psz; - D->lglabels = NULL; - D->lgsize = 0; - D->pclabels = NULL; - D->pcsize = 0; - D->globals = NULL; - D->maxsection = maxsection; - for (i = 0; i < maxsection; i++) { - D->sections[i].buf = NULL; /* Need this for pass3. */ - D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); - D->sections[i].bsize = 0; - D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ - } -} - -/* Free DynASM state. 
*/ -void dasm_free(Dst_DECL) -{ - dasm_State *D = Dst_REF; - int i; - for (i = 0; i < D->maxsection; i++) - if (D->sections[i].buf) - DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); - if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); - if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); - DASM_M_FREE(Dst, D, D->psize); -} - -/* Setup global label array. Must be called before dasm_setup(). */ -void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) -{ - dasm_State *D = Dst_REF; - D->globals = gl - 10; /* Negative bias to compensate for locals. */ - DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); -} - -/* Grow PC label array. Can be called after dasm_setup(), too. */ -void dasm_growpc(Dst_DECL, unsigned int maxpc) -{ - dasm_State *D = Dst_REF; - size_t osz = D->pcsize; - DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); - memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); -} - -/* Setup encoder. */ -void dasm_setup(Dst_DECL, const void *actionlist) -{ - dasm_State *D = Dst_REF; - int i; - D->actionlist = (dasm_ActList)actionlist; - D->status = DASM_S_OK; - D->section = &D->sections[0]; - memset((void *)D->lglabels, 0, D->lgsize); - if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); - for (i = 0; i < D->maxsection; i++) { - D->sections[i].pos = DASM_SEC2POS(i); - D->sections[i].ofs = 0; - } -} - - -#ifdef DASM_CHECKS -#define CK(x, st) \ - do { if (!(x)) { \ - D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) -#define CKPL(kind, st) \ - do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ - D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) -#else -#define CK(x, st) ((void)0) -#define CKPL(kind, st) ((void)0) -#endif - -/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ -void dasm_put(Dst_DECL, int start, ...) 
-{ - va_list ap; - dasm_State *D = Dst_REF; - dasm_ActList p = D->actionlist + start; - dasm_Section *sec = D->section; - int pos = sec->pos, ofs = sec->ofs; - int *b; - - if (pos >= sec->epos) { - DASM_M_GROW(Dst, int, sec->buf, sec->bsize, - sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); - sec->rbuf = sec->buf - DASM_POS2BIAS(pos); - sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); - } - - b = sec->rbuf; - b[pos++] = start; - - va_start(ap, start); - while (1) { - unsigned int ins = *p++; - unsigned int action = (ins >> 16) - 0xff00; - if (action >= DASM__MAX) { - ofs += 4; - } else { - int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; - switch (action) { - case DASM_STOP: goto stop; - case DASM_SECTION: - n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); - D->section = &D->sections[n]; goto stop; - case DASM_ESC: p++; ofs += 4; break; - case DASM_REL_EXT: break; - case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; - case DASM_REL_LG: - n = (ins & 2047) - 10; pl = D->lglabels + n; - /* Bkwd rel or global. */ - if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } - pl += 10; n = *pl; - if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ - goto linkrel; - case DASM_REL_PC: - pl = D->pclabels + n; CKPL(pc, PC); - putrel: - n = *pl; - if (n < 0) { /* Label exists. Get label pos and store it. */ - b[pos] = -n; - } else { - linkrel: - b[pos] = n; /* Else link to rel chain, anchored at label. */ - *pl = pos; - } - pos++; - break; - case DASM_LABEL_LG: - pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; - case DASM_LABEL_PC: - pl = D->pclabels + n; CKPL(pc, PC); - putlabel: - n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ - while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; - } - *pl = -pos; /* Label exists now. */ - b[pos++] = ofs; /* Store pass1 offset estimate. 
*/ - break; - case DASM_IMM: case DASM_IMMS: -#ifdef DASM_CHECKS - CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); -#endif - n >>= ((ins>>10)&31); -#ifdef DASM_CHECKS - if (ins & 0x8000) - CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); - else - CK((n>>((ins>>5)&31)) == 0, RANGE_I); -#endif - b[pos++] = n; - break; - } - } - } -stop: - va_end(ap); - sec->pos = pos; - sec->ofs = ofs; -} -#undef CK - -/* Pass 2: Link sections, shrink aligns, fix label offsets. */ -int dasm_link(Dst_DECL, size_t *szp) -{ - dasm_State *D = Dst_REF; - int secnum; - int ofs = 0; - -#ifdef DASM_CHECKS - *szp = 0; - if (D->status != DASM_S_OK) return D->status; - { - int pc; - for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) - if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; - } -#endif - - { /* Handle globals not defined in this translation unit. */ - int idx; - for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { - int n = D->lglabels[idx]; - /* Undefined label: Collapse rel chain and replace with marker (< 0). */ - while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } - } - } - - /* Combine all code sections. No support for data sections (yet). */ - for (secnum = 0; secnum < D->maxsection; secnum++) { - dasm_Section *sec = D->sections + secnum; - int *b = sec->rbuf; - int pos = DASM_SEC2POS(secnum); - int lastpos = sec->pos; - - while (pos != lastpos) { - dasm_ActList p = D->actionlist + b[pos++]; - while (1) { - unsigned int ins = *p++; - unsigned int action = (ins >> 16) - 0xff00; - switch (action) { - case DASM_STOP: case DASM_SECTION: goto stop; - case DASM_ESC: p++; break; - case DASM_REL_EXT: break; - case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; - case DASM_REL_LG: case DASM_REL_PC: pos++; break; - case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; - case DASM_IMM: case DASM_IMMS: pos++; break; - } - } - stop: (void)0; - } - ofs += sec->ofs; /* Next section starts right after current section. 
*/ - } - - D->codesize = ofs; /* Total size of all code sections */ - *szp = ofs; - return DASM_S_OK; -} - -#ifdef DASM_CHECKS -#define CK(x, st) \ - do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) -#else -#define CK(x, st) ((void)0) -#endif - -/* Pass 3: Encode sections. */ -int dasm_encode(Dst_DECL, void *buffer) -{ - dasm_State *D = Dst_REF; - char *base = (char *)buffer; - unsigned int *cp = (unsigned int *)buffer; - int secnum; - - /* Encode all code sections. No support for data sections (yet). */ - for (secnum = 0; secnum < D->maxsection; secnum++) { - dasm_Section *sec = D->sections + secnum; - int *b = sec->buf; - int *endb = sec->rbuf + sec->pos; - - while (b != endb) { - dasm_ActList p = D->actionlist + *b++; - while (1) { - unsigned int ins = *p++; - unsigned int action = (ins >> 16) - 0xff00; - int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; - switch (action) { - case DASM_STOP: case DASM_SECTION: goto stop; - case DASM_ESC: *cp++ = *p++; break; - case DASM_REL_EXT: - n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1); - goto patchrel; - case DASM_ALIGN: - ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; - break; - case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); - case DASM_REL_PC: - CK(n >= 0, UNDEF_PC); - n = *DASM_POS2PTR(D, n); - if (ins & 2048) - n = n - (int)((char *)cp - base); - else - n = (n + (int)(size_t)base) & 0x0fffffff; - patchrel: - CK((n & 3) == 0 && - ((n + ((ins & 2048) ? 0x00020000 : 0)) >> - ((ins & 2048) ? 18 : 28)) == 0, RANGE_REL); - cp[-1] |= ((n>>2) & ((ins & 2048) ? 
0x0000ffff: 0x03ffffff)); - break; - case DASM_LABEL_LG: - ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); - break; - case DASM_LABEL_PC: break; - case DASM_IMMS: - cp[-1] |= ((n>>3) & 4); n &= 0x1f; - /* fallthrough */ - case DASM_IMM: - cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); - break; - default: *cp++ = ins; break; - } - } - stop: (void)0; - } - } - - if (base + D->codesize != (char *)cp) /* Check for phase errors. */ - return DASM_S_PHASE; - return DASM_S_OK; -} -#undef CK - -/* Get PC label offset. */ -int dasm_getpclabel(Dst_DECL, unsigned int pc) -{ - dasm_State *D = Dst_REF; - if (pc*sizeof(int) < D->pcsize) { - int pos = D->pclabels[pc]; - if (pos < 0) return *DASM_POS2PTR(D, -pos); - if (pos > 0) return -1; /* Undefined. */ - } - return -2; /* Unused or out of range. */ -} - -#ifdef DASM_CHECKS -/* Optional sanity checker to call between isolated encoding steps. */ -int dasm_checkstep(Dst_DECL, int secmatch) -{ - dasm_State *D = Dst_REF; - if (D->status == DASM_S_OK) { - int i; - for (i = 1; i <= 9; i++) { - if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } - D->lglabels[i] = 0; - } - } - if (D->status == DASM_S_OK && secmatch >= 0 && - D->section != &D->sections[secmatch]) - D->status = DASM_S_MATCH_SEC|(D->section-D->sections); - return D->status; -} -#endif - diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua deleted file mode 100644 index 78a4e34a09..0000000000 --- a/dynasm/dasm_mips.lua +++ /dev/null @@ -1,1008 +0,0 @@ ------------------------------------------------------------------------------- --- DynASM MIPS32/MIPS64 module. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- See dynasm.lua for full copyright notice. 
------------------------------------------------------------------------------- - -local mips64 = mips64 - --- Module information: -local _info = { - arch = mips64 and "mips64" or "mips", - description = "DynASM MIPS32/MIPS64 module", - version = "1.4.0", - vernum = 10400, - release = "2016-05-24", - author = "Mike Pall", - license = "MIT", -} - --- Exported glue functions for the arch-specific module. -local _M = { _info = _info } - --- Cache library functions. -local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs -local assert, setmetatable = assert, setmetatable -local _s = string -local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char -local match, gmatch = _s.match, _s.gmatch -local concat, sort = table.concat, table.sort -local bit = bit or require("bit") -local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift -local tohex = bit.tohex - --- Inherited tables and callbacks. -local g_opt, g_arch -local wline, werror, wfatal, wwarn - --- Action name list. --- CHECK: Keep this in sync with the C code! -local action_names = { - "STOP", "SECTION", "ESC", "REL_EXT", - "ALIGN", "REL_LG", "LABEL_LG", - "REL_PC", "LABEL_PC", "IMM", "IMMS", -} - --- Maximum number of section buffer positions for dasm_put(). --- CHECK: Keep this in sync with the C code! -local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. - --- Action name -> action number. -local map_action = {} -for n,name in ipairs(action_names) do - map_action[name] = n-1 -end - --- Action list buffer. -local actlist = {} - --- Argument list for next dasm_put(). Start with offset 0 into action list. -local actargs = { 0 } - --- Current number of section buffer positions for dasm_put(). -local secpos = 1 - ------------------------------------------------------------------------------- - --- Dump action names and numbers. 
-local function dumpactions(out) - out:write("DynASM encoding engine action codes:\n") - for n,name in ipairs(action_names) do - local num = map_action[name] - out:write(format(" %-10s %02X %d\n", name, num, num)) - end - out:write("\n") -end - --- Write action list buffer as a huge static C array. -local function writeactions(out, name) - local nn = #actlist - if nn == 0 then nn = 1; actlist[0] = map_action.STOP end - out:write("static const unsigned int ", name, "[", nn, "] = {\n") - for i = 1,nn-1 do - assert(out:write("0x", tohex(actlist[i]), ",\n")) - end - assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) -end - ------------------------------------------------------------------------------- - --- Add word to action list. -local function wputxw(n) - assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") - actlist[#actlist+1] = n -end - --- Add action to list with optional arg. Advance buffer pos, too. -local function waction(action, val, a, num) - local w = assert(map_action[action], "bad action name `"..action.."'") - wputxw(0xff000000 + w * 0x10000 + (val or 0)) - if a then actargs[#actargs+1] = a end - if a or num then secpos = secpos + (num or 1) end -end - --- Flush action list (intervening C code or buffer pos overflow). -local function wflush(term) - if #actlist == actargs[1] then return end -- Nothing to flush. - if not term then waction("STOP") end -- Terminate action list. - wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) - actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). - secpos = 1 -- The actionlist offset occupies a buffer position, too. -end - --- Put escaped word. -local function wputw(n) - if n >= 0xff000000 then waction("ESC") end - wputxw(n) -end - --- Reserve position for word. -local function wpos() - local pos = #actlist+1 - actlist[pos] = "" - return pos -end - --- Store word to reserved position. 
-local function wputpos(pos, n) - assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") - actlist[pos] = n -end - ------------------------------------------------------------------------------- - --- Global label name -> global label number. With auto assignment on 1st use. -local next_global = 20 -local map_global = setmetatable({}, { __index = function(t, name) - if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end - local n = next_global - if n > 2047 then werror("too many global labels") end - next_global = n + 1 - t[name] = n - return n -end}) - --- Dump global labels. -local function dumpglobals(out, lvl) - local t = {} - for name, n in pairs(map_global) do t[n] = name end - out:write("Global labels:\n") - for i=20,next_global-1 do - out:write(format(" %s\n", t[i])) - end - out:write("\n") -end - --- Write global label enum. -local function writeglobals(out, prefix) - local t = {} - for name, n in pairs(map_global) do t[n] = name end - out:write("enum {\n") - for i=20,next_global-1 do - out:write(" ", prefix, t[i], ",\n") - end - out:write(" ", prefix, "_MAX\n};\n") -end - --- Write global label names. -local function writeglobalnames(out, name) - local t = {} - for name, n in pairs(map_global) do t[n] = name end - out:write("static const char *const ", name, "[] = {\n") - for i=20,next_global-1 do - out:write(" \"", t[i], "\",\n") - end - out:write(" (const char *)0\n};\n") -end - ------------------------------------------------------------------------------- - --- Extern label name -> extern label number. With auto assignment on 1st use. -local next_extern = 0 -local map_extern_ = {} -local map_extern = setmetatable({}, { __index = function(t, name) - -- No restrictions on the name for now. - local n = next_extern - if n > 2047 then werror("too many extern labels") end - next_extern = n + 1 - t[name] = n - map_extern_[n] = name - return n -end}) - --- Dump extern labels. 
-local function dumpexterns(out, lvl) - out:write("Extern labels:\n") - for i=0,next_extern-1 do - out:write(format(" %s\n", map_extern_[i])) - end - out:write("\n") -end - --- Write extern label names. -local function writeexternnames(out, name) - out:write("static const char *const ", name, "[] = {\n") - for i=0,next_extern-1 do - out:write(" \"", map_extern_[i], "\",\n") - end - out:write(" (const char *)0\n};\n") -end - ------------------------------------------------------------------------------- - --- Arch-specific maps. -local map_archdef = { sp="r29", ra="r31" } -- Ext. register name -> int. name. - -local map_type = {} -- Type name -> { ctype, reg } -local ctypenum = 0 -- Type number (for Dt... macros). - --- Reverse defines for registers. -function _M.revdef(s) - if s == "r29" then return "sp" - elseif s == "r31" then return "ra" end - return s -end - ------------------------------------------------------------------------------- - --- Template strings for MIPS instructions. -local map_op = { - -- First-level opcodes. 
- j_1 = "08000000J", - jal_1 = "0c000000J", - b_1 = "10000000B", - beqz_2 = "10000000SB", - beq_3 = "10000000STB", - bnez_2 = "14000000SB", - bne_3 = "14000000STB", - blez_2 = "18000000SB", - bgtz_2 = "1c000000SB", - addi_3 = "20000000TSI", - li_2 = "24000000TI", - addiu_3 = "24000000TSI", - slti_3 = "28000000TSI", - sltiu_3 = "2c000000TSI", - andi_3 = "30000000TSU", - lu_2 = "34000000TU", - ori_3 = "34000000TSU", - xori_3 = "38000000TSU", - lui_2 = "3c000000TU", - beqzl_2 = "50000000SB", - beql_3 = "50000000STB", - bnezl_2 = "54000000SB", - bnel_3 = "54000000STB", - blezl_2 = "58000000SB", - bgtzl_2 = "5c000000SB", - daddi_3 = mips64 and "60000000TSI", - daddiu_3 = mips64 and "64000000TSI", - ldl_2 = mips64 and "68000000TO", - ldr_2 = mips64 and "6c000000TO", - lb_2 = "80000000TO", - lh_2 = "84000000TO", - lwl_2 = "88000000TO", - lw_2 = "8c000000TO", - lbu_2 = "90000000TO", - lhu_2 = "94000000TO", - lwr_2 = "98000000TO", - lwu_2 = mips64 and "9c000000TO", - sb_2 = "a0000000TO", - sh_2 = "a4000000TO", - swl_2 = "a8000000TO", - sw_2 = "ac000000TO", - sdl_2 = mips64 and "b0000000TO", - sdr_2 = mips64 and "b1000000TO", - swr_2 = "b8000000TO", - cache_2 = "bc000000NO", - ll_2 = "c0000000TO", - lwc1_2 = "c4000000HO", - pref_2 = "cc000000NO", - ldc1_2 = "d4000000HO", - ld_2 = mips64 and "dc000000TO", - sc_2 = "e0000000TO", - swc1_2 = "e4000000HO", - scd_2 = mips64 and "f0000000TO", - sdc1_2 = "f4000000HO", - sd_2 = mips64 and "fc000000TO", - - -- Opcode SPECIAL. 
- nop_0 = "00000000", - sll_3 = "00000000DTA", - sextw_2 = "00000000DT", - movf_2 = "00000001DS", - movf_3 = "00000001DSC", - movt_2 = "00010001DS", - movt_3 = "00010001DSC", - srl_3 = "00000002DTA", - rotr_3 = "00200002DTA", - sra_3 = "00000003DTA", - sllv_3 = "00000004DTS", - srlv_3 = "00000006DTS", - rotrv_3 = "00000046DTS", - drotrv_3 = mips64 and "00000056DTS", - srav_3 = "00000007DTS", - jr_1 = "00000008S", - jalr_1 = "0000f809S", - jalr_2 = "00000009DS", - movz_3 = "0000000aDST", - movn_3 = "0000000bDST", - syscall_0 = "0000000c", - syscall_1 = "0000000cY", - break_0 = "0000000d", - break_1 = "0000000dY", - sync_0 = "0000000f", - mfhi_1 = "00000010D", - mthi_1 = "00000011S", - mflo_1 = "00000012D", - mtlo_1 = "00000013S", - dsllv_3 = mips64 and "00000014DTS", - dsrlv_3 = mips64 and "00000016DTS", - dsrav_3 = mips64 and "00000017DTS", - mult_2 = "00000018ST", - multu_2 = "00000019ST", - div_2 = "0000001aST", - divu_2 = "0000001bST", - dmult_2 = mips64 and "0000001cST", - dmultu_2 = mips64 and "0000001dST", - ddiv_2 = mips64 and "0000001eST", - ddivu_2 = mips64 and "0000001fST", - add_3 = "00000020DST", - move_2 = mips64 and "00000025DS" or "00000021DS", - addu_3 = "00000021DST", - sub_3 = "00000022DST", - negu_2 = mips64 and "0000002fDT" or "00000023DT", - subu_3 = "00000023DST", - and_3 = "00000024DST", - or_3 = "00000025DST", - xor_3 = "00000026DST", - not_2 = "00000027DS", - nor_3 = "00000027DST", - slt_3 = "0000002aDST", - sltu_3 = "0000002bDST", - dadd_3 = mips64 and "0000002cDST", - daddu_3 = mips64 and "0000002dDST", - dsub_3 = mips64 and "0000002eDST", - dsubu_3 = mips64 and "0000002fDST", - tge_2 = "00000030ST", - tge_3 = "00000030STZ", - tgeu_2 = "00000031ST", - tgeu_3 = "00000031STZ", - tlt_2 = "00000032ST", - tlt_3 = "00000032STZ", - tltu_2 = "00000033ST", - tltu_3 = "00000033STZ", - teq_2 = "00000034ST", - teq_3 = "00000034STZ", - tne_2 = "00000036ST", - tne_3 = "00000036STZ", - dsll_3 = mips64 and "00000038DTa", - dsrl_3 = mips64 and 
"0000003aDTa", - drotr_3 = mips64 and "0020003aDTa", - dsra_3 = mips64 and "0000003bDTa", - dsll32_3 = mips64 and "0000003cDTA", - dsrl32_3 = mips64 and "0000003eDTA", - drotr32_3 = mips64 and "0020003eDTA", - dsra32_3 = mips64 and "0000003fDTA", - - -- Opcode REGIMM. - bltz_2 = "04000000SB", - bgez_2 = "04010000SB", - bltzl_2 = "04020000SB", - bgezl_2 = "04030000SB", - tgei_2 = "04080000SI", - tgeiu_2 = "04090000SI", - tlti_2 = "040a0000SI", - tltiu_2 = "040b0000SI", - teqi_2 = "040c0000SI", - tnei_2 = "040e0000SI", - bltzal_2 = "04100000SB", - bal_1 = "04110000B", - bgezal_2 = "04110000SB", - bltzall_2 = "04120000SB", - bgezall_2 = "04130000SB", - synci_1 = "041f0000O", - - -- Opcode SPECIAL2. - madd_2 = "70000000ST", - maddu_2 = "70000001ST", - mul_3 = "70000002DST", - msub_2 = "70000004ST", - msubu_2 = "70000005ST", - clz_2 = "70000020DS=", - clo_2 = "70000021DS=", - dclz_2 = mips64 and "70000024DS=", - dclo_2 = mips64 and "70000025DS=", - sdbbp_0 = "7000003f", - sdbbp_1 = "7000003fY", - - -- Opcode SPECIAL3. - ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 - dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32 - dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1 - dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1 - zextw_2 = mips64 and "7c00f803TS", - ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1 - dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33 - dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33 - dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1 - wsbh_2 = "7c0000a0DT", - dsbh_2 = mips64 and "7c0000a4DT", - dshd_2 = mips64 and "7c000164DT", - seb_2 = "7c000420DT", - seh_2 = "7c000620DT", - rdhwr_2 = "7c00003bTD", - - -- Opcode COP0. 
- mfc0_2 = "40000000TD", - mfc0_3 = "40000000TDW", - dmfc0_2 = mips64 and "40200000TD", - dmfc0_3 = mips64 and "40200000TDW", - mtc0_2 = "40800000TD", - mtc0_3 = "40800000TDW", - dmtc0_2 = mips64 and "40a00000TD", - dmtc0_3 = mips64 and "40a00000TDW", - rdpgpr_2 = "41400000DT", - di_0 = "41606000", - di_1 = "41606000T", - ei_0 = "41606020", - ei_1 = "41606020T", - wrpgpr_2 = "41c00000DT", - tlbr_0 = "42000001", - tlbwi_0 = "42000002", - tlbwr_0 = "42000006", - tlbp_0 = "42000008", - eret_0 = "42000018", - deret_0 = "4200001f", - wait_0 = "42000020", - - -- Opcode COP1. - mfc1_2 = "44000000TG", - dmfc1_2 = mips64 and "44200000TG", - cfc1_2 = "44400000TG", - mfhc1_2 = "44600000TG", - mtc1_2 = "44800000TG", - dmtc1_2 = mips64 and "44a00000TG", - ctc1_2 = "44c00000TG", - mthc1_2 = "44e00000TG", - - bc1f_1 = "45000000B", - bc1f_2 = "45000000CB", - bc1t_1 = "45010000B", - bc1t_2 = "45010000CB", - bc1fl_1 = "45020000B", - bc1fl_2 = "45020000CB", - bc1tl_1 = "45030000B", - bc1tl_2 = "45030000CB", - - ["add.s_3"] = "46000000FGH", - ["sub.s_3"] = "46000001FGH", - ["mul.s_3"] = "46000002FGH", - ["div.s_3"] = "46000003FGH", - ["sqrt.s_2"] = "46000004FG", - ["abs.s_2"] = "46000005FG", - ["mov.s_2"] = "46000006FG", - ["neg.s_2"] = "46000007FG", - ["round.l.s_2"] = "46000008FG", - ["trunc.l.s_2"] = "46000009FG", - ["ceil.l.s_2"] = "4600000aFG", - ["floor.l.s_2"] = "4600000bFG", - ["round.w.s_2"] = "4600000cFG", - ["trunc.w.s_2"] = "4600000dFG", - ["ceil.w.s_2"] = "4600000eFG", - ["floor.w.s_2"] = "4600000fFG", - ["movf.s_2"] = "46000011FG", - ["movf.s_3"] = "46000011FGC", - ["movt.s_2"] = "46010011FG", - ["movt.s_3"] = "46010011FGC", - ["movz.s_3"] = "46000012FGT", - ["movn.s_3"] = "46000013FGT", - ["recip.s_2"] = "46000015FG", - ["rsqrt.s_2"] = "46000016FG", - ["cvt.d.s_2"] = "46000021FG", - ["cvt.w.s_2"] = "46000024FG", - ["cvt.l.s_2"] = "46000025FG", - ["cvt.ps.s_3"] = "46000026FGH", - ["c.f.s_2"] = "46000030GH", - ["c.f.s_3"] = "46000030VGH", - ["c.un.s_2"] = "46000031GH", - 
["c.un.s_3"] = "46000031VGH", - ["c.eq.s_2"] = "46000032GH", - ["c.eq.s_3"] = "46000032VGH", - ["c.ueq.s_2"] = "46000033GH", - ["c.ueq.s_3"] = "46000033VGH", - ["c.olt.s_2"] = "46000034GH", - ["c.olt.s_3"] = "46000034VGH", - ["c.ult.s_2"] = "46000035GH", - ["c.ult.s_3"] = "46000035VGH", - ["c.ole.s_2"] = "46000036GH", - ["c.ole.s_3"] = "46000036VGH", - ["c.ule.s_2"] = "46000037GH", - ["c.ule.s_3"] = "46000037VGH", - ["c.sf.s_2"] = "46000038GH", - ["c.sf.s_3"] = "46000038VGH", - ["c.ngle.s_2"] = "46000039GH", - ["c.ngle.s_3"] = "46000039VGH", - ["c.seq.s_2"] = "4600003aGH", - ["c.seq.s_3"] = "4600003aVGH", - ["c.ngl.s_2"] = "4600003bGH", - ["c.ngl.s_3"] = "4600003bVGH", - ["c.lt.s_2"] = "4600003cGH", - ["c.lt.s_3"] = "4600003cVGH", - ["c.nge.s_2"] = "4600003dGH", - ["c.nge.s_3"] = "4600003dVGH", - ["c.le.s_2"] = "4600003eGH", - ["c.le.s_3"] = "4600003eVGH", - ["c.ngt.s_2"] = "4600003fGH", - ["c.ngt.s_3"] = "4600003fVGH", - - ["add.d_3"] = "46200000FGH", - ["sub.d_3"] = "46200001FGH", - ["mul.d_3"] = "46200002FGH", - ["div.d_3"] = "46200003FGH", - ["sqrt.d_2"] = "46200004FG", - ["abs.d_2"] = "46200005FG", - ["mov.d_2"] = "46200006FG", - ["neg.d_2"] = "46200007FG", - ["round.l.d_2"] = "46200008FG", - ["trunc.l.d_2"] = "46200009FG", - ["ceil.l.d_2"] = "4620000aFG", - ["floor.l.d_2"] = "4620000bFG", - ["round.w.d_2"] = "4620000cFG", - ["trunc.w.d_2"] = "4620000dFG", - ["ceil.w.d_2"] = "4620000eFG", - ["floor.w.d_2"] = "4620000fFG", - ["movf.d_2"] = "46200011FG", - ["movf.d_3"] = "46200011FGC", - ["movt.d_2"] = "46210011FG", - ["movt.d_3"] = "46210011FGC", - ["movz.d_3"] = "46200012FGT", - ["movn.d_3"] = "46200013FGT", - ["recip.d_2"] = "46200015FG", - ["rsqrt.d_2"] = "46200016FG", - ["cvt.s.d_2"] = "46200020FG", - ["cvt.w.d_2"] = "46200024FG", - ["cvt.l.d_2"] = "46200025FG", - ["c.f.d_2"] = "46200030GH", - ["c.f.d_3"] = "46200030VGH", - ["c.un.d_2"] = "46200031GH", - ["c.un.d_3"] = "46200031VGH", - ["c.eq.d_2"] = "46200032GH", - ["c.eq.d_3"] = "46200032VGH", - 
["c.ueq.d_2"] = "46200033GH", - ["c.ueq.d_3"] = "46200033VGH", - ["c.olt.d_2"] = "46200034GH", - ["c.olt.d_3"] = "46200034VGH", - ["c.ult.d_2"] = "46200035GH", - ["c.ult.d_3"] = "46200035VGH", - ["c.ole.d_2"] = "46200036GH", - ["c.ole.d_3"] = "46200036VGH", - ["c.ule.d_2"] = "46200037GH", - ["c.ule.d_3"] = "46200037VGH", - ["c.sf.d_2"] = "46200038GH", - ["c.sf.d_3"] = "46200038VGH", - ["c.ngle.d_2"] = "46200039GH", - ["c.ngle.d_3"] = "46200039VGH", - ["c.seq.d_2"] = "4620003aGH", - ["c.seq.d_3"] = "4620003aVGH", - ["c.ngl.d_2"] = "4620003bGH", - ["c.ngl.d_3"] = "4620003bVGH", - ["c.lt.d_2"] = "4620003cGH", - ["c.lt.d_3"] = "4620003cVGH", - ["c.nge.d_2"] = "4620003dGH", - ["c.nge.d_3"] = "4620003dVGH", - ["c.le.d_2"] = "4620003eGH", - ["c.le.d_3"] = "4620003eVGH", - ["c.ngt.d_2"] = "4620003fGH", - ["c.ngt.d_3"] = "4620003fVGH", - - ["add.ps_3"] = "46c00000FGH", - ["sub.ps_3"] = "46c00001FGH", - ["mul.ps_3"] = "46c00002FGH", - ["abs.ps_2"] = "46c00005FG", - ["mov.ps_2"] = "46c00006FG", - ["neg.ps_2"] = "46c00007FG", - ["movf.ps_2"] = "46c00011FG", - ["movf.ps_3"] = "46c00011FGC", - ["movt.ps_2"] = "46c10011FG", - ["movt.ps_3"] = "46c10011FGC", - ["movz.ps_3"] = "46c00012FGT", - ["movn.ps_3"] = "46c00013FGT", - ["cvt.s.pu_2"] = "46c00020FG", - ["cvt.s.pl_2"] = "46c00028FG", - ["pll.ps_3"] = "46c0002cFGH", - ["plu.ps_3"] = "46c0002dFGH", - ["pul.ps_3"] = "46c0002eFGH", - ["puu.ps_3"] = "46c0002fFGH", - ["c.f.ps_2"] = "46c00030GH", - ["c.f.ps_3"] = "46c00030VGH", - ["c.un.ps_2"] = "46c00031GH", - ["c.un.ps_3"] = "46c00031VGH", - ["c.eq.ps_2"] = "46c00032GH", - ["c.eq.ps_3"] = "46c00032VGH", - ["c.ueq.ps_2"] = "46c00033GH", - ["c.ueq.ps_3"] = "46c00033VGH", - ["c.olt.ps_2"] = "46c00034GH", - ["c.olt.ps_3"] = "46c00034VGH", - ["c.ult.ps_2"] = "46c00035GH", - ["c.ult.ps_3"] = "46c00035VGH", - ["c.ole.ps_2"] = "46c00036GH", - ["c.ole.ps_3"] = "46c00036VGH", - ["c.ule.ps_2"] = "46c00037GH", - ["c.ule.ps_3"] = "46c00037VGH", - ["c.sf.ps_2"] = "46c00038GH", - ["c.sf.ps_3"] = 
"46c00038VGH", - ["c.ngle.ps_2"] = "46c00039GH", - ["c.ngle.ps_3"] = "46c00039VGH", - ["c.seq.ps_2"] = "46c0003aGH", - ["c.seq.ps_3"] = "46c0003aVGH", - ["c.ngl.ps_2"] = "46c0003bGH", - ["c.ngl.ps_3"] = "46c0003bVGH", - ["c.lt.ps_2"] = "46c0003cGH", - ["c.lt.ps_3"] = "46c0003cVGH", - ["c.nge.ps_2"] = "46c0003dGH", - ["c.nge.ps_3"] = "46c0003dVGH", - ["c.le.ps_2"] = "46c0003eGH", - ["c.le.ps_3"] = "46c0003eVGH", - ["c.ngt.ps_2"] = "46c0003fGH", - ["c.ngt.ps_3"] = "46c0003fVGH", - - ["cvt.s.w_2"] = "46800020FG", - ["cvt.d.w_2"] = "46800021FG", - - ["cvt.s.l_2"] = "46a00020FG", - ["cvt.d.l_2"] = "46a00021FG", - - -- Opcode COP1X. - lwxc1_2 = "4c000000FX", - ldxc1_2 = "4c000001FX", - luxc1_2 = "4c000005FX", - swxc1_2 = "4c000008FX", - sdxc1_2 = "4c000009FX", - suxc1_2 = "4c00000dFX", - prefx_2 = "4c00000fMX", - ["alnv.ps_4"] = "4c00001eFGHS", - ["madd.s_4"] = "4c000020FRGH", - ["madd.d_4"] = "4c000021FRGH", - ["madd.ps_4"] = "4c000026FRGH", - ["msub.s_4"] = "4c000028FRGH", - ["msub.d_4"] = "4c000029FRGH", - ["msub.ps_4"] = "4c00002eFRGH", - ["nmadd.s_4"] = "4c000030FRGH", - ["nmadd.d_4"] = "4c000031FRGH", - ["nmadd.ps_4"] = "4c000036FRGH", - ["nmsub.s_4"] = "4c000038FRGH", - ["nmsub.d_4"] = "4c000039FRGH", - ["nmsub.ps_4"] = "4c00003eFRGH", -} - ------------------------------------------------------------------------------- - -local function parse_gpr(expr) - local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$") - local tp = map_type[tname or expr] - if tp then - local reg = ovreg or tp.reg - if not reg then - werror("type `"..(tname or expr).."' needs a register override") - end - expr = reg - end - local r = match(expr, "^r([1-3]?[0-9])$") - if r then - r = tonumber(r) - if r <= 31 then return r, tp end - end - werror("bad register name `"..expr.."'") -end - -local function parse_fpr(expr) - local r = match(expr, "^f([1-3]?[0-9])$") - if r then - r = tonumber(r) - if r <= 31 then return r end - end - werror("bad register name `"..expr.."'") -end - -local 
function parse_imm(imm, bits, shift, scale, signed, action) - local n = tonumber(imm) - if n then - local m = sar(n, scale) - if shl(m, scale) == n then - if signed then - local s = sar(m, bits-1) - if s == 0 then return shl(m, shift) - elseif s == -1 then return shl(m + shl(1, bits), shift) end - else - if sar(m, bits) == 0 then return shl(m, shift) end - end - end - werror("out of range immediate `"..imm.."'") - elseif match(imm, "^[rf]([1-3]?[0-9])$") or - match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then - werror("expected immediate operand, got register") - else - waction(action or "IMM", - (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm) - return 0 - end -end - -local function parse_disp(disp) - local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") - if imm then - local r = shl(parse_gpr(reg), 21) - local extname = match(imm, "^extern%s+(%S+)$") - if extname then - waction("REL_EXT", map_extern[extname], nil, 1) - return r - else - return r + parse_imm(imm, 16, 0, 0, true) - end - end - local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") - if reg and tailr ~= "" then - local r, tp = parse_gpr(reg) - if tp then - waction("IMM", 32768+16*32, format(tp.ctypefmt, tailr)) - return shl(r, 21) - end - end - werror("bad displacement `"..disp.."'") -end - -local function parse_index(idx) - local rt, rs = match(idx, "^(.*)%(([%w_:]+)%)$") - if rt then - rt = parse_gpr(rt) - rs = parse_gpr(rs) - return shl(rt, 16) + shl(rs, 21) - end - werror("bad index `"..idx.."'") -end - -local function parse_label(label, def) - local prefix = sub(label, 1, 2) - -- =>label (pc label reference) - if prefix == "=>" then - return "PC", 0, sub(label, 3) - end - -- ->name (global label reference) - if prefix == "->" then - return "LG", map_global[sub(label, 3)] - end - if def then - -- [1-9] (local label definition) - if match(label, "^[1-9]$") then - return "LG", 10+tonumber(label) - end - else - -- [<>][1-9] (local label reference) - local dir, lnum = match(label, 
"^([<>])([1-9])$") - if dir then -- Fwd: 1-9, Bkwd: 11-19. - return "LG", lnum + (dir == ">" and 0 or 10) - end - -- extern label (extern label reference) - local extname = match(label, "^extern%s+(%S+)$") - if extname then - return "EXT", map_extern[extname] - end - end - werror("bad label `"..label.."'") -end - ------------------------------------------------------------------------------- - --- Handle opcodes defined with template strings. -map_op[".template__"] = function(params, template, nparams) - if not params then return sub(template, 9) end - local op = tonumber(sub(template, 1, 8), 16) - local n = 1 - - -- Limit number of section buffer positions used by a single dasm_put(). - -- A single opcode needs a maximum of 2 positions (ins/ext). - if secpos+2 > maxsecpos then wflush() end - local pos = wpos() - - -- Process each character. - for p in gmatch(sub(template, 9), ".") do - if p == "D" then - op = op + shl(parse_gpr(params[n]), 11); n = n + 1 - elseif p == "T" then - op = op + shl(parse_gpr(params[n]), 16); n = n + 1 - elseif p == "S" then - op = op + shl(parse_gpr(params[n]), 21); n = n + 1 - elseif p == "F" then - op = op + shl(parse_fpr(params[n]), 6); n = n + 1 - elseif p == "G" then - op = op + shl(parse_fpr(params[n]), 11); n = n + 1 - elseif p == "H" then - op = op + shl(parse_fpr(params[n]), 16); n = n + 1 - elseif p == "R" then - op = op + shl(parse_fpr(params[n]), 21); n = n + 1 - elseif p == "I" then - op = op + parse_imm(params[n], 16, 0, 0, true); n = n + 1 - elseif p == "U" then - op = op + parse_imm(params[n], 16, 0, 0, false); n = n + 1 - elseif p == "O" then - op = op + parse_disp(params[n]); n = n + 1 - elseif p == "X" then - op = op + parse_index(params[n]); n = n + 1 - elseif p == "B" or p == "J" then - local mode, n, s = parse_label(params[n], false) - if p == "B" then n = n + 2048 end - waction("REL_"..mode, n, s, 1) - n = n + 1 - elseif p == "A" then - op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 - elseif p == "a" 
then - local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1 - op = op + band(m, 0x7c0) + band(shr(m, 9), 4) - elseif p == "M" then - op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1 - elseif p == "N" then - op = op + parse_imm(params[n], 5, 16, 0, false); n = n + 1 - elseif p == "C" then - op = op + parse_imm(params[n], 3, 18, 0, false); n = n + 1 - elseif p == "V" then - op = op + parse_imm(params[n], 3, 8, 0, false); n = n + 1 - elseif p == "W" then - op = op + parse_imm(params[n], 3, 0, 0, false); n = n + 1 - elseif p == "Y" then - op = op + parse_imm(params[n], 20, 6, 0, false); n = n + 1 - elseif p == "Z" then - op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1 - elseif p == "=" then - op = op + shl(band(op, 0xf800), 5) -- Copy D to T for clz, clo. - else - assert(false) - end - end - wputpos(pos, op) -end - ------------------------------------------------------------------------------- - --- Pseudo-opcode to mark the position where the action list is to be emitted. -map_op[".actionlist_1"] = function(params) - if not params then return "cvar" end - local name = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeactions(out, name) end) -end - --- Pseudo-opcode to mark the position where the global enum is to be emitted. -map_op[".globals_1"] = function(params) - if not params then return "prefix" end - local prefix = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeglobals(out, prefix) end) -end - --- Pseudo-opcode to mark the position where the global names are to be emitted. -map_op[".globalnames_1"] = function(params) - if not params then return "cvar" end - local name = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeglobalnames(out, name) end) -end - --- Pseudo-opcode to mark the position where the extern names are to be emitted. 
-map_op[".externnames_1"] = function(params) - if not params then return "cvar" end - local name = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeexternnames(out, name) end) -end - ------------------------------------------------------------------------------- - --- Label pseudo-opcode (converted from trailing colon form). -map_op[".label_1"] = function(params) - if not params then return "[1-9] | ->global | =>pcexpr" end - if secpos+1 > maxsecpos then wflush() end - local mode, n, s = parse_label(params[1], true) - if mode == "EXT" then werror("bad label definition") end - waction("LABEL_"..mode, n, s, 1) -end - ------------------------------------------------------------------------------- - --- Pseudo-opcodes for data storage. -map_op[".long_*"] = function(params) - if not params then return "imm..." end - for _,p in ipairs(params) do - local n = tonumber(p) - if not n then werror("bad immediate `"..p.."'") end - if n < 0 then n = n + 2^32 end - wputw(n) - if secpos+2 > maxsecpos then wflush() end - end -end - --- Alignment pseudo-opcode. -map_op[".align_1"] = function(params) - if not params then return "numpow2" end - if secpos+1 > maxsecpos then wflush() end - local align = tonumber(params[1]) - if align then - local x = align - -- Must be a power of 2 in the range (2 ... 256). - for i=1,8 do - x = x / 2 - if x == 1 then - waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. - return - end - end - end - werror("bad alignment") -end - ------------------------------------------------------------------------------- - --- Pseudo-opcode for (primitive) type definitions (map to C types). 
-map_op[".type_3"] = function(params, nparams) - if not params then - return nparams == 2 and "name, ctype" or "name, ctype, reg" - end - local name, ctype, reg = params[1], params[2], params[3] - if not match(name, "^[%a_][%w_]*$") then - werror("bad type name `"..name.."'") - end - local tp = map_type[name] - if tp then - werror("duplicate type `"..name.."'") - end - -- Add #type to defines. A bit unclean to put it in map_archdef. - map_archdef["#"..name] = "sizeof("..ctype..")" - -- Add new type and emit shortcut define. - local num = ctypenum + 1 - map_type[name] = { - ctype = ctype, - ctypefmt = format("Dt%X(%%s)", num), - reg = reg, - } - wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) - ctypenum = num -end -map_op[".type_2"] = map_op[".type_3"] - --- Dump type definitions. -local function dumptypes(out, lvl) - local t = {} - for name in pairs(map_type) do t[#t+1] = name end - sort(t) - out:write("Type definitions:\n") - for _,name in ipairs(t) do - local tp = map_type[name] - local reg = tp.reg or "" - out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) - end - out:write("\n") -end - ------------------------------------------------------------------------------- - --- Set the current section. -function _M.section(num) - waction("SECTION", num) - wflush(true) -- SECTION is a terminal action. -end - ------------------------------------------------------------------------------- - --- Dump architecture description. -function _M.dumparch(out) - out:write(format("DynASM %s version %s, released %s\n\n", - _info.arch, _info.version, _info.release)) - dumpactions(out) -end - --- Dump all user defined elements. -function _M.dumpdef(out, lvl) - dumptypes(out, lvl) - dumpglobals(out, lvl) - dumpexterns(out, lvl) -end - ------------------------------------------------------------------------------- - --- Pass callbacks from/to the DynASM core. 
-function _M.passcb(wl, we, wf, ww) - wline, werror, wfatal, wwarn = wl, we, wf, ww - return wflush -end - --- Setup the arch-specific module. -function _M.setup(arch, opt) - g_arch, g_opt = arch, opt -end - --- Merge the core maps and the arch-specific maps. -function _M.mergemaps(map_coreop, map_def) - setmetatable(map_op, { __index = map_coreop }) - setmetatable(map_def, { __index = map_archdef }) - return map_op, map_def -end - -return _M - ------------------------------------------------------------------------------- - diff --git a/dynasm/dasm_mips64.lua b/dynasm/dasm_mips64.lua deleted file mode 100644 index 5636b23a6e..0000000000 --- a/dynasm/dasm_mips64.lua +++ /dev/null @@ -1,12 +0,0 @@ ------------------------------------------------------------------------------- --- DynASM MIPS64 module. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------- --- This module just sets 64 bit mode for the combined MIPS/MIPS64 module. --- All the interesting stuff is there. ------------------------------------------------------------------------------- - -mips64 = true -- Using a global is an ugly, but effective solution. -return require("dasm_mips") diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h deleted file mode 100644 index 3a7ee9b0e9..0000000000 --- a/dynasm/dasm_ppc.h +++ /dev/null @@ -1,419 +0,0 @@ -/* -** DynASM PPC/PPC64 encoding engine. -** Copyright (C) 2005-2017 Mike Pall. All rights reserved. -** Released under the MIT license. See dynasm.lua for full copyright notice. -*/ - -#include -#include -#include -#include - -#define DASM_ARCH "ppc" - -#ifndef DASM_EXTERN -#define DASM_EXTERN(a,b,c,d) 0 -#endif - -/* Action definitions. */ -enum { - DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, - /* The following actions need a buffer position. 
*/ - DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, - /* The following actions also have an argument. */ - DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH, - DASM__MAX -}; - -/* Maximum number of section buffer positions for a single dasm_put() call. */ -#define DASM_MAXSECPOS 25 - -/* DynASM encoder status codes. Action list offset or number are or'ed in. */ -#define DASM_S_OK 0x00000000 -#define DASM_S_NOMEM 0x01000000 -#define DASM_S_PHASE 0x02000000 -#define DASM_S_MATCH_SEC 0x03000000 -#define DASM_S_RANGE_I 0x11000000 -#define DASM_S_RANGE_SEC 0x12000000 -#define DASM_S_RANGE_LG 0x13000000 -#define DASM_S_RANGE_PC 0x14000000 -#define DASM_S_RANGE_REL 0x15000000 -#define DASM_S_UNDEF_LG 0x21000000 -#define DASM_S_UNDEF_PC 0x22000000 - -/* Macros to convert positions (8 bit section + 24 bit index). */ -#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) -#define DASM_POS2BIAS(pos) ((pos)&0xff000000) -#define DASM_SEC2POS(sec) ((sec)<<24) -#define DASM_POS2SEC(pos) ((pos)>>24) -#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) - -/* Action list type. */ -typedef const unsigned int *dasm_ActList; - -/* Per-section structure. */ -typedef struct dasm_Section { - int *rbuf; /* Biased buffer pointer (negative section bias). */ - int *buf; /* True buffer pointer. */ - size_t bsize; /* Buffer size in bytes. */ - int pos; /* Biased buffer position. */ - int epos; /* End of biased buffer position - max single put. */ - int ofs; /* Byte offset into section. */ -} dasm_Section; - -/* Core structure holding the DynASM encoding state. */ -struct dasm_State { - size_t psize; /* Allocated size of this structure. */ - dasm_ActList actionlist; /* Current actionlist pointer. */ - int *lglabels; /* Local/global chain/pos ptrs. */ - size_t lgsize; - int *pclabels; /* PC label chains/pos ptrs. */ - size_t pcsize; - void **globals; /* Array of globals (bias -10). */ - dasm_Section *section; /* Pointer to active section. 
*/ - size_t codesize; /* Total size of all code sections. */ - int maxsection; /* 0 <= sectionidx < maxsection. */ - int status; /* Status code. */ - dasm_Section sections[1]; /* All sections. Alloc-extended. */ -}; - -/* The size of the core structure depends on the max. number of sections. */ -#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) - - -/* Initialize DynASM state. */ -void dasm_init(Dst_DECL, int maxsection) -{ - dasm_State *D; - size_t psz = 0; - int i; - Dst_REF = NULL; - DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); - D = Dst_REF; - D->psize = psz; - D->lglabels = NULL; - D->lgsize = 0; - D->pclabels = NULL; - D->pcsize = 0; - D->globals = NULL; - D->maxsection = maxsection; - for (i = 0; i < maxsection; i++) { - D->sections[i].buf = NULL; /* Need this for pass3. */ - D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); - D->sections[i].bsize = 0; - D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ - } -} - -/* Free DynASM state. */ -void dasm_free(Dst_DECL) -{ - dasm_State *D = Dst_REF; - int i; - for (i = 0; i < D->maxsection; i++) - if (D->sections[i].buf) - DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); - if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); - if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); - DASM_M_FREE(Dst, D, D->psize); -} - -/* Setup global label array. Must be called before dasm_setup(). */ -void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) -{ - dasm_State *D = Dst_REF; - D->globals = gl - 10; /* Negative bias to compensate for locals. */ - DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); -} - -/* Grow PC label array. Can be called after dasm_setup(), too. 
*/ -void dasm_growpc(Dst_DECL, unsigned int maxpc) -{ - dasm_State *D = Dst_REF; - size_t osz = D->pcsize; - DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); - memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); -} - -/* Setup encoder. */ -void dasm_setup(Dst_DECL, const void *actionlist) -{ - dasm_State *D = Dst_REF; - int i; - D->actionlist = (dasm_ActList)actionlist; - D->status = DASM_S_OK; - D->section = &D->sections[0]; - memset((void *)D->lglabels, 0, D->lgsize); - if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); - for (i = 0; i < D->maxsection; i++) { - D->sections[i].pos = DASM_SEC2POS(i); - D->sections[i].ofs = 0; - } -} - - -#ifdef DASM_CHECKS -#define CK(x, st) \ - do { if (!(x)) { \ - D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) -#define CKPL(kind, st) \ - do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ - D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) -#else -#define CK(x, st) ((void)0) -#define CKPL(kind, st) ((void)0) -#endif - -/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ -void dasm_put(Dst_DECL, int start, ...) -{ - va_list ap; - dasm_State *D = Dst_REF; - dasm_ActList p = D->actionlist + start; - dasm_Section *sec = D->section; - int pos = sec->pos, ofs = sec->ofs; - int *b; - - if (pos >= sec->epos) { - DASM_M_GROW(Dst, int, sec->buf, sec->bsize, - sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); - sec->rbuf = sec->buf - DASM_POS2BIAS(pos); - sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); - } - - b = sec->rbuf; - b[pos++] = start; - - va_start(ap, start); - while (1) { - unsigned int ins = *p++; - unsigned int action = (ins >> 16); - if (action >= DASM__MAX) { - ofs += 4; - } else { - int *pl, n = action >= DASM_REL_PC ? 
va_arg(ap, int) : 0; - switch (action) { - case DASM_STOP: goto stop; - case DASM_SECTION: - n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); - D->section = &D->sections[n]; goto stop; - case DASM_ESC: p++; ofs += 4; break; - case DASM_REL_EXT: break; - case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; - case DASM_REL_LG: - n = (ins & 2047) - 10; pl = D->lglabels + n; - /* Bkwd rel or global. */ - if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } - pl += 10; n = *pl; - if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ - goto linkrel; - case DASM_REL_PC: - pl = D->pclabels + n; CKPL(pc, PC); - putrel: - n = *pl; - if (n < 0) { /* Label exists. Get label pos and store it. */ - b[pos] = -n; - } else { - linkrel: - b[pos] = n; /* Else link to rel chain, anchored at label. */ - *pl = pos; - } - pos++; - break; - case DASM_LABEL_LG: - pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; - case DASM_LABEL_PC: - pl = D->pclabels + n; CKPL(pc, PC); - putlabel: - n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ - while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; - } - *pl = -pos; /* Label exists now. */ - b[pos++] = ofs; /* Store pass1 offset estimate. */ - break; - case DASM_IMM: -#ifdef DASM_CHECKS - CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); -#endif - n >>= ((ins>>10)&31); -#ifdef DASM_CHECKS - if (ins & 0x8000) - CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); - else - CK((n>>((ins>>5)&31)) == 0, RANGE_I); -#endif - b[pos++] = n; - break; - case DASM_IMMSH: - CK((n >> 6) == 0, RANGE_I); - b[pos++] = n; - break; - } - } - } -stop: - va_end(ap); - sec->pos = pos; - sec->ofs = ofs; -} -#undef CK - -/* Pass 2: Link sections, shrink aligns, fix label offsets. 
*/ -int dasm_link(Dst_DECL, size_t *szp) -{ - dasm_State *D = Dst_REF; - int secnum; - int ofs = 0; - -#ifdef DASM_CHECKS - *szp = 0; - if (D->status != DASM_S_OK) return D->status; - { - int pc; - for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) - if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; - } -#endif - - { /* Handle globals not defined in this translation unit. */ - int idx; - for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { - int n = D->lglabels[idx]; - /* Undefined label: Collapse rel chain and replace with marker (< 0). */ - while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } - } - } - - /* Combine all code sections. No support for data sections (yet). */ - for (secnum = 0; secnum < D->maxsection; secnum++) { - dasm_Section *sec = D->sections + secnum; - int *b = sec->rbuf; - int pos = DASM_SEC2POS(secnum); - int lastpos = sec->pos; - - while (pos != lastpos) { - dasm_ActList p = D->actionlist + b[pos++]; - while (1) { - unsigned int ins = *p++; - unsigned int action = (ins >> 16); - switch (action) { - case DASM_STOP: case DASM_SECTION: goto stop; - case DASM_ESC: p++; break; - case DASM_REL_EXT: break; - case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; - case DASM_REL_LG: case DASM_REL_PC: pos++; break; - case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; - case DASM_IMM: case DASM_IMMSH: pos++; break; - } - } - stop: (void)0; - } - ofs += sec->ofs; /* Next section starts right after current section. */ - } - - D->codesize = ofs; /* Total size of all code sections */ - *szp = ofs; - return DASM_S_OK; -} - -#ifdef DASM_CHECKS -#define CK(x, st) \ - do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) -#else -#define CK(x, st) ((void)0) -#endif - -/* Pass 3: Encode sections. */ -int dasm_encode(Dst_DECL, void *buffer) -{ - dasm_State *D = Dst_REF; - char *base = (char *)buffer; - unsigned int *cp = (unsigned int *)buffer; - int secnum; - - /* Encode all code sections. 
No support for data sections (yet). */ - for (secnum = 0; secnum < D->maxsection; secnum++) { - dasm_Section *sec = D->sections + secnum; - int *b = sec->buf; - int *endb = sec->rbuf + sec->pos; - - while (b != endb) { - dasm_ActList p = D->actionlist + *b++; - while (1) { - unsigned int ins = *p++; - unsigned int action = (ins >> 16); - int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; - switch (action) { - case DASM_STOP: case DASM_SECTION: goto stop; - case DASM_ESC: *cp++ = *p++; break; - case DASM_REL_EXT: - n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4; - goto patchrel; - case DASM_ALIGN: - ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; - break; - case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); - case DASM_REL_PC: - CK(n >= 0, UNDEF_PC); - n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); - patchrel: - CK((n & 3) == 0 && - (((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >> - ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL); - cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc)); - break; - case DASM_LABEL_LG: - ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); - break; - case DASM_LABEL_PC: break; - case DASM_IMM: - cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); - break; - case DASM_IMMSH: - cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32); - break; - default: *cp++ = ins; break; - } - } - stop: (void)0; - } - } - - if (base + D->codesize != (char *)cp) /* Check for phase errors. */ - return DASM_S_PHASE; - return DASM_S_OK; -} -#undef CK - -/* Get PC label offset. */ -int dasm_getpclabel(Dst_DECL, unsigned int pc) -{ - dasm_State *D = Dst_REF; - if (pc*sizeof(int) < D->pcsize) { - int pos = D->pclabels[pc]; - if (pos < 0) return *DASM_POS2PTR(D, -pos); - if (pos > 0) return -1; /* Undefined. */ - } - return -2; /* Unused or out of range. */ -} - -#ifdef DASM_CHECKS -/* Optional sanity checker to call between isolated encoding steps. 
*/ -int dasm_checkstep(Dst_DECL, int secmatch) -{ - dasm_State *D = Dst_REF; - if (D->status == DASM_S_OK) { - int i; - for (i = 1; i <= 9; i++) { - if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } - D->lglabels[i] = 0; - } - } - if (D->status == DASM_S_OK && secmatch >= 0 && - D->section != &D->sections[secmatch]) - D->status = DASM_S_MATCH_SEC|(D->section-D->sections); - return D->status; -} -#endif - diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua deleted file mode 100644 index f73974dd7f..0000000000 --- a/dynasm/dasm_ppc.lua +++ /dev/null @@ -1,1919 +0,0 @@ ------------------------------------------------------------------------------- --- DynASM PPC/PPC64 module. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- See dynasm.lua for full copyright notice. --- --- Support for various extensions contributed by Caio Souza Oliveira. ------------------------------------------------------------------------------- - --- Module information: -local _info = { - arch = "ppc", - description = "DynASM PPC module", - version = "1.4.0", - vernum = 10400, - release = "2015-10-18", - author = "Mike Pall", - license = "MIT", -} - --- Exported glue functions for the arch-specific module. -local _M = { _info = _info } - --- Cache library functions. -local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs -local assert, setmetatable = assert, setmetatable -local _s = string -local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char -local match, gmatch = _s.match, _s.gmatch -local concat, sort = table.concat, table.sort -local bit = bit or require("bit") -local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift -local tohex = bit.tohex - --- Inherited tables and callbacks. -local g_opt, g_arch -local wline, werror, wfatal, wwarn - --- Action name list. --- CHECK: Keep this in sync with the C code! 
-local action_names = { - "STOP", "SECTION", "ESC", "REL_EXT", - "ALIGN", "REL_LG", "LABEL_LG", - "REL_PC", "LABEL_PC", "IMM", "IMMSH" -} - --- Maximum number of section buffer positions for dasm_put(). --- CHECK: Keep this in sync with the C code! -local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. - --- Action name -> action number. -local map_action = {} -for n,name in ipairs(action_names) do - map_action[name] = n-1 -end - --- Action list buffer. -local actlist = {} - --- Argument list for next dasm_put(). Start with offset 0 into action list. -local actargs = { 0 } - --- Current number of section buffer positions for dasm_put(). -local secpos = 1 - ------------------------------------------------------------------------------- - --- Dump action names and numbers. -local function dumpactions(out) - out:write("DynASM encoding engine action codes:\n") - for n,name in ipairs(action_names) do - local num = map_action[name] - out:write(format(" %-10s %02X %d\n", name, num, num)) - end - out:write("\n") -end - --- Write action list buffer as a huge static C array. -local function writeactions(out, name) - local nn = #actlist - if nn == 0 then nn = 1; actlist[0] = map_action.STOP end - out:write("static const unsigned int ", name, "[", nn, "] = {\n") - for i = 1,nn-1 do - assert(out:write("0x", tohex(actlist[i]), ",\n")) - end - assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) -end - ------------------------------------------------------------------------------- - --- Add word to action list. -local function wputxw(n) - assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") - actlist[#actlist+1] = n -end - --- Add action to list with optional arg. Advance buffer pos, too. 
-local function waction(action, val, a, num) - local w = assert(map_action[action], "bad action name `"..action.."'") - wputxw(w * 0x10000 + (val or 0)) - if a then actargs[#actargs+1] = a end - if a or num then secpos = secpos + (num or 1) end -end - --- Flush action list (intervening C code or buffer pos overflow). -local function wflush(term) - if #actlist == actargs[1] then return end -- Nothing to flush. - if not term then waction("STOP") end -- Terminate action list. - wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) - actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). - secpos = 1 -- The actionlist offset occupies a buffer position, too. -end - --- Put escaped word. -local function wputw(n) - if n <= 0xffffff then waction("ESC") end - wputxw(n) -end - --- Reserve position for word. -local function wpos() - local pos = #actlist+1 - actlist[pos] = "" - return pos -end - --- Store word to reserved position. -local function wputpos(pos, n) - assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") - actlist[pos] = n -end - ------------------------------------------------------------------------------- - --- Global label name -> global label number. With auto assignment on 1st use. -local next_global = 20 -local map_global = setmetatable({}, { __index = function(t, name) - if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end - local n = next_global - if n > 2047 then werror("too many global labels") end - next_global = n + 1 - t[name] = n - return n -end}) - --- Dump global labels. -local function dumpglobals(out, lvl) - local t = {} - for name, n in pairs(map_global) do t[n] = name end - out:write("Global labels:\n") - for i=20,next_global-1 do - out:write(format(" %s\n", t[i])) - end - out:write("\n") -end - --- Write global label enum. 
-local function writeglobals(out, prefix) - local t = {} - for name, n in pairs(map_global) do t[n] = name end - out:write("enum {\n") - for i=20,next_global-1 do - out:write(" ", prefix, t[i], ",\n") - end - out:write(" ", prefix, "_MAX\n};\n") -end - --- Write global label names. -local function writeglobalnames(out, name) - local t = {} - for name, n in pairs(map_global) do t[n] = name end - out:write("static const char *const ", name, "[] = {\n") - for i=20,next_global-1 do - out:write(" \"", t[i], "\",\n") - end - out:write(" (const char *)0\n};\n") -end - ------------------------------------------------------------------------------- - --- Extern label name -> extern label number. With auto assignment on 1st use. -local next_extern = 0 -local map_extern_ = {} -local map_extern = setmetatable({}, { __index = function(t, name) - -- No restrictions on the name for now. - local n = next_extern - if n > 2047 then werror("too many extern labels") end - next_extern = n + 1 - t[name] = n - map_extern_[n] = name - return n -end}) - --- Dump extern labels. -local function dumpexterns(out, lvl) - out:write("Extern labels:\n") - for i=0,next_extern-1 do - out:write(format(" %s\n", map_extern_[i])) - end - out:write("\n") -end - --- Write extern label names. -local function writeexternnames(out, name) - out:write("static const char *const ", name, "[] = {\n") - for i=0,next_extern-1 do - out:write(" \"", map_extern_[i], "\",\n") - end - out:write(" (const char *)0\n};\n") -end - ------------------------------------------------------------------------------- - --- Arch-specific maps. -local map_archdef = { sp = "r1" } -- Ext. register name -> int. name. - -local map_type = {} -- Type name -> { ctype, reg } -local ctypenum = 0 -- Type number (for Dt... macros). - --- Reverse defines for registers. 
-function _M.revdef(s) - if s == "r1" then return "sp" end - return s -end - -local map_cond = { - lt = 0, gt = 1, eq = 2, so = 3, - ge = 4, le = 5, ne = 6, ns = 7, -} - ------------------------------------------------------------------------------- - -local map_op, op_template - -local function op_alias(opname, f) - return function(params, nparams) - if not params then return "-> "..opname:sub(1, -3) end - f(params, nparams) - op_template(params, map_op[opname], nparams) - end -end - --- Template strings for PPC instructions. -map_op = { - tdi_3 = "08000000ARI", - twi_3 = "0c000000ARI", - mulli_3 = "1c000000RRI", - subfic_3 = "20000000RRI", - cmplwi_3 = "28000000XRU", - cmplwi_2 = "28000000-RU", - cmpldi_3 = "28200000XRU", - cmpldi_2 = "28200000-RU", - cmpwi_3 = "2c000000XRI", - cmpwi_2 = "2c000000-RI", - cmpdi_3 = "2c200000XRI", - cmpdi_2 = "2c200000-RI", - addic_3 = "30000000RRI", - ["addic._3"] = "34000000RRI", - addi_3 = "38000000RR0I", - li_2 = "38000000RI", - la_2 = "38000000RD", - addis_3 = "3c000000RR0I", - lis_2 = "3c000000RI", - lus_2 = "3c000000RU", - bc_3 = "40000000AAK", - bcl_3 = "40000001AAK", - bdnz_1 = "42000000K", - bdz_1 = "42400000K", - sc_0 = "44000000", - b_1 = "48000000J", - bl_1 = "48000001J", - rlwimi_5 = "50000000RR~AAA.", - rlwinm_5 = "54000000RR~AAA.", - rlwnm_5 = "5c000000RR~RAA.", - ori_3 = "60000000RR~U", - nop_0 = "60000000", - oris_3 = "64000000RR~U", - xori_3 = "68000000RR~U", - xoris_3 = "6c000000RR~U", - ["andi._3"] = "70000000RR~U", - ["andis._3"] = "74000000RR~U", - lwz_2 = "80000000RD", - lwzu_2 = "84000000RD", - lbz_2 = "88000000RD", - lbzu_2 = "8c000000RD", - stw_2 = "90000000RD", - stwu_2 = "94000000RD", - stb_2 = "98000000RD", - stbu_2 = "9c000000RD", - lhz_2 = "a0000000RD", - lhzu_2 = "a4000000RD", - lha_2 = "a8000000RD", - lhau_2 = "ac000000RD", - sth_2 = "b0000000RD", - sthu_2 = "b4000000RD", - lmw_2 = "b8000000RD", - stmw_2 = "bc000000RD", - lfs_2 = "c0000000FD", - lfsu_2 = "c4000000FD", - lfd_2 = "c8000000FD", - 
lfdu_2 = "cc000000FD", - stfs_2 = "d0000000FD", - stfsu_2 = "d4000000FD", - stfd_2 = "d8000000FD", - stfdu_2 = "dc000000FD", - ld_2 = "e8000000RD", -- NYI: displacement must be divisible by 4. - ldu_2 = "e8000001RD", - lwa_2 = "e8000002RD", - std_2 = "f8000000RD", - stdu_2 = "f8000001RD", - - subi_3 = op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end), - subis_3 = op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end), - subic_3 = op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end), - ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end), - - rotlwi_3 = op_alias("rlwinm_5", function(p) - p[4] = "0"; p[5] = "31" - end), - rotrwi_3 = op_alias("rlwinm_5", function(p) - p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31" - end), - rotlw_3 = op_alias("rlwnm_5", function(p) - p[4] = "0"; p[5] = "31" - end), - slwi_3 = op_alias("rlwinm_5", function(p) - p[5] = "31-("..p[3]..")"; p[4] = "0" - end), - srwi_3 = op_alias("rlwinm_5", function(p) - p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31" - end), - clrlwi_3 = op_alias("rlwinm_5", function(p) - p[4] = p[3]; p[3] = "0"; p[5] = "31" - end), - clrrwi_3 = op_alias("rlwinm_5", function(p) - p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0" - end), - - -- Primary opcode 4: - mulhhwu_3 = "10000010RRR.", - machhwu_3 = "10000018RRR.", - mulhhw_3 = "10000050RRR.", - nmachhw_3 = "1000005cRRR.", - machhwsu_3 = "10000098RRR.", - machhws_3 = "100000d8RRR.", - nmachhws_3 = "100000dcRRR.", - mulchwu_3 = "10000110RRR.", - macchwu_3 = "10000118RRR.", - mulchw_3 = "10000150RRR.", - macchw_3 = "10000158RRR.", - nmacchw_3 = "1000015cRRR.", - macchwsu_3 = "10000198RRR.", - macchws_3 = "100001d8RRR.", - nmacchws_3 = "100001dcRRR.", - mullhw_3 = "10000350RRR.", - maclhw_3 = "10000358RRR.", - nmaclhw_3 = "1000035cRRR.", - maclhwsu_3 = "10000398RRR.", - maclhws_3 = "100003d8RRR.", - nmaclhws_3 = "100003dcRRR.", - machhwuo_3 = "10000418RRR.", - nmachhwo_3 = "1000045cRRR.", - machhwsuo_3 = "10000498RRR.", - 
machhwso_3 = "100004d8RRR.", - nmachhwso_3 = "100004dcRRR.", - macchwuo_3 = "10000518RRR.", - macchwo_3 = "10000558RRR.", - nmacchwo_3 = "1000055cRRR.", - macchwsuo_3 = "10000598RRR.", - macchwso_3 = "100005d8RRR.", - nmacchwso_3 = "100005dcRRR.", - maclhwo_3 = "10000758RRR.", - nmaclhwo_3 = "1000075cRRR.", - maclhwsuo_3 = "10000798RRR.", - maclhwso_3 = "100007d8RRR.", - nmaclhwso_3 = "100007dcRRR.", - - vaddubm_3 = "10000000VVV", - vmaxub_3 = "10000002VVV", - vrlb_3 = "10000004VVV", - vcmpequb_3 = "10000006VVV", - vmuloub_3 = "10000008VVV", - vaddfp_3 = "1000000aVVV", - vmrghb_3 = "1000000cVVV", - vpkuhum_3 = "1000000eVVV", - vmhaddshs_4 = "10000020VVVV", - vmhraddshs_4 = "10000021VVVV", - vmladduhm_4 = "10000022VVVV", - vmsumubm_4 = "10000024VVVV", - vmsummbm_4 = "10000025VVVV", - vmsumuhm_4 = "10000026VVVV", - vmsumuhs_4 = "10000027VVVV", - vmsumshm_4 = "10000028VVVV", - vmsumshs_4 = "10000029VVVV", - vsel_4 = "1000002aVVVV", - vperm_4 = "1000002bVVVV", - vsldoi_4 = "1000002cVVVP", - vpermxor_4 = "1000002dVVVV", - vmaddfp_4 = "1000002eVVVV~", - vnmsubfp_4 = "1000002fVVVV~", - vaddeuqm_4 = "1000003cVVVV", - vaddecuq_4 = "1000003dVVVV", - vsubeuqm_4 = "1000003eVVVV", - vsubecuq_4 = "1000003fVVVV", - vadduhm_3 = "10000040VVV", - vmaxuh_3 = "10000042VVV", - vrlh_3 = "10000044VVV", - vcmpequh_3 = "10000046VVV", - vmulouh_3 = "10000048VVV", - vsubfp_3 = "1000004aVVV", - vmrghh_3 = "1000004cVVV", - vpkuwum_3 = "1000004eVVV", - vadduwm_3 = "10000080VVV", - vmaxuw_3 = "10000082VVV", - vrlw_3 = "10000084VVV", - vcmpequw_3 = "10000086VVV", - vmulouw_3 = "10000088VVV", - vmuluwm_3 = "10000089VVV", - vmrghw_3 = "1000008cVVV", - vpkuhus_3 = "1000008eVVV", - vaddudm_3 = "100000c0VVV", - vmaxud_3 = "100000c2VVV", - vrld_3 = "100000c4VVV", - vcmpeqfp_3 = "100000c6VVV", - vcmpequd_3 = "100000c7VVV", - vpkuwus_3 = "100000ceVVV", - vadduqm_3 = "10000100VVV", - vmaxsb_3 = "10000102VVV", - vslb_3 = "10000104VVV", - vmulosb_3 = "10000108VVV", - vrefp_2 = "1000010aV-V", - vmrglb_3 = 
"1000010cVVV", - vpkshus_3 = "1000010eVVV", - vaddcuq_3 = "10000140VVV", - vmaxsh_3 = "10000142VVV", - vslh_3 = "10000144VVV", - vmulosh_3 = "10000148VVV", - vrsqrtefp_2 = "1000014aV-V", - vmrglh_3 = "1000014cVVV", - vpkswus_3 = "1000014eVVV", - vaddcuw_3 = "10000180VVV", - vmaxsw_3 = "10000182VVV", - vslw_3 = "10000184VVV", - vmulosw_3 = "10000188VVV", - vexptefp_2 = "1000018aV-V", - vmrglw_3 = "1000018cVVV", - vpkshss_3 = "1000018eVVV", - vmaxsd_3 = "100001c2VVV", - vsl_3 = "100001c4VVV", - vcmpgefp_3 = "100001c6VVV", - vlogefp_2 = "100001caV-V", - vpkswss_3 = "100001ceVVV", - vadduhs_3 = "10000240VVV", - vminuh_3 = "10000242VVV", - vsrh_3 = "10000244VVV", - vcmpgtuh_3 = "10000246VVV", - vmuleuh_3 = "10000248VVV", - vrfiz_2 = "1000024aV-V", - vsplth_3 = "1000024cVV3", - vupkhsh_2 = "1000024eV-V", - vminuw_3 = "10000282VVV", - vminud_3 = "100002c2VVV", - vcmpgtud_3 = "100002c7VVV", - vrfim_2 = "100002caV-V", - vcmpgtsb_3 = "10000306VVV", - vcfux_3 = "1000030aVVA~", - vaddshs_3 = "10000340VVV", - vminsh_3 = "10000342VVV", - vsrah_3 = "10000344VVV", - vcmpgtsh_3 = "10000346VVV", - vmulesh_3 = "10000348VVV", - vcfsx_3 = "1000034aVVA~", - vspltish_2 = "1000034cVS", - vupkhpx_2 = "1000034eV-V", - vaddsws_3 = "10000380VVV", - vminsw_3 = "10000382VVV", - vsraw_3 = "10000384VVV", - vcmpgtsw_3 = "10000386VVV", - vmulesw_3 = "10000388VVV", - vctuxs_3 = "1000038aVVA~", - vspltisw_2 = "1000038cVS", - vminsd_3 = "100003c2VVV", - vsrad_3 = "100003c4VVV", - vcmpbfp_3 = "100003c6VVV", - vcmpgtsd_3 = "100003c7VVV", - vctsxs_3 = "100003caVVA~", - vupklpx_2 = "100003ceV-V", - vsububm_3 = "10000400VVV", - ["bcdadd._4"] = "10000401VVVy.", - vavgub_3 = "10000402VVV", - vand_3 = "10000404VVV", - ["vcmpequb._3"] = "10000406VVV", - vmaxfp_3 = "1000040aVVV", - vsubuhm_3 = "10000440VVV", - ["bcdsub._4"] = "10000441VVVy.", - vavguh_3 = "10000442VVV", - vandc_3 = "10000444VVV", - ["vcmpequh._3"] = "10000446VVV", - vminfp_3 = "1000044aVVV", - vpkudum_3 = "1000044eVVV", - vsubuwm_3 = 
"10000480VVV", - vavguw_3 = "10000482VVV", - vor_3 = "10000484VVV", - ["vcmpequw._3"] = "10000486VVV", - vpmsumw_3 = "10000488VVV", - ["vcmpeqfp._3"] = "100004c6VVV", - ["vcmpequd._3"] = "100004c7VVV", - vpkudus_3 = "100004ceVVV", - vavgsb_3 = "10000502VVV", - vavgsh_3 = "10000542VVV", - vorc_3 = "10000544VVV", - vbpermq_3 = "1000054cVVV", - vpksdus_3 = "1000054eVVV", - vavgsw_3 = "10000582VVV", - vsld_3 = "100005c4VVV", - ["vcmpgefp._3"] = "100005c6VVV", - vpksdss_3 = "100005ceVVV", - vsububs_3 = "10000600VVV", - mfvscr_1 = "10000604V--", - vsum4ubs_3 = "10000608VVV", - vsubuhs_3 = "10000640VVV", - mtvscr_1 = "10000644--V", - ["vcmpgtuh._3"] = "10000646VVV", - vsum4shs_3 = "10000648VVV", - vupkhsw_2 = "1000064eV-V", - vsubuws_3 = "10000680VVV", - vshasigmaw_4 = "10000682VVYp", - veqv_3 = "10000684VVV", - vsum2sws_3 = "10000688VVV", - vmrgow_3 = "1000068cVVV", - vshasigmad_4 = "100006c2VVYp", - vsrd_3 = "100006c4VVV", - ["vcmpgtud._3"] = "100006c7VVV", - vupklsw_2 = "100006ceV-V", - vupkslw_2 = "100006ceV-V", - vsubsbs_3 = "10000700VVV", - vclzb_2 = "10000702V-V", - vpopcntb_2 = "10000703V-V", - ["vcmpgtsb._3"] = "10000706VVV", - vsum4sbs_3 = "10000708VVV", - vsubshs_3 = "10000740VVV", - vclzh_2 = "10000742V-V", - vpopcnth_2 = "10000743V-V", - ["vcmpgtsh._3"] = "10000746VVV", - vsubsws_3 = "10000780VVV", - vclzw_2 = "10000782V-V", - vpopcntw_2 = "10000783V-V", - ["vcmpgtsw._3"] = "10000786VVV", - vsumsws_3 = "10000788VVV", - vmrgew_3 = "1000078cVVV", - vclzd_2 = "100007c2V-V", - vpopcntd_2 = "100007c3V-V", - ["vcmpbfp._3"] = "100007c6VVV", - ["vcmpgtsd._3"] = "100007c7VVV", - - -- Primary opcode 19: - mcrf_2 = "4c000000XX", - isync_0 = "4c00012c", - crnor_3 = "4c000042CCC", - crnot_2 = "4c000042CC=", - crandc_3 = "4c000102CCC", - crxor_3 = "4c000182CCC", - crclr_1 = "4c000182C==", - crnand_3 = "4c0001c2CCC", - crand_3 = "4c000202CCC", - creqv_3 = "4c000242CCC", - crset_1 = "4c000242C==", - crorc_3 = "4c000342CCC", - cror_3 = "4c000382CCC", - crmove_2 = 
"4c000382CC=", - bclr_2 = "4c000020AA", - bclrl_2 = "4c000021AA", - bcctr_2 = "4c000420AA", - bcctrl_2 = "4c000421AA", - bctar_2 = "4c000460AA", - bctarl_2 = "4c000461AA", - blr_0 = "4e800020", - blrl_0 = "4e800021", - bctr_0 = "4e800420", - bctrl_0 = "4e800421", - - -- Primary opcode 31: - cmpw_3 = "7c000000XRR", - cmpw_2 = "7c000000-RR", - cmpd_3 = "7c200000XRR", - cmpd_2 = "7c200000-RR", - tw_3 = "7c000008ARR", - lvsl_3 = "7c00000cVRR", - subfc_3 = "7c000010RRR.", - subc_3 = "7c000010RRR~.", - mulhdu_3 = "7c000012RRR.", - addc_3 = "7c000014RRR.", - mulhwu_3 = "7c000016RRR.", - isel_4 = "7c00001eRRRC", - isellt_3 = "7c00001eRRR", - iselgt_3 = "7c00005eRRR", - iseleq_3 = "7c00009eRRR", - mfcr_1 = "7c000026R", - mfocrf_2 = "7c100026RG", - mtcrf_2 = "7c000120GR", - mtocrf_2 = "7c100120GR", - lwarx_3 = "7c000028RR0R", - ldx_3 = "7c00002aRR0R", - lwzx_3 = "7c00002eRR0R", - slw_3 = "7c000030RR~R.", - cntlzw_2 = "7c000034RR~", - sld_3 = "7c000036RR~R.", - and_3 = "7c000038RR~R.", - cmplw_3 = "7c000040XRR", - cmplw_2 = "7c000040-RR", - cmpld_3 = "7c200040XRR", - cmpld_2 = "7c200040-RR", - lvsr_3 = "7c00004cVRR", - subf_3 = "7c000050RRR.", - sub_3 = "7c000050RRR~.", - lbarx_3 = "7c000068RR0R", - ldux_3 = "7c00006aRR0R", - dcbst_2 = "7c00006c-RR", - lwzux_3 = "7c00006eRR0R", - cntlzd_2 = "7c000074RR~", - andc_3 = "7c000078RR~R.", - td_3 = "7c000088ARR", - lvewx_3 = "7c00008eVRR", - mulhd_3 = "7c000092RRR.", - addg6s_3 = "7c000094RRR", - mulhw_3 = "7c000096RRR.", - dlmzb_3 = "7c00009cRR~R.", - ldarx_3 = "7c0000a8RR0R", - dcbf_2 = "7c0000ac-RR", - lbzx_3 = "7c0000aeRR0R", - lvx_3 = "7c0000ceVRR", - neg_2 = "7c0000d0RR.", - lharx_3 = "7c0000e8RR0R", - lbzux_3 = "7c0000eeRR0R", - popcntb_2 = "7c0000f4RR~", - not_2 = "7c0000f8RR~%.", - nor_3 = "7c0000f8RR~R.", - stvebx_3 = "7c00010eVRR", - subfe_3 = "7c000110RRR.", - sube_3 = "7c000110RRR~.", - adde_3 = "7c000114RRR.", - stdx_3 = "7c00012aRR0R", - ["stwcx._3"] = "7c00012dRR0R.", - stwx_3 = "7c00012eRR0R", - prtyw_2 = 
"7c000134RR~", - stvehx_3 = "7c00014eVRR", - stdux_3 = "7c00016aRR0R", - ["stqcx._3"] = "7c00016dR:R0R.", - stwux_3 = "7c00016eRR0R", - prtyd_2 = "7c000174RR~", - stvewx_3 = "7c00018eVRR", - subfze_2 = "7c000190RR.", - addze_2 = "7c000194RR.", - ["stdcx._3"] = "7c0001adRR0R.", - stbx_3 = "7c0001aeRR0R", - stvx_3 = "7c0001ceVRR", - subfme_2 = "7c0001d0RR.", - mulld_3 = "7c0001d2RRR.", - addme_2 = "7c0001d4RR.", - mullw_3 = "7c0001d6RRR.", - dcbtst_2 = "7c0001ec-RR", - stbux_3 = "7c0001eeRR0R", - bpermd_3 = "7c0001f8RR~R", - lvepxl_3 = "7c00020eVRR", - add_3 = "7c000214RRR.", - lqarx_3 = "7c000228R:R0R", - dcbt_2 = "7c00022c-RR", - lhzx_3 = "7c00022eRR0R", - cdtbcd_2 = "7c000234RR~", - eqv_3 = "7c000238RR~R.", - lvepx_3 = "7c00024eVRR", - eciwx_3 = "7c00026cRR0R", - lhzux_3 = "7c00026eRR0R", - cbcdtd_2 = "7c000274RR~", - xor_3 = "7c000278RR~R.", - mfspefscr_1 = "7c0082a6R", - mfxer_1 = "7c0102a6R", - mflr_1 = "7c0802a6R", - mfctr_1 = "7c0902a6R", - lwax_3 = "7c0002aaRR0R", - lhax_3 = "7c0002aeRR0R", - mftb_1 = "7c0c42e6R", - mftbu_1 = "7c0d42e6R", - lvxl_3 = "7c0002ceVRR", - lwaux_3 = "7c0002eaRR0R", - lhaux_3 = "7c0002eeRR0R", - popcntw_2 = "7c0002f4RR~", - divdeu_3 = "7c000312RRR.", - divweu_3 = "7c000316RRR.", - sthx_3 = "7c00032eRR0R", - orc_3 = "7c000338RR~R.", - ecowx_3 = "7c00036cRR0R", - sthux_3 = "7c00036eRR0R", - or_3 = "7c000378RR~R.", - mr_2 = "7c000378RR~%.", - divdu_3 = "7c000392RRR.", - divwu_3 = "7c000396RRR.", - mtspefscr_1 = "7c0083a6R", - mtxer_1 = "7c0103a6R", - mtlr_1 = "7c0803a6R", - mtctr_1 = "7c0903a6R", - dcbi_2 = "7c0003ac-RR", - nand_3 = "7c0003b8RR~R.", - dsn_2 = "7c0003c6-RR", - stvxl_3 = "7c0003ceVRR", - divd_3 = "7c0003d2RRR.", - divw_3 = "7c0003d6RRR.", - popcntd_2 = "7c0003f4RR~", - cmpb_3 = "7c0003f8RR~R.", - mcrxr_1 = "7c000400X", - lbdx_3 = "7c000406RRR", - subfco_3 = "7c000410RRR.", - subco_3 = "7c000410RRR~.", - addco_3 = "7c000414RRR.", - ldbrx_3 = "7c000428RR0R", - lswx_3 = "7c00042aRR0R", - lwbrx_3 = "7c00042cRR0R", - lfsx_3 = 
"7c00042eFR0R", - srw_3 = "7c000430RR~R.", - srd_3 = "7c000436RR~R.", - lhdx_3 = "7c000446RRR", - subfo_3 = "7c000450RRR.", - subo_3 = "7c000450RRR~.", - lfsux_3 = "7c00046eFR0R", - lwdx_3 = "7c000486RRR", - lswi_3 = "7c0004aaRR0A", - sync_0 = "7c0004ac", - lwsync_0 = "7c2004ac", - ptesync_0 = "7c4004ac", - lfdx_3 = "7c0004aeFR0R", - lddx_3 = "7c0004c6RRR", - nego_2 = "7c0004d0RR.", - lfdux_3 = "7c0004eeFR0R", - stbdx_3 = "7c000506RRR", - subfeo_3 = "7c000510RRR.", - subeo_3 = "7c000510RRR~.", - addeo_3 = "7c000514RRR.", - stdbrx_3 = "7c000528RR0R", - stswx_3 = "7c00052aRR0R", - stwbrx_3 = "7c00052cRR0R", - stfsx_3 = "7c00052eFR0R", - sthdx_3 = "7c000546RRR", - ["stbcx._3"] = "7c00056dRRR", - stfsux_3 = "7c00056eFR0R", - stwdx_3 = "7c000586RRR", - subfzeo_2 = "7c000590RR.", - addzeo_2 = "7c000594RR.", - stswi_3 = "7c0005aaRR0A", - ["sthcx._3"] = "7c0005adRRR", - stfdx_3 = "7c0005aeFR0R", - stddx_3 = "7c0005c6RRR", - subfmeo_2 = "7c0005d0RR.", - mulldo_3 = "7c0005d2RRR.", - addmeo_2 = "7c0005d4RR.", - mullwo_3 = "7c0005d6RRR.", - dcba_2 = "7c0005ec-RR", - stfdux_3 = "7c0005eeFR0R", - stvepxl_3 = "7c00060eVRR", - addo_3 = "7c000614RRR.", - lhbrx_3 = "7c00062cRR0R", - lfdpx_3 = "7c00062eF:RR", - sraw_3 = "7c000630RR~R.", - srad_3 = "7c000634RR~R.", - lfddx_3 = "7c000646FRR", - stvepx_3 = "7c00064eVRR", - srawi_3 = "7c000670RR~A.", - sradi_3 = "7c000674RR~H.", - eieio_0 = "7c0006ac", - lfiwax_3 = "7c0006aeFR0R", - divdeuo_3 = "7c000712RRR.", - divweuo_3 = "7c000716RRR.", - sthbrx_3 = "7c00072cRR0R", - stfdpx_3 = "7c00072eF:RR", - extsh_2 = "7c000734RR~.", - stfddx_3 = "7c000746FRR", - divdeo_3 = "7c000752RRR.", - divweo_3 = "7c000756RRR.", - extsb_2 = "7c000774RR~.", - divduo_3 = "7c000792RRR.", - divwou_3 = "7c000796RRR.", - icbi_2 = "7c0007ac-RR", - stfiwx_3 = "7c0007aeFR0R", - extsw_2 = "7c0007b4RR~.", - divdo_3 = "7c0007d2RRR.", - divwo_3 = "7c0007d6RRR.", - dcbz_2 = "7c0007ec-RR", - - ["tbegin._1"] = "7c00051d1", - ["tbegin._0"] = "7c00051d", - ["tend._1"] = 
"7c00055dY", - ["tend._0"] = "7c00055d", - ["tendall._0"] = "7e00055d", - tcheck_1 = "7c00059cX", - ["tsr._1"] = "7c0005dd1", - ["tsuspend._0"] = "7c0005dd", - ["tresume._0"] = "7c2005dd", - ["tabortwc._3"] = "7c00061dARR", - ["tabortdc._3"] = "7c00065dARR", - ["tabortwci._3"] = "7c00069dARS", - ["tabortdci._3"] = "7c0006ddARS", - ["tabort._1"] = "7c00071d-R-", - ["treclaim._1"] = "7c00075d-R", - ["trechkpt._0"] = "7c0007dd", - - lxsiwzx_3 = "7c000018QRR", - lxsiwax_3 = "7c000098QRR", - mfvsrd_2 = "7c000066-Rq", - mfvsrwz_2 = "7c0000e6-Rq", - stxsiwx_3 = "7c000118QRR", - mtvsrd_2 = "7c000166QR", - mtvsrwa_2 = "7c0001a6QR", - lxvdsx_3 = "7c000298QRR", - lxsspx_3 = "7c000418QRR", - lxsdx_3 = "7c000498QRR", - stxsspx_3 = "7c000518QRR", - stxsdx_3 = "7c000598QRR", - lxvw4x_3 = "7c000618QRR", - lxvd2x_3 = "7c000698QRR", - stxvw4x_3 = "7c000718QRR", - stxvd2x_3 = "7c000798QRR", - - -- Primary opcode 30: - rldicl_4 = "78000000RR~HM.", - rldicr_4 = "78000004RR~HM.", - rldic_4 = "78000008RR~HM.", - rldimi_4 = "7800000cRR~HM.", - rldcl_4 = "78000010RR~RM.", - rldcr_4 = "78000012RR~RM.", - - rotldi_3 = op_alias("rldicl_4", function(p) - p[4] = "0" - end), - rotrdi_3 = op_alias("rldicl_4", function(p) - p[3] = "64-("..p[3]..")"; p[4] = "0" - end), - rotld_3 = op_alias("rldcl_4", function(p) - p[4] = "0" - end), - sldi_3 = op_alias("rldicr_4", function(p) - p[4] = "63-("..p[3]..")" - end), - srdi_3 = op_alias("rldicl_4", function(p) - p[4] = p[3]; p[3] = "64-("..p[3]..")" - end), - clrldi_3 = op_alias("rldicl_4", function(p) - p[4] = p[3]; p[3] = "0" - end), - clrrdi_3 = op_alias("rldicr_4", function(p) - p[4] = "63-("..p[3]..")"; p[3] = "0" - end), - - -- Primary opcode 56: - lq_2 = "e0000000R:D", -- NYI: displacement must be divisible by 8. - - -- Primary opcode 57: - lfdp_2 = "e4000000F:D", -- NYI: displacement must be divisible by 4. 
- - -- Primary opcode 59: - fdivs_3 = "ec000024FFF.", - fsubs_3 = "ec000028FFF.", - fadds_3 = "ec00002aFFF.", - fsqrts_2 = "ec00002cF-F.", - fres_2 = "ec000030F-F.", - fmuls_3 = "ec000032FF-F.", - frsqrtes_2 = "ec000034F-F.", - fmsubs_4 = "ec000038FFFF~.", - fmadds_4 = "ec00003aFFFF~.", - fnmsubs_4 = "ec00003cFFFF~.", - fnmadds_4 = "ec00003eFFFF~.", - fcfids_2 = "ec00069cF-F.", - fcfidus_2 = "ec00079cF-F.", - - dadd_3 = "ec000004FFF.", - dqua_4 = "ec000006FFFZ.", - dmul_3 = "ec000044FFF.", - drrnd_4 = "ec000046FFFZ.", - dscli_3 = "ec000084FF6.", - dquai_4 = "ec000086SF~FZ.", - dscri_3 = "ec0000c4FF6.", - drintx_4 = "ec0000c61F~FZ.", - dcmpo_3 = "ec000104XFF", - dtstex_3 = "ec000144XFF", - dtstdc_3 = "ec000184XF6", - dtstdg_3 = "ec0001c4XF6", - drintn_4 = "ec0001c61F~FZ.", - dctdp_2 = "ec000204F-F.", - dctfix_2 = "ec000244F-F.", - ddedpd_3 = "ec000284ZF~F.", - dxex_2 = "ec0002c4F-F.", - dsub_3 = "ec000404FFF.", - ddiv_3 = "ec000444FFF.", - dcmpu_3 = "ec000504XFF", - dtstsf_3 = "ec000544XFF", - drsp_2 = "ec000604F-F.", - dcffix_2 = "ec000644F-F.", - denbcd_3 = "ec000684YF~F.", - diex_3 = "ec0006c4FFF.", - - -- Primary opcode 60: - xsaddsp_3 = "f0000000QQQ", - xsmaddasp_3 = "f0000008QQQ", - xxsldwi_4 = "f0000010QQQz", - xsrsqrtesp_2 = "f0000028Q-Q", - xssqrtsp_2 = "f000002cQ-Q", - xxsel_4 = "f0000030QQQQ", - xssubsp_3 = "f0000040QQQ", - xsmaddmsp_3 = "f0000048QQQ", - xxpermdi_4 = "f0000050QQQz", - xsresp_2 = "f0000068Q-Q", - xsmulsp_3 = "f0000080QQQ", - xsmsubasp_3 = "f0000088QQQ", - xxmrghw_3 = "f0000090QQQ", - xsdivsp_3 = "f00000c0QQQ", - xsmsubmsp_3 = "f00000c8QQQ", - xsadddp_3 = "f0000100QQQ", - xsmaddadp_3 = "f0000108QQQ", - xscmpudp_3 = "f0000118XQQ", - xscvdpuxws_2 = "f0000120Q-Q", - xsrdpi_2 = "f0000124Q-Q", - xsrsqrtedp_2 = "f0000128Q-Q", - xssqrtdp_2 = "f000012cQ-Q", - xssubdp_3 = "f0000140QQQ", - xsmaddmdp_3 = "f0000148QQQ", - xscmpodp_3 = "f0000158XQQ", - xscvdpsxws_2 = "f0000160Q-Q", - xsrdpiz_2 = "f0000164Q-Q", - xsredp_2 = "f0000168Q-Q", - xsmuldp_3 = 
"f0000180QQQ", - xsmsubadp_3 = "f0000188QQQ", - xxmrglw_3 = "f0000190QQQ", - xsrdpip_2 = "f00001a4Q-Q", - xstsqrtdp_2 = "f00001a8X-Q", - xsrdpic_2 = "f00001acQ-Q", - xsdivdp_3 = "f00001c0QQQ", - xsmsubmdp_3 = "f00001c8QQQ", - xsrdpim_2 = "f00001e4Q-Q", - xstdivdp_3 = "f00001e8XQQ", - xvaddsp_3 = "f0000200QQQ", - xvmaddasp_3 = "f0000208QQQ", - xvcmpeqsp_3 = "f0000218QQQ", - xvcvspuxws_2 = "f0000220Q-Q", - xvrspi_2 = "f0000224Q-Q", - xvrsqrtesp_2 = "f0000228Q-Q", - xvsqrtsp_2 = "f000022cQ-Q", - xvsubsp_3 = "f0000240QQQ", - xvmaddmsp_3 = "f0000248QQQ", - xvcmpgtsp_3 = "f0000258QQQ", - xvcvspsxws_2 = "f0000260Q-Q", - xvrspiz_2 = "f0000264Q-Q", - xvresp_2 = "f0000268Q-Q", - xvmulsp_3 = "f0000280QQQ", - xvmsubasp_3 = "f0000288QQQ", - xxspltw_3 = "f0000290QQg~", - xvcmpgesp_3 = "f0000298QQQ", - xvcvuxwsp_2 = "f00002a0Q-Q", - xvrspip_2 = "f00002a4Q-Q", - xvtsqrtsp_2 = "f00002a8X-Q", - xvrspic_2 = "f00002acQ-Q", - xvdivsp_3 = "f00002c0QQQ", - xvmsubmsp_3 = "f00002c8QQQ", - xvcvsxwsp_2 = "f00002e0Q-Q", - xvrspim_2 = "f00002e4Q-Q", - xvtdivsp_3 = "f00002e8XQQ", - xvadddp_3 = "f0000300QQQ", - xvmaddadp_3 = "f0000308QQQ", - xvcmpeqdp_3 = "f0000318QQQ", - xvcvdpuxws_2 = "f0000320Q-Q", - xvrdpi_2 = "f0000324Q-Q", - xvrsqrtedp_2 = "f0000328Q-Q", - xvsqrtdp_2 = "f000032cQ-Q", - xvsubdp_3 = "f0000340QQQ", - xvmaddmdp_3 = "f0000348QQQ", - xvcmpgtdp_3 = "f0000358QQQ", - xvcvdpsxws_2 = "f0000360Q-Q", - xvrdpiz_2 = "f0000364Q-Q", - xvredp_2 = "f0000368Q-Q", - xvmuldp_3 = "f0000380QQQ", - xvmsubadp_3 = "f0000388QQQ", - xvcmpgedp_3 = "f0000398QQQ", - xvcvuxwdp_2 = "f00003a0Q-Q", - xvrdpip_2 = "f00003a4Q-Q", - xvtsqrtdp_2 = "f00003a8X-Q", - xvrdpic_2 = "f00003acQ-Q", - xvdivdp_3 = "f00003c0QQQ", - xvmsubmdp_3 = "f00003c8QQQ", - xvcvsxwdp_2 = "f00003e0Q-Q", - xvrdpim_2 = "f00003e4Q-Q", - xvtdivdp_3 = "f00003e8XQQ", - xsnmaddasp_3 = "f0000408QQQ", - xxland_3 = "f0000410QQQ", - xscvdpsp_2 = "f0000424Q-Q", - xscvdpspn_2 = "f000042cQ-Q", - xsnmaddmsp_3 = "f0000448QQQ", - xxlandc_3 = 
"f0000450QQQ", - xsrsp_2 = "f0000464Q-Q", - xsnmsubasp_3 = "f0000488QQQ", - xxlor_3 = "f0000490QQQ", - xscvuxdsp_2 = "f00004a0Q-Q", - xsnmsubmsp_3 = "f00004c8QQQ", - xxlxor_3 = "f00004d0QQQ", - xscvsxdsp_2 = "f00004e0Q-Q", - xsmaxdp_3 = "f0000500QQQ", - xsnmaddadp_3 = "f0000508QQQ", - xxlnor_3 = "f0000510QQQ", - xscvdpuxds_2 = "f0000520Q-Q", - xscvspdp_2 = "f0000524Q-Q", - xscvspdpn_2 = "f000052cQ-Q", - xsmindp_3 = "f0000540QQQ", - xsnmaddmdp_3 = "f0000548QQQ", - xxlorc_3 = "f0000550QQQ", - xscvdpsxds_2 = "f0000560Q-Q", - xsabsdp_2 = "f0000564Q-Q", - xscpsgndp_3 = "f0000580QQQ", - xsnmsubadp_3 = "f0000588QQQ", - xxlnand_3 = "f0000590QQQ", - xscvuxddp_2 = "f00005a0Q-Q", - xsnabsdp_2 = "f00005a4Q-Q", - xsnmsubmdp_3 = "f00005c8QQQ", - xxleqv_3 = "f00005d0QQQ", - xscvsxddp_2 = "f00005e0Q-Q", - xsnegdp_2 = "f00005e4Q-Q", - xvmaxsp_3 = "f0000600QQQ", - xvnmaddasp_3 = "f0000608QQQ", - ["xvcmpeqsp._3"] = "f0000618QQQ", - xvcvspuxds_2 = "f0000620Q-Q", - xvcvdpsp_2 = "f0000624Q-Q", - xvminsp_3 = "f0000640QQQ", - xvnmaddmsp_3 = "f0000648QQQ", - ["xvcmpgtsp._3"] = "f0000658QQQ", - xvcvspsxds_2 = "f0000660Q-Q", - xvabssp_2 = "f0000664Q-Q", - xvcpsgnsp_3 = "f0000680QQQ", - xvnmsubasp_3 = "f0000688QQQ", - ["xvcmpgesp._3"] = "f0000698QQQ", - xvcvuxdsp_2 = "f00006a0Q-Q", - xvnabssp_2 = "f00006a4Q-Q", - xvnmsubmsp_3 = "f00006c8QQQ", - xvcvsxdsp_2 = "f00006e0Q-Q", - xvnegsp_2 = "f00006e4Q-Q", - xvmaxdp_3 = "f0000700QQQ", - xvnmaddadp_3 = "f0000708QQQ", - ["xvcmpeqdp._3"] = "f0000718QQQ", - xvcvdpuxds_2 = "f0000720Q-Q", - xvcvspdp_2 = "f0000724Q-Q", - xvmindp_3 = "f0000740QQQ", - xvnmaddmdp_3 = "f0000748QQQ", - ["xvcmpgtdp._3"] = "f0000758QQQ", - xvcvdpsxds_2 = "f0000760Q-Q", - xvabsdp_2 = "f0000764Q-Q", - xvcpsgndp_3 = "f0000780QQQ", - xvnmsubadp_3 = "f0000788QQQ", - ["xvcmpgedp._3"] = "f0000798QQQ", - xvcvuxddp_2 = "f00007a0Q-Q", - xvnabsdp_2 = "f00007a4Q-Q", - xvnmsubmdp_3 = "f00007c8QQQ", - xvcvsxddp_2 = "f00007e0Q-Q", - xvnegdp_2 = "f00007e4Q-Q", - - -- Primary opcode 61: - 
stfdp_2 = "f4000000F:D", -- NYI: displacement must be divisible by 4. - - -- Primary opcode 62: - stq_2 = "f8000002R:D", -- NYI: displacement must be divisible by 8. - - -- Primary opcode 63: - fdiv_3 = "fc000024FFF.", - fsub_3 = "fc000028FFF.", - fadd_3 = "fc00002aFFF.", - fsqrt_2 = "fc00002cF-F.", - fsel_4 = "fc00002eFFFF~.", - fre_2 = "fc000030F-F.", - fmul_3 = "fc000032FF-F.", - frsqrte_2 = "fc000034F-F.", - fmsub_4 = "fc000038FFFF~.", - fmadd_4 = "fc00003aFFFF~.", - fnmsub_4 = "fc00003cFFFF~.", - fnmadd_4 = "fc00003eFFFF~.", - fcmpu_3 = "fc000000XFF", - fcpsgn_3 = "fc000010FFF.", - fcmpo_3 = "fc000040XFF", - mtfsb1_1 = "fc00004cA", - fneg_2 = "fc000050F-F.", - mcrfs_2 = "fc000080XX", - mtfsb0_1 = "fc00008cA", - fmr_2 = "fc000090F-F.", - frsp_2 = "fc000018F-F.", - fctiw_2 = "fc00001cF-F.", - fctiwz_2 = "fc00001eF-F.", - ftdiv_2 = "fc000100X-F.", - fctiwu_2 = "fc00011cF-F.", - fctiwuz_2 = "fc00011eF-F.", - mtfsfi_2 = "fc00010cAA", -- NYI: upshift. - fnabs_2 = "fc000110F-F.", - ftsqrt_2 = "fc000140X-F.", - fabs_2 = "fc000210F-F.", - frin_2 = "fc000310F-F.", - friz_2 = "fc000350F-F.", - frip_2 = "fc000390F-F.", - frim_2 = "fc0003d0F-F.", - mffs_1 = "fc00048eF.", - -- NYI: mtfsf, mtfsb0, mtfsb1. 
- fctid_2 = "fc00065cF-F.", - fctidz_2 = "fc00065eF-F.", - fmrgow_3 = "fc00068cFFF", - fcfid_2 = "fc00069cF-F.", - fctidu_2 = "fc00075cF-F.", - fctiduz_2 = "fc00075eF-F.", - fmrgew_3 = "fc00078cFFF", - fcfidu_2 = "fc00079cF-F.", - - daddq_3 = "fc000004F:F:F:.", - dquaq_4 = "fc000006F:F:F:Z.", - dmulq_3 = "fc000044F:F:F:.", - drrndq_4 = "fc000046F:F:F:Z.", - dscliq_3 = "fc000084F:F:6.", - dquaiq_4 = "fc000086SF:~F:Z.", - dscriq_3 = "fc0000c4F:F:6.", - drintxq_4 = "fc0000c61F:~F:Z.", - dcmpoq_3 = "fc000104XF:F:", - dtstexq_3 = "fc000144XF:F:", - dtstdcq_3 = "fc000184XF:6", - dtstdgq_3 = "fc0001c4XF:6", - drintnq_4 = "fc0001c61F:~F:Z.", - dctqpq_2 = "fc000204F:-F:.", - dctfixq_2 = "fc000244F:-F:.", - ddedpdq_3 = "fc000284ZF:~F:.", - dxexq_2 = "fc0002c4F:-F:.", - dsubq_3 = "fc000404F:F:F:.", - ddivq_3 = "fc000444F:F:F:.", - dcmpuq_3 = "fc000504XF:F:", - dtstsfq_3 = "fc000544XF:F:", - drdpq_2 = "fc000604F:-F:.", - dcffixq_2 = "fc000644F:-F:.", - denbcdq_3 = "fc000684YF:~F:.", - diexq_3 = "fc0006c4F:FF:.", - - -- Primary opcode 4, SPE APU extension: - evaddw_3 = "10000200RRR", - evaddiw_3 = "10000202RAR~", - evsubw_3 = "10000204RRR~", - evsubiw_3 = "10000206RAR~", - evabs_2 = "10000208RR", - evneg_2 = "10000209RR", - evextsb_2 = "1000020aRR", - evextsh_2 = "1000020bRR", - evrndw_2 = "1000020cRR", - evcntlzw_2 = "1000020dRR", - evcntlsw_2 = "1000020eRR", - brinc_3 = "1000020fRRR", - evand_3 = "10000211RRR", - evandc_3 = "10000212RRR", - evxor_3 = "10000216RRR", - evor_3 = "10000217RRR", - evmr_2 = "10000217RR=", - evnor_3 = "10000218RRR", - evnot_2 = "10000218RR=", - eveqv_3 = "10000219RRR", - evorc_3 = "1000021bRRR", - evnand_3 = "1000021eRRR", - evsrwu_3 = "10000220RRR", - evsrws_3 = "10000221RRR", - evsrwiu_3 = "10000222RRA", - evsrwis_3 = "10000223RRA", - evslw_3 = "10000224RRR", - evslwi_3 = "10000226RRA", - evrlw_3 = "10000228RRR", - evsplati_2 = "10000229RS", - evrlwi_3 = "1000022aRRA", - evsplatfi_2 = "1000022bRS", - evmergehi_3 = "1000022cRRR", - evmergelo_3 = 
"1000022dRRR", - evcmpgtu_3 = "10000230XRR", - evcmpgtu_2 = "10000230-RR", - evcmpgts_3 = "10000231XRR", - evcmpgts_2 = "10000231-RR", - evcmpltu_3 = "10000232XRR", - evcmpltu_2 = "10000232-RR", - evcmplts_3 = "10000233XRR", - evcmplts_2 = "10000233-RR", - evcmpeq_3 = "10000234XRR", - evcmpeq_2 = "10000234-RR", - evsel_4 = "10000278RRRW", - evsel_3 = "10000278RRR", - evfsadd_3 = "10000280RRR", - evfssub_3 = "10000281RRR", - evfsabs_2 = "10000284RR", - evfsnabs_2 = "10000285RR", - evfsneg_2 = "10000286RR", - evfsmul_3 = "10000288RRR", - evfsdiv_3 = "10000289RRR", - evfscmpgt_3 = "1000028cXRR", - evfscmpgt_2 = "1000028c-RR", - evfscmplt_3 = "1000028dXRR", - evfscmplt_2 = "1000028d-RR", - evfscmpeq_3 = "1000028eXRR", - evfscmpeq_2 = "1000028e-RR", - evfscfui_2 = "10000290R-R", - evfscfsi_2 = "10000291R-R", - evfscfuf_2 = "10000292R-R", - evfscfsf_2 = "10000293R-R", - evfsctui_2 = "10000294R-R", - evfsctsi_2 = "10000295R-R", - evfsctuf_2 = "10000296R-R", - evfsctsf_2 = "10000297R-R", - evfsctuiz_2 = "10000298R-R", - evfsctsiz_2 = "1000029aR-R", - evfststgt_3 = "1000029cXRR", - evfststgt_2 = "1000029c-RR", - evfststlt_3 = "1000029dXRR", - evfststlt_2 = "1000029d-RR", - evfststeq_3 = "1000029eXRR", - evfststeq_2 = "1000029e-RR", - efsadd_3 = "100002c0RRR", - efssub_3 = "100002c1RRR", - efsabs_2 = "100002c4RR", - efsnabs_2 = "100002c5RR", - efsneg_2 = "100002c6RR", - efsmul_3 = "100002c8RRR", - efsdiv_3 = "100002c9RRR", - efscmpgt_3 = "100002ccXRR", - efscmpgt_2 = "100002cc-RR", - efscmplt_3 = "100002cdXRR", - efscmplt_2 = "100002cd-RR", - efscmpeq_3 = "100002ceXRR", - efscmpeq_2 = "100002ce-RR", - efscfd_2 = "100002cfR-R", - efscfui_2 = "100002d0R-R", - efscfsi_2 = "100002d1R-R", - efscfuf_2 = "100002d2R-R", - efscfsf_2 = "100002d3R-R", - efsctui_2 = "100002d4R-R", - efsctsi_2 = "100002d5R-R", - efsctuf_2 = "100002d6R-R", - efsctsf_2 = "100002d7R-R", - efsctuiz_2 = "100002d8R-R", - efsctsiz_2 = "100002daR-R", - efststgt_3 = "100002dcXRR", - efststgt_2 = "100002dc-RR", - 
efststlt_3 = "100002ddXRR", - efststlt_2 = "100002dd-RR", - efststeq_3 = "100002deXRR", - efststeq_2 = "100002de-RR", - efdadd_3 = "100002e0RRR", - efdsub_3 = "100002e1RRR", - efdcfuid_2 = "100002e2R-R", - efdcfsid_2 = "100002e3R-R", - efdabs_2 = "100002e4RR", - efdnabs_2 = "100002e5RR", - efdneg_2 = "100002e6RR", - efdmul_3 = "100002e8RRR", - efddiv_3 = "100002e9RRR", - efdctuidz_2 = "100002eaR-R", - efdctsidz_2 = "100002ebR-R", - efdcmpgt_3 = "100002ecXRR", - efdcmpgt_2 = "100002ec-RR", - efdcmplt_3 = "100002edXRR", - efdcmplt_2 = "100002ed-RR", - efdcmpeq_3 = "100002eeXRR", - efdcmpeq_2 = "100002ee-RR", - efdcfs_2 = "100002efR-R", - efdcfui_2 = "100002f0R-R", - efdcfsi_2 = "100002f1R-R", - efdcfuf_2 = "100002f2R-R", - efdcfsf_2 = "100002f3R-R", - efdctui_2 = "100002f4R-R", - efdctsi_2 = "100002f5R-R", - efdctuf_2 = "100002f6R-R", - efdctsf_2 = "100002f7R-R", - efdctuiz_2 = "100002f8R-R", - efdctsiz_2 = "100002faR-R", - efdtstgt_3 = "100002fcXRR", - efdtstgt_2 = "100002fc-RR", - efdtstlt_3 = "100002fdXRR", - efdtstlt_2 = "100002fd-RR", - efdtsteq_3 = "100002feXRR", - efdtsteq_2 = "100002fe-RR", - evlddx_3 = "10000300RR0R", - evldd_2 = "10000301R8", - evldwx_3 = "10000302RR0R", - evldw_2 = "10000303R8", - evldhx_3 = "10000304RR0R", - evldh_2 = "10000305R8", - evlwhex_3 = "10000310RR0R", - evlwhe_2 = "10000311R4", - evlwhoux_3 = "10000314RR0R", - evlwhou_2 = "10000315R4", - evlwhosx_3 = "10000316RR0R", - evlwhos_2 = "10000317R4", - evstddx_3 = "10000320RR0R", - evstdd_2 = "10000321R8", - evstdwx_3 = "10000322RR0R", - evstdw_2 = "10000323R8", - evstdhx_3 = "10000324RR0R", - evstdh_2 = "10000325R8", - evstwhex_3 = "10000330RR0R", - evstwhe_2 = "10000331R4", - evstwhox_3 = "10000334RR0R", - evstwho_2 = "10000335R4", - evstwwex_3 = "10000338RR0R", - evstwwe_2 = "10000339R4", - evstwwox_3 = "1000033cRR0R", - evstwwo_2 = "1000033dR4", - evmhessf_3 = "10000403RRR", - evmhossf_3 = "10000407RRR", - evmheumi_3 = "10000408RRR", - evmhesmi_3 = "10000409RRR", - evmhesmf_3 = 
"1000040bRRR", - evmhoumi_3 = "1000040cRRR", - evmhosmi_3 = "1000040dRRR", - evmhosmf_3 = "1000040fRRR", - evmhessfa_3 = "10000423RRR", - evmhossfa_3 = "10000427RRR", - evmheumia_3 = "10000428RRR", - evmhesmia_3 = "10000429RRR", - evmhesmfa_3 = "1000042bRRR", - evmhoumia_3 = "1000042cRRR", - evmhosmia_3 = "1000042dRRR", - evmhosmfa_3 = "1000042fRRR", - evmwhssf_3 = "10000447RRR", - evmwlumi_3 = "10000448RRR", - evmwhumi_3 = "1000044cRRR", - evmwhsmi_3 = "1000044dRRR", - evmwhsmf_3 = "1000044fRRR", - evmwssf_3 = "10000453RRR", - evmwumi_3 = "10000458RRR", - evmwsmi_3 = "10000459RRR", - evmwsmf_3 = "1000045bRRR", - evmwhssfa_3 = "10000467RRR", - evmwlumia_3 = "10000468RRR", - evmwhumia_3 = "1000046cRRR", - evmwhsmia_3 = "1000046dRRR", - evmwhsmfa_3 = "1000046fRRR", - evmwssfa_3 = "10000473RRR", - evmwumia_3 = "10000478RRR", - evmwsmia_3 = "10000479RRR", - evmwsmfa_3 = "1000047bRRR", - evmra_2 = "100004c4RR", - evdivws_3 = "100004c6RRR", - evdivwu_3 = "100004c7RRR", - evmwssfaa_3 = "10000553RRR", - evmwumiaa_3 = "10000558RRR", - evmwsmiaa_3 = "10000559RRR", - evmwsmfaa_3 = "1000055bRRR", - evmwssfan_3 = "100005d3RRR", - evmwumian_3 = "100005d8RRR", - evmwsmian_3 = "100005d9RRR", - evmwsmfan_3 = "100005dbRRR", - evmergehilo_3 = "1000022eRRR", - evmergelohi_3 = "1000022fRRR", - evlhhesplatx_3 = "10000308RR0R", - evlhhesplat_2 = "10000309R2", - evlhhousplatx_3 = "1000030cRR0R", - evlhhousplat_2 = "1000030dR2", - evlhhossplatx_3 = "1000030eRR0R", - evlhhossplat_2 = "1000030fR2", - evlwwsplatx_3 = "10000318RR0R", - evlwwsplat_2 = "10000319R4", - evlwhsplatx_3 = "1000031cRR0R", - evlwhsplat_2 = "1000031dR4", - evaddusiaaw_2 = "100004c0RR", - evaddssiaaw_2 = "100004c1RR", - evsubfusiaaw_2 = "100004c2RR", - evsubfssiaaw_2 = "100004c3RR", - evaddumiaaw_2 = "100004c8RR", - evaddsmiaaw_2 = "100004c9RR", - evsubfumiaaw_2 = "100004caRR", - evsubfsmiaaw_2 = "100004cbRR", - evmheusiaaw_3 = "10000500RRR", - evmhessiaaw_3 = "10000501RRR", - evmhessfaaw_3 = "10000503RRR", - 
evmhousiaaw_3 = "10000504RRR", - evmhossiaaw_3 = "10000505RRR", - evmhossfaaw_3 = "10000507RRR", - evmheumiaaw_3 = "10000508RRR", - evmhesmiaaw_3 = "10000509RRR", - evmhesmfaaw_3 = "1000050bRRR", - evmhoumiaaw_3 = "1000050cRRR", - evmhosmiaaw_3 = "1000050dRRR", - evmhosmfaaw_3 = "1000050fRRR", - evmhegumiaa_3 = "10000528RRR", - evmhegsmiaa_3 = "10000529RRR", - evmhegsmfaa_3 = "1000052bRRR", - evmhogumiaa_3 = "1000052cRRR", - evmhogsmiaa_3 = "1000052dRRR", - evmhogsmfaa_3 = "1000052fRRR", - evmwlusiaaw_3 = "10000540RRR", - evmwlssiaaw_3 = "10000541RRR", - evmwlumiaaw_3 = "10000548RRR", - evmwlsmiaaw_3 = "10000549RRR", - evmheusianw_3 = "10000580RRR", - evmhessianw_3 = "10000581RRR", - evmhessfanw_3 = "10000583RRR", - evmhousianw_3 = "10000584RRR", - evmhossianw_3 = "10000585RRR", - evmhossfanw_3 = "10000587RRR", - evmheumianw_3 = "10000588RRR", - evmhesmianw_3 = "10000589RRR", - evmhesmfanw_3 = "1000058bRRR", - evmhoumianw_3 = "1000058cRRR", - evmhosmianw_3 = "1000058dRRR", - evmhosmfanw_3 = "1000058fRRR", - evmhegumian_3 = "100005a8RRR", - evmhegsmian_3 = "100005a9RRR", - evmhegsmfan_3 = "100005abRRR", - evmhogumian_3 = "100005acRRR", - evmhogsmian_3 = "100005adRRR", - evmhogsmfan_3 = "100005afRRR", - evmwlusianw_3 = "100005c0RRR", - evmwlssianw_3 = "100005c1RRR", - evmwlumianw_3 = "100005c8RRR", - evmwlsmianw_3 = "100005c9RRR", - - -- NYI: Book E instructions. -} - --- Add mnemonics for "." variants. -do - local t = {} - for k,v in pairs(map_op) do - if type(v) == "string" and sub(v, -1) == "." then - local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2) - t[sub(k, 1, -3).."."..sub(k, -2)] = v2 - end - end - for k,v in pairs(t) do - map_op[k] = v - end -end - --- Add more branch mnemonics. 
-for cond,c in pairs(map_cond) do - local b1 = "b"..cond - local c1 = shl(band(c, 3), 16) + (c < 4 and 0x01000000 or 0) - -- bX[l] - map_op[b1.."_1"] = tohex(0x40800000 + c1).."K" - map_op[b1.."y_1"] = tohex(0x40a00000 + c1).."K" - map_op[b1.."l_1"] = tohex(0x40800001 + c1).."K" - map_op[b1.."_2"] = tohex(0x40800000 + c1).."-XK" - map_op[b1.."y_2"] = tohex(0x40a00000 + c1).."-XK" - map_op[b1.."l_2"] = tohex(0x40800001 + c1).."-XK" - -- bXlr[l] - map_op[b1.."lr_0"] = tohex(0x4c800020 + c1) - map_op[b1.."lrl_0"] = tohex(0x4c800021 + c1) - map_op[b1.."ctr_0"] = tohex(0x4c800420 + c1) - map_op[b1.."ctrl_0"] = tohex(0x4c800421 + c1) - -- bXctr[l] - map_op[b1.."lr_1"] = tohex(0x4c800020 + c1).."-X" - map_op[b1.."lrl_1"] = tohex(0x4c800021 + c1).."-X" - map_op[b1.."ctr_1"] = tohex(0x4c800420 + c1).."-X" - map_op[b1.."ctrl_1"] = tohex(0x4c800421 + c1).."-X" -end - ------------------------------------------------------------------------------- - -local function parse_gpr(expr) - local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$") - local tp = map_type[tname or expr] - if tp then - local reg = ovreg or tp.reg - if not reg then - werror("type `"..(tname or expr).."' needs a register override") - end - expr = reg - end - local r = match(expr, "^r([1-3]?[0-9])$") - if r then - r = tonumber(r) - if r <= 31 then return r, tp end - end - werror("bad register name `"..expr.."'") -end - -local function parse_fpr(expr) - local r = match(expr, "^f([1-3]?[0-9])$") - if r then - r = tonumber(r) - if r <= 31 then return r end - end - werror("bad register name `"..expr.."'") -end - -local function parse_vr(expr) - local r = match(expr, "^v([1-3]?[0-9])$") - if r then - r = tonumber(r) - if r <= 31 then return r end - end - werror("bad register name `"..expr.."'") -end - -local function parse_vs(expr) - local r = match(expr, "^vs([1-6]?[0-9])$") - if r then - r = tonumber(r) - if r <= 63 then return r end - end - werror("bad register name `"..expr.."'") -end - -local function 
parse_cr(expr) - local r = match(expr, "^cr([0-7])$") - if r then return tonumber(r) end - werror("bad condition register name `"..expr.."'") -end - -local function parse_cond(expr) - local r, cond = match(expr, "^4%*cr([0-7])%+(%w%w)$") - if r then - r = tonumber(r) - local c = map_cond[cond] - if c and c < 4 then return r*4+c end - end - werror("bad condition bit name `"..expr.."'") -end - -local parse_ctx = {} - -local loadenv = setfenv and function(s) - local code = loadstring(s, "") - if code then setfenv(code, parse_ctx) end - return code -end or function(s) - return load(s, "", nil, parse_ctx) -end - --- Try to parse simple arithmetic, too, since some basic ops are aliases. -local function parse_number(n) - local x = tonumber(n) - if x then return x end - local code = loadenv("return "..n) - if code then - local ok, y = pcall(code) - if ok then return y end - end - return nil -end - -local function parse_imm(imm, bits, shift, scale, signed) - local n = parse_number(imm) - if n then - local m = sar(n, scale) - if shl(m, scale) == n then - if signed then - local s = sar(m, bits-1) - if s == 0 then return shl(m, shift) - elseif s == -1 then return shl(m + shl(1, bits), shift) end - else - if sar(m, bits) == 0 then return shl(m, shift) end - end - end - werror("out of range immediate `"..imm.."'") - elseif match(imm, "^[rfv]([1-3]?[0-9])$") or - match(imm, "^vs([1-6]?[0-9])$") or - match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then - werror("expected immediate operand, got register") - else - waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) - return 0 - end -end - -local function parse_shiftmask(imm, isshift) - local n = parse_number(imm) - if n then - if shr(n, 6) == 0 then - local lsb = band(n, 31) - local msb = n - lsb - return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb) - end - werror("out of range immediate `"..imm.."'") - elseif match(imm, "^r([1-3]?[0-9])$") or - match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then - 
werror("expected immediate operand, got register") - else - waction("IMMSH", isshift and 1 or 0, imm) - return 0; - end -end - -local function parse_disp(disp) - local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") - if imm then - local r = parse_gpr(reg) - if r == 0 then werror("cannot use r0 in displacement") end - return shl(r, 16) + parse_imm(imm, 16, 0, 0, true) - end - local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") - if reg and tailr ~= "" then - local r, tp = parse_gpr(reg) - if r == 0 then werror("cannot use r0 in displacement") end - if tp then - waction("IMM", 32768+16*32, format(tp.ctypefmt, tailr)) - return shl(r, 16) - end - end - werror("bad displacement `"..disp.."'") -end - -local function parse_u5disp(disp, scale) - local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") - if imm then - local r = parse_gpr(reg) - if r == 0 then werror("cannot use r0 in displacement") end - return shl(r, 16) + parse_imm(imm, 5, 11, scale, false) - end - local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") - if reg and tailr ~= "" then - local r, tp = parse_gpr(reg) - if r == 0 then werror("cannot use r0 in displacement") end - if tp then - waction("IMM", scale*1024+5*32+11, format(tp.ctypefmt, tailr)) - return shl(r, 16) - end - end - werror("bad displacement `"..disp.."'") -end - -local function parse_label(label, def) - local prefix = sub(label, 1, 2) - -- =>label (pc label reference) - if prefix == "=>" then - return "PC", 0, sub(label, 3) - end - -- ->name (global label reference) - if prefix == "->" then - return "LG", map_global[sub(label, 3)] - end - if def then - -- [1-9] (local label definition) - if match(label, "^[1-9]$") then - return "LG", 10+tonumber(label) - end - else - -- [<>][1-9] (local label reference) - local dir, lnum = match(label, "^([<>])([1-9])$") - if dir then -- Fwd: 1-9, Bkwd: 11-19. 
- return "LG", lnum + (dir == ">" and 0 or 10) - end - -- extern label (extern label reference) - local extname = match(label, "^extern%s+(%S+)$") - if extname then - return "EXT", map_extern[extname] - end - end - werror("bad label `"..label.."'") -end - ------------------------------------------------------------------------------- - --- Handle opcodes defined with template strings. -op_template = function(params, template, nparams) - if not params then return sub(template, 9) end - local op = tonumber(sub(template, 1, 8), 16) - local n, rs = 1, 26 - - -- Limit number of section buffer positions used by a single dasm_put(). - -- A single opcode needs a maximum of 3 positions (rlwinm). - if secpos+3 > maxsecpos then wflush() end - local pos = wpos() - - -- Process each character. - for p in gmatch(sub(template, 9), ".") do - if p == "R" then - rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1 - elseif p == "F" then - rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1 - elseif p == "V" then - rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1 - elseif p == "Q" then - local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5 - local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3) - op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh) - elseif p == "q" then - local vs = parse_vs(params[n]); n = n + 1 - op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5) - elseif p == "A" then - rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1 - elseif p == "S" then - rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, true); n = n + 1 - elseif p == "I" then - op = op + parse_imm(params[n], 16, 0, 0, true); n = n + 1 - elseif p == "U" then - op = op + parse_imm(params[n], 16, 0, 0, false); n = n + 1 - elseif p == "D" then - op = op + parse_disp(params[n]); n = n + 1 - elseif p == "2" then - op = op + parse_u5disp(params[n], 1); n = n + 1 - elseif p == "4" then - op = op + parse_u5disp(params[n], 2); n = n + 1 - elseif p 
== "8" then - op = op + parse_u5disp(params[n], 3); n = n + 1 - elseif p == "C" then - rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1 - elseif p == "X" then - rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1 - elseif p == "1" then - rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1 - elseif p == "g" then - rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1 - elseif p == "3" then - rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1 - elseif p == "P" then - rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1 - elseif p == "p" then - op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1 - elseif p == "6" then - rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1 - elseif p == "Y" then - rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1 - elseif p == "y" then - rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1 - elseif p == "Z" then - rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1 - elseif p == "z" then - rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1 - elseif p == "W" then - op = op + parse_cr(params[n]); n = n + 1 - elseif p == "G" then - op = op + parse_imm(params[n], 8, 12, 0, false); n = n + 1 - elseif p == "H" then - op = op + parse_shiftmask(params[n], true); n = n + 1 - elseif p == "M" then - op = op + parse_shiftmask(params[n], false); n = n + 1 - elseif p == "J" or p == "K" then - local mode, n, s = parse_label(params[n], false) - if p == "K" then n = n + 2048 end - waction("REL_"..mode, n, s, 1) - n = n + 1 - elseif p == "0" then - if band(shr(op, rs), 31) == 0 then werror("cannot use r0") end - elseif p == "=" or p == "%" then - local t = band(shr(op, p == "%" and rs+5 or rs), 31) - rs = rs - 5 - op = op + shl(t, rs) - elseif p == "~" then - local mm = shl(31, rs) - local lo = band(op, mm) - local hi = band(op, 
shl(mm, 5)) - op = op - lo - hi + shl(lo, 5) + shr(hi, 5) - elseif p == ":" then - if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end - elseif p == "-" then - rs = rs - 5 - elseif p == "." then - -- Ignored. - else - assert(false) - end - end - wputpos(pos, op) -end - -map_op[".template__"] = op_template - ------------------------------------------------------------------------------- - --- Pseudo-opcode to mark the position where the action list is to be emitted. -map_op[".actionlist_1"] = function(params) - if not params then return "cvar" end - local name = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeactions(out, name) end) -end - --- Pseudo-opcode to mark the position where the global enum is to be emitted. -map_op[".globals_1"] = function(params) - if not params then return "prefix" end - local prefix = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeglobals(out, prefix) end) -end - --- Pseudo-opcode to mark the position where the global names are to be emitted. -map_op[".globalnames_1"] = function(params) - if not params then return "cvar" end - local name = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeglobalnames(out, name) end) -end - --- Pseudo-opcode to mark the position where the extern names are to be emitted. -map_op[".externnames_1"] = function(params) - if not params then return "cvar" end - local name = params[1] -- No syntax check. You get to keep the pieces. - wline(function(out) writeexternnames(out, name) end) -end - ------------------------------------------------------------------------------- - --- Label pseudo-opcode (converted from trailing colon form). 
-map_op[".label_1"] = function(params) - if not params then return "[1-9] | ->global | =>pcexpr" end - if secpos+1 > maxsecpos then wflush() end - local mode, n, s = parse_label(params[1], true) - if mode == "EXT" then werror("bad label definition") end - waction("LABEL_"..mode, n, s, 1) -end - ------------------------------------------------------------------------------- - --- Pseudo-opcodes for data storage. -map_op[".long_*"] = function(params) - if not params then return "imm..." end - for _,p in ipairs(params) do - local n = tonumber(p) - if not n then werror("bad immediate `"..p.."'") end - if n < 0 then n = n + 2^32 end - wputw(n) - if secpos+2 > maxsecpos then wflush() end - end -end - --- Alignment pseudo-opcode. -map_op[".align_1"] = function(params) - if not params then return "numpow2" end - if secpos+1 > maxsecpos then wflush() end - local align = tonumber(params[1]) - if align then - local x = align - -- Must be a power of 2 in the range (2 ... 256). - for i=1,8 do - x = x / 2 - if x == 1 then - waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. - return - end - end - end - werror("bad alignment") -end - ------------------------------------------------------------------------------- - --- Pseudo-opcode for (primitive) type definitions (map to C types). -map_op[".type_3"] = function(params, nparams) - if not params then - return nparams == 2 and "name, ctype" or "name, ctype, reg" - end - local name, ctype, reg = params[1], params[2], params[3] - if not match(name, "^[%a_][%w_]*$") then - werror("bad type name `"..name.."'") - end - local tp = map_type[name] - if tp then - werror("duplicate type `"..name.."'") - end - -- Add #type to defines. A bit unclean to put it in map_archdef. - map_archdef["#"..name] = "sizeof("..ctype..")" - -- Add new type and emit shortcut define. 
- local num = ctypenum + 1 - map_type[name] = { - ctype = ctype, - ctypefmt = format("Dt%X(%%s)", num), - reg = reg, - } - wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) - ctypenum = num -end -map_op[".type_2"] = map_op[".type_3"] - --- Dump type definitions. -local function dumptypes(out, lvl) - local t = {} - for name in pairs(map_type) do t[#t+1] = name end - sort(t) - out:write("Type definitions:\n") - for _,name in ipairs(t) do - local tp = map_type[name] - local reg = tp.reg or "" - out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) - end - out:write("\n") -end - ------------------------------------------------------------------------------- - --- Set the current section. -function _M.section(num) - waction("SECTION", num) - wflush(true) -- SECTION is a terminal action. -end - ------------------------------------------------------------------------------- - --- Dump architecture description. -function _M.dumparch(out) - out:write(format("DynASM %s version %s, released %s\n\n", - _info.arch, _info.version, _info.release)) - dumpactions(out) -end - --- Dump all user defined elements. -function _M.dumpdef(out, lvl) - dumptypes(out, lvl) - dumpglobals(out, lvl) - dumpexterns(out, lvl) -end - ------------------------------------------------------------------------------- - --- Pass callbacks from/to the DynASM core. -function _M.passcb(wl, we, wf, ww) - wline, werror, wfatal, wwarn = wl, we, wf, ww - return wflush -end - --- Setup the arch-specific module. -function _M.setup(arch, opt) - g_arch, g_opt = arch, opt -end - --- Merge the core maps and the arch-specific maps. 
-function _M.mergemaps(map_coreop, map_def) - setmetatable(map_op, { __index = map_coreop }) - setmetatable(map_def, { __index = map_archdef }) - return map_op, map_def -end - -return _M - ------------------------------------------------------------------------------- - diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h index bc636357a6..f598da9afd 100644 --- a/dynasm/dasm_x86.h +++ b/dynasm/dasm_x86.h @@ -421,7 +421,8 @@ int dasm_encode(Dst_DECL, void *buffer) } case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; b++; n = (int)(ptrdiff_t)D->globals[-n]; - case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ + case DASM_REL_A: rel_a: + n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ case DASM_REL_PC: rel_pc: { int shrink = *b++; int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; } diff --git a/etc/luajit.1 b/etc/luajit.1 index 0d263db79f..09e57e78cf 100644 --- a/etc/luajit.1 +++ b/etc/luajit.1 @@ -68,10 +68,6 @@ luajit \-e "local x=0; for i=1,1e9 do x=x+i end; print(x)" Calculates the sum of the numbers from 1 to 1000000000. .br And finishes in a reasonable amount of time, too. -.TP -luajit \-jv \-e "for i=1,10 do for j=1,10 do for k=1,100 do end end end" - -Runs some nested loops and shows the resulting traces. .SH COPYRIGHT .PP \fBLuaJIT\fR is Copyright \(co 2005-2017 Mike Pall. diff --git a/etc/luajit.pc b/etc/raptorjit.pc similarity index 63% rename from etc/luajit.pc rename to etc/raptorjit.pc index 0fdd1efd9b..126c12f177 100644 --- a/etc/luajit.pc +++ b/etc/raptorjit.pc @@ -1,21 +1,21 @@ -# Package information for LuaJIT to be used by pkg-config. -majver=2 -minver=1 +# Package information for RaptorJIT to be used by pkg-config. 
+majver=1 +minver=0 relver=0 -version=${majver}.${minver}.${relver}-beta2 +version=${majver}.${minver}.${relver} abiver=5.1 prefix=/usr/local multilib=lib exec_prefix=${prefix} libdir=${exec_prefix}/${multilib} -libname=luajit-${abiver} -includedir=${prefix}/include/luajit-${majver}.${minver} +libname=raptorjit-${abiver} +includedir=${prefix}/include/raptorjit-${majver}.${minver} INSTALL_LMOD=${prefix}/share/lua/${abiver} INSTALL_CMOD=${prefix}/${multilib}/lua/${abiver} -Name: LuaJIT +Name: RaptorJIT Description: Just-in-time compiler for Lua URL: http://luajit.org Version: ${version} diff --git a/pkgs.nix b/pkgs.nix new file mode 100644 index 0000000000..a48840921d --- /dev/null +++ b/pkgs.nix @@ -0,0 +1 @@ +import (fetchTarball https://github.com/NixOS/nixpkgs-channels/archive/6a0155d2b7cb10aef1c63b654a2b172d78fd89b4.tar.gz) diff --git a/raptorjit.nix b/raptorjit.nix new file mode 100644 index 0000000000..6a057ff069 --- /dev/null +++ b/raptorjit.nix @@ -0,0 +1,32 @@ +# raptorjit.nix - compile RaptorJIT with reference toolchain + +{ pkgs, source, version }: + +with pkgs; +with stdenv; + +mkDerivation rec { + name = "raptorjit-${version}"; + inherit version; + src = source; + buildInputs = [ luajit ]; # LuaJIT to bootstrap DynASM + dontStrip = true; + patchPhase = '' + substituteInPlace Makefile --replace "/usr/local" "$out" + ''; + configurePhase = false; + installPhase = '' + make install PREFIX="$out" + ''; + # Simple inventory test. 
+ installCheckPhase = '' + for file in bin/raptorjit lib/libraptorjit-5.1.so \ + lib/pkgconfig/raptorjit.pc; do + echo "Checking for $file" + test -f $out/$file + done + ''; + doInstallCheck = true; + enableParallelBuilding = true; # Do 'make -j' +} + diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000000..f654fd33d3 --- /dev/null +++ b/shell.nix @@ -0,0 +1,2 @@ +(import ./default.nix {}).raptorjit + diff --git a/src/.gitignore b/src/.gitignore index 1a30573c9f..df66435708 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -1,7 +1 @@ luajit -lj_bcdef.h -lj_ffdef.h -lj_libdef.h -lj_recdef.h -lj_folddef.h -lj_vm.[sS] diff --git a/src/Makefile b/src/Makefile index 7cb4c14ad9..fac8fd53fa 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,5 +1,5 @@ ############################################################################## -# LuaJIT Makefile. Requires GNU Make. +# RaptorJIT Makefile. Requires GNU Make. # # Please read doc/install.html before changing any variables! # @@ -10,8 +10,8 @@ # Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h ############################################################################## -MAJVER= 2 -MINVER= 1 +MAJVER= 1 +MINVER= 0 RELVER= 0 ABIVER= 5.1 NODOTABIVER= 51 @@ -24,14 +24,6 @@ NODOTABIVER= 51 # removing the '#' in front of them. Make sure you force a full recompile # with "make clean", followed by "make" if you change any options. # -DEFAULT_CC = gcc -# -# LuaJIT builds as a native 32 or 64 bit binary by default. -CC= $(DEFAULT_CC) -# -# Use this if you want to force a 32 bit build on a 64 bit multilib OS. -#CC= $(DEFAULT_CC) -m32 -# # Since the assembler part does NOT maintain a frame pointer, it's pointless # to slow down the C part by not omitting it. Debugging, tracebacks and # unwinding are not affected -- the assembler part has frame unwind @@ -42,18 +34,6 @@ CCOPT= -O2 -fomit-frame-pointer # Note: it's no longer recommended to use -O3 with GCC 4.x. 
# The I-Cache bloat usually outweighs the benefits from aggressive inlining. # -# Target-specific compiler options: -# -# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute -# the binaries to a different machine you could also use: -march=native -# -CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse -CCOPT_x64= -CCOPT_arm= -CCOPT_arm64= -CCOPT_ppc= -CCOPT_mips= -# CCDEBUG= # Uncomment the next line to generate debug information: #CCDEBUG= -g @@ -64,24 +44,6 @@ CCWARN= -Wall # ############################################################################## -############################################################################## -################################ BUILD MODE ################################ -############################################################################## -# The default build mode is mixed mode on POSIX. On Windows this is the same -# as dynamic mode. -# -# Mixed mode creates a static + dynamic library and a statically linked luajit. -BUILDMODE= mixed -# -# Static mode creates a static library and a statically linked luajit. -#BUILDMODE= static -# -# Dynamic mode creates a dynamic library and a dynamically linked luajit. -# Note: this executable will only run when the library is installed! -#BUILDMODE= dynamic -# -############################################################################## - ############################################################################## ################################# FEATURES ################################# ############################################################################## @@ -110,9 +72,6 @@ XCFLAGS= #XCFLAGS+= -DLUAJIT_NUMMODE=1 #XCFLAGS+= -DLUAJIT_NUMMODE=2 # -# Enable GC64 mode for x64. -#XCFLAGS+= -DLUAJIT_ENABLE_GC64 -# ############################################################################## ############################################################################## @@ -122,14 +81,6 @@ XCFLAGS= # with "make clean", followed by "make". 
# Note that most of these are NOT suitable for benchmarking or release mode! # -# Use the system provided memory allocator (realloc) instead of the -# bundled memory allocator. This is slower, but sometimes helpful for -# debugging. This option cannot be enabled on x64 without GC64, since -# realloc usually doesn't return addresses in the right address range. -# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and -# the only way to get useful results from it for all other architectures. -#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC -# # This define is required to run LuaJIT under Valgrind. The Valgrind # header files must be installed. You should enable debug information, too. # Use --suppressions=lj.supp to avoid some false positives. @@ -152,25 +103,6 @@ XCFLAGS= # You probably don't need to change anything below this line! ############################################################################## -############################################################################## -# Host system detection. -############################################################################## - -ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM)) - HOST_SYS= Windows - HOST_RM= del -else - HOST_SYS:= $(shell uname -s) - ifneq (,$(findstring MINGW,$(HOST_SYS))) - HOST_SYS= Windows - HOST_MSYS= mingw - endif - ifneq (,$(findstring CYGWIN,$(HOST_SYS))) - HOST_SYS= Windows - HOST_MSYS= cygwin - endif -endif - ############################################################################## # Flags and options for host and target. ############################################################################## @@ -192,9 +124,7 @@ LDOPTIONS= $(CCDEBUG) $(LDFLAGS) HOST_CC= $(CC) HOST_RM= rm -f -# If left blank, minilua is built and used. You can supply an installed -# copy of (plain) Lua 5.1 or 5.2, plus Lua BitOp. E.g. with: HOST_LUA=lua -HOST_LUA= +HOST_LUA=luajit HOST_XCFLAGS= -I. 
HOST_XLDFLAGS= @@ -210,11 +140,10 @@ TARGET_STCC= $(STATIC_CC) TARGET_DYNCC= $(DYNAMIC_CC) TARGET_LD= $(CROSS)$(CC) TARGET_AR= $(CROSS)ar rcus 2>/dev/null -TARGET_STRIP= $(CROSS)strip TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) -TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER) -TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib +TARGET_SONAME= libraptorjit-$(ABIVER).so.$(MAJVER) +TARGET_DYLIBNAME= libraptorjit-$(ABIVER).$(MAJVER).dylib TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME) TARGET_DLLNAME= lua$(NODOTABIVER).dll TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME) @@ -222,7 +151,7 @@ TARGET_DYNXLDOPTS= TARGET_LFSFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE TARGET_XCFLAGS= $(TARGET_LFSFLAGS) -U_FORTIFY_SOURCE -TARGET_XLDFLAGS= +TARGET_XLDFLAGS= -Wl,-E TARGET_XLIBS= -lm TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) @@ -232,52 +161,8 @@ TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SH TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM) -ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH))) - TARGET_LJARCH= x64 -else -ifneq (,$(findstring LJ_TARGET_X86 ,$(TARGET_TESTARCH))) - TARGET_LJARCH= x86 -else -ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) - TARGET_LJARCH= arm -else -ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) - TARGET_LJARCH= arm64 -else -ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) - ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH))) - TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE - else - TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE - endif - TARGET_LJARCH= ppc -else -ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) - ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) - TARGET_ARCH= -D__MIPSEL__=1 - endif - ifneq (,$(findstring LJ_TARGET_MIPS64 
,$(TARGET_TESTARCH))) - TARGET_LJARCH= mips64 - else - TARGET_LJARCH= mips - endif -else - $(error Unsupported target architecture) -endif -endif -endif -endif -endif -endif +TARGET_LJARCH= x64 -ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) - TARGET_SYS= PS3 - TARGET_ARCH+= -D__CELLOS_LV2__ - TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC - TARGET_XLIBS+= -lpthread -endif - -TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH)) TARGET_ARCH+= $(patsubst %,-DLUAJIT_TARGET=LUAJIT_ARCH_%,$(TARGET_LJARCH)) ifneq (,$(PREFIX)) @@ -299,164 +184,17 @@ endif # Target system detection. ############################################################################## -TARGET_SYS?= $(HOST_SYS) -ifeq (Windows,$(TARGET_SYS)) - TARGET_STRIP+= --strip-unneeded - TARGET_XSHLDFLAGS= -shared - TARGET_DYNXLDOPTS= -else -ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1)) - TARGET_XCFLAGS+= -fno-stack-protector -endif -ifeq (Darwin,$(TARGET_SYS)) - ifeq (,$(MACOSX_DEPLOYMENT_TARGET)) - export MACOSX_DEPLOYMENT_TARGET=10.4 - endif - TARGET_STRIP+= -x - TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC - TARGET_DYNXLDOPTS= - TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) - ifeq (x64,$(TARGET_LJARCH)) - TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000 - TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000 - endif -else -ifeq (iOS,$(TARGET_SYS)) - TARGET_STRIP+= -x - TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC - TARGET_DYNXLDOPTS= - TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) - ifeq (arm64,$(TARGET_LJARCH)) - TARGET_XCFLAGS+= -fno-omit-frame-pointer - endif -else - ifneq (SunOS,$(TARGET_SYS)) - ifneq (PS3,$(TARGET_SYS)) - TARGET_XLDFLAGS+= -Wl,-E - endif - endif - ifeq (Linux,$(TARGET_SYS)) 
- TARGET_XLIBS+= -ldl - endif - ifeq (GNU/kFreeBSD,$(TARGET_SYS)) - TARGET_XLIBS+= -ldl - endif -endif -endif -endif - -ifneq ($(HOST_SYS),$(TARGET_SYS)) - ifeq (Windows,$(TARGET_SYS)) - HOST_XCFLAGS+= -malign-double -DLUAJIT_OS=LUAJIT_OS_WINDOWS - else - ifeq (Linux,$(TARGET_SYS)) - HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_LINUX - else - ifeq (Darwin,$(TARGET_SYS)) - HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX - else - ifeq (iOS,$(TARGET_SYS)) - HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX - else - HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER - endif - endif - endif - endif -endif - -ifneq (,$(CCDEBUG)) - TARGET_STRIP= @: -endif +TARGET_XCFLAGS+= -fno-stack-protector +TARGET_XLIBS+= -ldl +HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_LINUX ############################################################################## # Files and pathnames. ############################################################################## -MINILUA_O= host/minilua.o -MINILUA_LIBS= -lm -MINILUA_T= host/minilua -MINILUA_X= $(MINILUA_T) - -ifeq (,$(HOST_LUA)) - HOST_LUA= $(MINILUA_X) - DASM_DEP= $(MINILUA_T) -endif - DASM_DIR= ../dynasm DASM= $(HOST_LUA) $(DASM_DIR)/dynasm.lua -DASM_XFLAGS= -DASM_AFLAGS= -DASM_ARCH= $(TARGET_LJARCH) - -ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D ENDIAN_LE -else - DASM_AFLAGS+= -D ENDIAN_BE -endif -ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D P64 -endif -ifneq (,$(findstring LJ_HASJIT 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D JIT -endif -ifneq (,$(findstring LJ_HASFFI 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D FFI -endif -ifneq (,$(findstring LJ_DUALNUM 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D DUALNUM -endif -ifneq (,$(findstring LJ_ARCH_HASFPU 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D FPU - TARGET_ARCH+= -DLJ_ARCH_HASFPU=1 -else - TARGET_ARCH+= -DLJ_ARCH_HASFPU=0 -endif -ifeq (,$(findstring LJ_ABI_SOFTFP 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D HFABI - TARGET_ARCH+= -DLJ_ABI_SOFTFP=0 -else - TARGET_ARCH+= 
-DLJ_ABI_SOFTFP=1 -endif -ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D NO_UNWIND - TARGET_ARCH+= -DLUAJIT_NO_UNWIND -endif -DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) -ifeq (Windows,$(TARGET_SYS)) - DASM_AFLAGS+= -D WIN -endif -ifeq (x64,$(TARGET_LJARCH)) - ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH))) - DASM_ARCH= x86 - endif -else -ifeq (arm,$(TARGET_LJARCH)) - ifeq (iOS,$(TARGET_SYS)) - DASM_AFLAGS+= -D IOS - endif -else -ifeq (ppc,$(TARGET_LJARCH)) - ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D SQRT - endif - ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D ROUND - endif - ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D GPR64 - endif - ifeq (PS3,$(TARGET_SYS)) - DASM_AFLAGS+= -D PPE -D TOC - endif - ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH))) - DASM_ARCH= ppc64 - endif -endif -endif -endif - -DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) +DASM_ARCH= x64 DASM_DASC= vm_$(DASM_ARCH).dasc BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \ @@ -464,8 +202,8 @@ BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \ BUILDVM_T= host/buildvm BUILDVM_X= $(BUILDVM_T) -HOST_O= $(MINILUA_O) $(BUILDVM_O) -HOST_T= $(MINILUA_T) $(BUILDVM_T) +HOST_O= $(BUILDVM_O) +HOST_T= $(BUILDVM_T) LJVM_S= lj_vm.S LJVM_O= lj_vm.o @@ -478,109 +216,42 @@ LJLIB_C= $(LJLIB_O:.o=.c) LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ - lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ - lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \ + lj_state.o lj_dispatch.o lj_vmmath.o lj_strscan.o \ + lj_strfmt.o lj_strfmt_num.o lj_api.o lj_vmprofile.o \ lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ 
lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ - lj_asm.o lj_trace.o lj_gdbjit.o \ + lj_asm.o lj_trace.o lj_gdbjit.o lj_auditlog.o \ lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \ lj_carith.o lj_clib.o lj_cparse.o \ - lj_lib.o lj_alloc.o lib_aux.o \ + lj_lib.o lib_aux.o lj_dwarf_dwo.o \ $(LJLIB_O) lib_init.o +DWARF_DWO= lj_dwarf.dwo + LJVMCORE_O= $(LJVM_O) $(LJCORE_O) LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o) -LIB_VMDEF= jit/vmdef.lua -LIB_VMDEFP= $(LIB_VMDEF) - -LUAJIT_O= luajit.o -LUAJIT_A= libluajit.a -LUAJIT_SO= libluajit.so -LUAJIT_T= luajit +LUAJIT_O= raptorjit.o +LUAJIT_A= raptorjit.a +LUAJIT_SO= libraptorjit.so +LUAJIT_T= raptorjit ALL_T= $(LUAJIT_T) $(LUAJIT_A) $(LUAJIT_SO) $(HOST_T) ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \ host/buildvm_arch.h -ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) $(LIB_VMDEFP) +ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest *.pdb *.ilk -ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM) +ALL_RM= $(ALL_T) *.o host/*.o $(WIN_RM) ############################################################################## # Build mode handling. ############################################################################## -# Mixed mode defaults. TARGET_O= $(LUAJIT_A) TARGET_T= $(LUAJIT_T) $(LUAJIT_SO) -TARGET_DEP= $(LIB_VMDEF) $(LUAJIT_SO) - -ifeq (Windows,$(TARGET_SYS)) - TARGET_DYNCC= $(STATIC_CC) - LJVM_MODE= peobj - LJVM_BOUT= $(LJVM_O) - LUAJIT_T= luajit.exe - ifeq (cygwin,$(HOST_MSYS)) - LUAJIT_SO= cyg$(TARGET_DLLNAME) - else - LUAJIT_SO= $(TARGET_DLLNAME) - endif - # Mixed mode is not supported on Windows. And static mode doesn't work well. - # C modules cannot be loaded, because they bind to lua51.dll. 
- ifneq (static,$(BUILDMODE)) - BUILDMODE= dynamic - TARGET_XCFLAGS+= -DLUA_BUILD_AS_DLL - endif -endif -ifeq (Darwin,$(TARGET_SYS)) - LJVM_MODE= machasm -endif -ifeq (iOS,$(TARGET_SYS)) - LJVM_MODE= machasm -endif -ifeq (SunOS,$(TARGET_SYS)) - BUILDMODE= static -endif -ifeq (PS3,$(TARGET_SYS)) - BUILDMODE= static -endif - -ifeq (Windows,$(HOST_SYS)) - MINILUA_T= host/minilua.exe - BUILDVM_T= host/buildvm.exe - ifeq (,$(HOST_MSYS)) - MINILUA_X= host\minilua - BUILDVM_X= host\buildvm - ALL_RM:= $(subst /,\,$(ALL_RM)) - endif -endif - -ifeq (static,$(BUILDMODE)) - TARGET_DYNCC= @: - TARGET_T= $(LUAJIT_T) - TARGET_DEP= $(LIB_VMDEF) -else -ifeq (dynamic,$(BUILDMODE)) - ifneq (Windows,$(TARGET_SYS)) - TARGET_CC= $(DYNAMIC_CC) - endif - TARGET_DYNCC= @: - LJVMCORE_DYNO= $(LJVMCORE_O) - TARGET_O= $(LUAJIT_SO) - TARGET_XLDFLAGS+= $(TARGET_DYNXLDOPTS) -else -ifeq (Darwin,$(TARGET_SYS)) - TARGET_DYNCC= @: - LJVMCORE_DYNO= $(LJVMCORE_O) -endif -ifeq (iOS,$(TARGET_SYS)) - TARGET_DYNCC= @: - LJVMCORE_DYNO= $(LJVMCORE_O) -endif -endif -endif +TARGET_DEP= $(LUAJIT_SO) Q= @ E= @echo @@ -591,14 +262,16 @@ E= @echo # Make targets. ############################################################################## -default all: $(TARGET_T) - -amalg: - @grep "^[+|]" ljamalg.c - $(MAKE) all "LJCORE_O=ljamalg.o" +default all: $(TARGET_T) $(DWARF_DWO) clean: $(HOST_RM) $(ALL_RM) + $(HOST_RM) $(ALL_GEN) + +reusevm: + cd reusevm; \ + cp -r * ../ + @echo "Copied reference VM. Ready to build." libbc: ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C) @@ -618,19 +291,30 @@ depend: test -s $$file || $(HOST_RM) $$file; \ done -.PHONY: default all amalg clean libbc depend +.PHONY: default all clean libbc depend reusevm ############################################################################## # Rules for generated files. 
############################################################################## -$(MINILUA_T): $(MINILUA_O) - $(E) "HOSTLINK $@" - $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS) - host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua + @command -v $(DASM) 2>/dev/null || { \ + echo "Error: Missing dependency (luajit) for bootstrapping the VM."; \ + echo ""; \ + echo "Here are your options to build RaptorJIT:"; \ + echo " make reusevm # copy reference VM from reusevm/"; \ + echo " (install luajit) # satisfy the dependency"; \ + echo " nix-build # use nix to satisfy the dependencies"; \ + echo ""; \ + echo "Warning:"; \ + echo " Only reusevm when running a pristine copy of RaptorJIT from";\ + echo " a repository that keeps the reference VM up-to-date."; \ + echo " (Otherwise you might mix some stale code into your build.)"; \ + echo; \ + exit 1; \ + } $(E) "DYNASM $@" - $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC) + $(Q)$(DASM) -o $@ $(DASM_DASC) host/buildvm.o: $(DASM_DIR)/dasm_*.h @@ -658,10 +342,6 @@ lj_recdef.h: $(BUILDVM_T) $(LJLIB_C) $(E) "BUILDVM $@" $(Q)$(BUILDVM_X) -m recdef -o $@ $(LJLIB_C) -$(LIB_VMDEF): $(BUILDVM_T) $(LJLIB_C) - $(E) "BUILDVM $@" - $(Q)$(BUILDVM_X) -m vmdef -o $(LIB_VMDEFP) $(LJLIB_C) - lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c $(E) "BUILDVM $@" $(Q)$(BUILDVM_X) -m folddef -o $@ lj_opt_fold.c @@ -682,12 +362,24 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c $(LUAJIT_O): $(E) "CC $@" - $(Q)$(TARGET_STCC) $(TARGET_ACFLAGS) -c -o $@ $< + $(Q)$(TARGET_STCC) $(TARGET_ACFLAGS) -c -o $@ luajit.c $(HOST_O): %.o: %.c $(E) "HOSTCC $@" $(Q)$(HOST_CC) $(HOST_ACFLAGS) -c -o $@ $< +$(DWARF_DWO): $(ALL_HDRGEN) + +$(DWARF_DWO): %.dwo: %.c + $(E) "CC(debug) $@" + $(Q)$(TARGET_CC) -g3 -fno-eliminate-unused-debug-types -gsplit-dwarf -c $< + +# Embed DWARF debug information as binary data available to raptorjit. 
+lj_dwarf_dwo.o lj_dwarf_dwo_dyn.o: $(DWARF_DWO) + $(E) "EMBED $@" + $(Q)$(LD) -r -b binary -o $@ $< + $(Q)$(LD) -r -b binary -o $(@:.o=_dyn.o) $< + include Makefile.dep ############################################################################## @@ -702,12 +394,10 @@ $(LUAJIT_A): $(LJVMCORE_O) $(LUAJIT_SO): $(LJVMCORE_O) $(E) "DYNLINK $@" $(Q)$(TARGET_LD) $(TARGET_ASHLDFLAGS) -o $@ $(LJVMCORE_DYNO) $(TARGET_ALIBS) - $(Q)$(TARGET_STRIP) $@ $(LUAJIT_T): $(TARGET_O) $(LUAJIT_O) $(TARGET_DEP) $(E) "LINK $@" $(Q)$(TARGET_LD) $(TARGET_ALDFLAGS) -o $@ $(LUAJIT_O) $(TARGET_O) $(TARGET_ALIBS) - $(Q)$(TARGET_STRIP) $@ - $(E) "OK Successfully built LuaJIT" + $(E) "OK Successfully built RaptorJIT" ############################################################################## diff --git a/src/Makefile.dep b/src/Makefile.dep index 2b1cb5ef29..7db253d6bc 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -1,246 +1,215 @@ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ - lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \ - lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h + lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \ + lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ - lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ - lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \ - lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ - lj_strfmt.h lj_lib.h lj_libdef.h + lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ + lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \ + lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h \ + lj_strscan.h lj_strfmt.h lj_lib.h lj_libdef.h lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ - lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \ - lj_strfmt.h 
lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \ - lj_ffdef.h lj_lib.h lj_libdef.h + lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \ + lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \ + lj_ffdef.h lj_lib.h lj_libdef.h lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ - lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ - lj_libdef.h + lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ + lj_libdef.h lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ - lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \ - lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \ - lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \ - lj_libdef.h + lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \ + lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \ + lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \ + lj_libdef.h lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ - lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \ - lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h + lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \ + lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ - lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ - lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ - lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \ - lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h + lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ + lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ + lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \ + lj_vm.h 
lj_lib.h luajit.h lj_libdef.h lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ - lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h + lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ - lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \ - lj_libdef.h + lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \ + lj_libdef.h lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ - lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h + lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ - lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ - lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \ - lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h + lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ + lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \ + lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ - lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ - lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h -lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h + lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ + lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ - lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ - lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h + lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ + lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ + lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h 
lj_arch.h lj_gc.h \ - lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ - lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ - lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h lj_emit_*.h \ - lj_asm_*.h + lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ + lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \ + lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \ + lj_emit_*.h lj_asm_*.h lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ - lj_bcdef.h + lj_bcdef.h lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \ - lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \ - lj_strfmt.h + lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \ + lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \ + lj_strfmt.h lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \ - lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h + lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \ + lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h + lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \ - lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h -lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \ - lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ - lj_traceerr.h + lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \ + 
lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ - lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \ - lj_bc.h lj_ctype.h lj_cconv.h lj_ccall.h lj_ccallback.h lj_target.h \ - lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \ - lj_traceerr.h lj_vm.h + lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \ + lj_bc.h lj_ctype.h lj_cconv.h lj_ccall.h lj_ccallback.h lj_target.h \ + lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \ + lj_traceerr.h lj_vm.h +lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \ + lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ + lj_traceerr.h lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \ - lj_ccallback.h + lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \ + lj_ccallback.h lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h + lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \ - lj_cdata.h lj_clib.h lj_strfmt.h + lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h \ + lj_cconv.h lj_cdata.h lj_clib.h lj_strfmt.h lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \ - lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h + lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \ + lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h 
lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \ - lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \ - lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ - lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ - lj_crecord.h lj_strfmt.h + lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \ + lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \ + lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ + lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ + lj_crecord.h lj_strfmt.h lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \ - lj_ccallback.h lj_buf.h + lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \ + lj_ccallback.h lj_buf.h lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \ - lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h + lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \ + lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \ - lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \ - lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \ - lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h + lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \ + lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \ + lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \ + lj_dispatch.h lj_traceerr.h lj_vm.h luajit.h lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ - lj_errmsg.h 
lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \ - lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ - lj_traceerr.h lj_vm.h lj_strfmt.h + lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h \ + lj_bc.h lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ + lj_traceerr.h lj_vm.h lj_strfmt.h lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ - lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ - lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \ - lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h + lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ + lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ + lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \ + lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ - lj_traceerr.h lj_vm.h + lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ + lj_traceerr.h lj_vm.h lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ - lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \ - lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h + lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ + lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \ + lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \ - lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h + lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \ + lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h lj_ir.o: lj_ir.c 
lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ - lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \ - lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h + lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ + lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \ + lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \ - lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \ - lj_strfmt.h + lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \ + lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \ + lj_strfmt.h lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ - lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \ - lj_bcdump.h lj_lib.h + lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ + lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h \ + lj_lex.h lj_bcdump.h lj_lib.h lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ - lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \ - lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h + lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \ + lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ - lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h + lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ + lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_err.h lj_errmsg.h lj_buf.h 
lj_str.h lj_tab.h lj_meta.h lj_frame.h \ - lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h + lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \ + lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_ir.h lj_jit.h lj_iropt.h + lj_ir.h lj_jit.h lj_iropt.h lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \ - lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \ - lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h + lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \ + lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \ + lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \ - lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \ - lj_vm.h + lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \ + lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \ + lj_vm.h lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h + lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ - lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ - lj_traceerr.h lj_vm.h lj_strscan.h + lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ + lj_traceerr.h lj_vm.h lj_strscan.h lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h + lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ - lj_arch.h 
lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \ - lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h + lj_arch.h lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \ - lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \ - lj_vm.h lj_vmevent.h -lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \ - lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h + lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \ + lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h \ + lj_parse.h lj_vm.h lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ - lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \ - lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \ - lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h + lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ + lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \ + lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \ + lj_ffrecord.h lj_snap.h lj_vm.h lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ - lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ - lj_target_*.h lj_ctype.h lj_cdata.h + lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ + lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ + lj_target_*.h lj_ctype.h lj_cdata.h lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \ - lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \ - lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h 
lj_lex.h lj_alloc.h luajit.h + lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \ + lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \ + lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h luajit.h lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_err.h lj_errmsg.h lj_str.h lj_char.h + lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h + lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \ - lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h + lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_char.h lj_strscan.h + lj_char.h lj_strscan.h lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_err.h lj_errmsg.h lj_tab.h + lj_err.h lj_errmsg.h lj_tab.h lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \ - lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ - lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \ - lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h + lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \ + lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ + lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \ + lj_vm.h lj_target.h lj_target_*.h lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_udata.h -lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \ - lj_vm.h lj_vmevent.h + lj_gc.h lj_udata.h lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_ir.h lj_vm.h -ljamalg.o: ljamalg.c lua.h 
luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ - lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \ - lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \ - lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \ - lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \ - lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \ - lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \ - lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \ - lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \ - lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \ - lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \ - lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \ - lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \ - lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \ - lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \ - lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \ - lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \ - lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \ - lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \ - lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \ - lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \ - lib_ffi.c lib_init.c + lj_ir.h lj_vm.h luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h -host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ - lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ - lj_ircall.h lj_ir.h lj_jit.h lj_frame.h lj_bc.h lj_dispatch.h lj_ctype.h \ - lj_gc.h lj_ccall.h lj_ctype.h luajit.h \ - host/buildvm_arch.h lj_traceerr.h host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \ - lj_arch.h lj_bc.h 
lj_def.h lj_arch.h + lj_arch.h lj_bc.h +host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ + lj_arch.h lj_obj.h lj_gc.h lj_bc.h lj_ir.h lj_ircall.h lj_jit.h \ + lj_frame.h lj_dispatch.h lj_ctype.h lj_ccall.h luajit.h \ + host/buildvm_arch.h \ + lj_traceerr.h host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \ - luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h + luaconf.h lj_arch.h lj_obj.h lj_ir.h host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ - lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \ - host/buildvm_libbc.h + lj_arch.h lj_obj.h lj_bc.h lj_lib.h host/buildvm_libbc.h host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ - luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h + luaconf.h lj_arch.h lj_bc.h host/minilua.o: host/minilua.c diff --git a/src/host/.gitignore b/src/host/.gitignore index 762ac2a0c6..2b6d6618e0 100644 --- a/src/host/.gitignore +++ b/src/host/.gitignore @@ -1,3 +1,2 @@ minilua buildvm -buildvm_arch.h diff --git a/src/host/buildvm.c b/src/host/buildvm.c index de23fabdce..3b45858e4d 100644 --- a/src/host/buildvm.c +++ b/src/host/buildvm.c @@ -22,10 +22,8 @@ #include "lj_ircall.h" #include "lj_frame.h" #include "lj_dispatch.h" -#if LJ_HASFFI #include "lj_ctype.h" #include "lj_ccall.h" -#endif #include "luajit.h" #if defined(_WIN32) @@ -98,20 +96,11 @@ static const char *sym_decorate(BuildCtx *ctx, { char name[256]; char *p; -#if LJ_64 const char *symprefix = ctx->mode == BUILD_machasm ? "_" : ""; -#elif LJ_TARGET_XBOX360 - const char *symprefix = ""; -#else - const char *symprefix = ctx->mode != BUILD_elfasm ? "_" : ""; -#endif sprintf(name, "%s%s%s", symprefix, prefix, suffix); p = strchr(name, '@'); if (p) { #if LJ_TARGET_X86ORX64 - if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) - name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. 
*/ - else *p = '\0'; #elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE /* Keep @plt etc. */ @@ -214,7 +203,7 @@ static int build_code(BuildCtx *ctx) if ((LJ_HASJIT || !(i == BC_JFORI || i == BC_JFORL || i == BC_JITERL || i == BC_JLOOP || i == BC_IFORL || i == BC_IITERL || i == BC_ILOOP)) && - (LJ_HASFFI || i != BC_KCDATA)) + i != BC_KCDATA) sym_insert(ctx, ofs, LABEL_PREFIX_BC, bc_names[i]); } @@ -372,7 +361,6 @@ static void usage(void) { int i; fprintf(stderr, LUAJIT_VERSION " VM builder.\n"); - fprintf(stderr, LUAJIT_COPYRIGHT ", " LUAJIT_URL "\n"); fprintf(stderr, "Target architecture: " LJ_ARCH_NAME "\n\n"); fprintf(stderr, "Usage: buildvm -m mode [-o outfile] [infiles...]\n\n"); fprintf(stderr, "Available modes:\n"); @@ -435,7 +423,7 @@ int main(int argc, char **argv) BuildCtx *ctx = &ctx_; int status, binmode; - if (sizeof(void *) != 4*LJ_32+8*LJ_64) { + if (sizeof(void *) != 8) { fprintf(stderr,"Error: pointer size mismatch in cross-build.\n"); fprintf(stderr,"Try: make HOST_CC=\"gcc -m32\" CROSS=...\n\n"); return 1; diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c index addf281f35..37227e5607 100644 --- a/src/host/buildvm_asm.c +++ b/src/host/buildvm_asm.c @@ -74,14 +74,7 @@ static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n, if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) { /* Various fixups for external symbols outside of our binary. 
*/ if (ctx->mode == BUILD_elfasm) { - if (LJ_32) - fprintf(ctx->fp, "#if __PIC__\n\t%s lj_wrap_%s\n#else\n", opname, sym); fprintf(ctx->fp, "\t%s %s@PLT\n", opname, sym); - if (LJ_32) - fprintf(ctx->fp, "#endif\n"); - return; - } else if (LJ_32 && ctx->mode == BUILD_machasm) { - fprintf(ctx->fp, "\t%s L%s$stub\n", opname, sym); return; } } @@ -282,7 +275,7 @@ void emit_asm(BuildCtx *ctx) for (i = rel = 0; i < ctx->nsym; i++) { int32_t ofs = ctx->sym[i].ofs; int32_t next = ctx->sym[i+1].ofs; -#if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND && LJ_HASFFI +#if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND if (!strcmp(ctx->sym[i].name, "lj_vm_ffi_call")) fprintf(ctx->fp, ".globl lj_err_unwind_arm\n" @@ -320,10 +313,6 @@ void emit_asm(BuildCtx *ctx) #if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND fprintf(ctx->fp, -#if !LJ_HASFFI - ".globl lj_err_unwind_arm\n" - ".personality lj_err_unwind_arm\n" -#endif ".fnend\n"); #endif diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c index 2956fdb6cd..dbe7306eaa 100644 --- a/src/host/buildvm_lib.c +++ b/src/host/buildvm_lib.c @@ -176,7 +176,7 @@ static void libdef_fixupbc(uint8_t *p) uint8_t ra = p[libbc_endian ? 2 : 1]; uint8_t rc = p[libbc_endian ? 1 : 2]; uint8_t rb = p[libbc_endian ? 0 : 3]; - if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) { + if (op == BC_ISTYPE && rc == ~LJ_TNUMX+1) { op = BC_ISNUM; rc++; } p[LJ_ENDIAN_SELECT(0, 3)] = op; @@ -383,8 +383,6 @@ void emit_lib(BuildCtx *ctx) ok = LJ_52; else if (!strcmp(buf, "#if LJ_HASJIT\n")) ok = LJ_HASJIT; - else if (!strcmp(buf, "#if LJ_HASFFI\n")) - ok = LJ_HASFFI; if (!ok) { int lvl = 1; while (fgets(buf, sizeof(buf), fp) != NULL) { diff --git a/src/ifdef-defile b/src/ifdef-defile new file mode 100644 index 0000000000..768de36b8e --- /dev/null +++ b/src/ifdef-defile @@ -0,0 +1,123 @@ +// Config file for the 'unifdef' program to strip out unwanted source code. 
+ +#define LJ_HASJIT 1 +#define LJ_NO_UNWIND 1 +#define LUAJIT_DISABLE_PROFILE 1 +#define LUAJIT_DISABLE_VMEVENT 1 +#define LUAJIT_ENABLE_GC64 1 +#define LUAJIT_ENABLE_JIT 1 +#undef LJ_HASPROFILE +#undef LJ_UNWIND_EXT +#undef LUAJIT_DISABLE_FFI +#undef LUAJIT_DISABLE_JIT +#undef LUAJIT_NOFFI +#undef LUAJIT_USE_GDBJIT + +#define LJ_ARCH_BITS 64 +#define LJ_ARCH_ENDIAN == LUAJIT_LE +#define LJ_DUALNUM 0 +#define LJ_FR2 1 +#define LJ_NUMMODE LJ_NUMMODE_SINGLE_DUAL +#define LJ_NUMMODE_DUAL 2 +#define LJ_NUMMODE_DUAL_SINGLE 3 +#define LJ_NUMMODE_SINGLE 0 +#define LJ_NUMMODE_SINGLE_DUAL 1 +#define LJ_TARGET_DLOPEN 1 +#define LJ_TARGET_GC64 1 +#define LJ_TARGET_LINUX 1 +#define LJ_TARGET_POSIX 1 +#define LJ_TARGET_X64 1 +#define LJ_TARGET_X86ORX64 1 +#define LUAJIT_ARCH_ARM 3 +#define LUAJIT_ARCH_ARM64 4 +#define LUAJIT_ARCH_MIPS 6 +#define LUAJIT_ARCH_MIPS32 6 +#define LUAJIT_ARCH_MIPS64 7 +#define LUAJIT_ARCH_PPC 5 +#define LUAJIT_ARCH_X64 2 +#define LUAJIT_ARCH_X86 1 +#define LUAJIT_ARCH_arm 3 +#define LUAJIT_ARCH_arm64 4 +#define LUAJIT_ARCH_mips 6 +#define LUAJIT_ARCH_mips32 6 +#define LUAJIT_ARCH_mips64 7 +#define LUAJIT_ARCH_ppc 5 +#define LUAJIT_ARCH_x64 2 +#define LUAJIT_ARCH_x86 1 +#define LUAJIT_LE 1 +#define LUAJIT_NUMMODE=1 +#define LUAJIT_OS LUAJIT_OS_LINUX +#define LUAJIT_OS_LINUX 2 +#define LUAJIT_OS_WINDOWS 3 +#define LUAJIT_TARGET LUAJIT_ARCH_X64 +#define LUAJIT_TARGET_POSIX 1 +#define __linux__ +#undef JL_TARGET_PPC +#undef LJ_ABI_EABI +#undef LJ_ABI_SOFTFP +#undef LJ_ABI_WIN +#undef LJ_ALLOC_NTAVM +#undef LJ_ALLOC_VIRTUALALLOC +#undef LJ_ARCH_NOFFI +#undef LJ_ARCH_PPC +#undef LJ_OS_NOJIT +#undef LJ_SOFTFP +#undef LJ_TARGET_ARM +#undef LJ_TARGET_ARM64 +#undef LJ_TARGET_CONSOLE +#undef LJ_TARGET_CYGWIN +#undef LJ_TARGET_IOS +#undef LJ_TARGET_MIPS +#undef LJ_TARGET_MIPS64 +#undef LJ_TARGET_PPC +#undef LJ_TARGET_PS3 +#undef LJ_TARGET_PS4 +#undef LJ_TARGET_PSVITA +#undef LJ_TARGET_WINDOWS +#undef LJ_TARGET_WINDOWS +#undef LJ_TARGET_X86 +#undef 
LJ_TARGET_XBOX360 +#undef LJ_TARGET_XBOX360 +#undef LUAJIT_ARCH_MIPS32 +#undef LUAJIT_ARCH_X86 +#undef LUAJIT_BE +#undef MINGW_SDK_INIT +#undef _ARCH_PPCSQ +#undef _ARCH_PWR4 +#undef _ARCH_PWR5 +#undef _ARCH_PWR5X +#undef _ARCH_PWR6 +#undef _ARCH_PWR7 +#undef _DURANGO +#undef _MIPSEL +#undef _MSC_VER +#undef _XBOX_VER +#undef __ANDROID__ +#undef __ARM_ARCH_6T2__ +#undef __ARM_ARCH_6T2__ +#undef __ARM_ARCH_6Z__ +#undef __ARM_ARCH_6__ +#undef __ARM_ARCH_7A__ +#undef __ARM_ARCH_7R__ +#undef __ARM_ARCH_7S__ +#undef __ARM_ARCH_7VE__ +#undef __ARM_ARCH_7__ +#undef __ARM_ARCH_8A__ +#undef __ARM_ARCH____ARM_ARCH_8__ +#undef __ARM_ARH_6J__ +#undef __ARM_PCS_VFP +#undef __CELLOS_LV2__ +#undef __FreeBSD__ +#undef __FreeBSD_kernel__ +#undef __MIPSEL +#undef __MIPSEL__ +#undef __ORBIS__ +#undef __OpenBSD__ +#undef __arm__ +#undef __i386__ +#undef __mips_soft_float +#undef __psp2__ +#undef __sun__ +#undef __symbian__ +#undef __symbian__ +#undef ljamalg_c diff --git a/src/jit/.gitignore b/src/jit/.gitignore deleted file mode 100644 index 500e2855af..0000000000 --- a/src/jit/.gitignore +++ /dev/null @@ -1 +0,0 @@ -vmdef.lua diff --git a/src/jit/bc.lua b/src/jit/bc.lua index 193cf01f93..3e934bd3a3 100644 --- a/src/jit/bc.lua +++ b/src/jit/bc.lua @@ -41,7 +41,7 @@ -- Cache some library functions and objects. 
local jit = require("jit") -assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") +assert(jit.version_num == 10000, "LuaJIT core/library version mismatch") local jutil = require("jit.util") local vmdef = require("jit.vmdef") local bit = require("bit") diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index 9ee22a013d..09429d6c46 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -11,7 +11,7 @@ ------------------------------------------------------------------------------ local jit = require("jit") -assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") +assert(jit.version_num == 10000, "LuaJIT core/library version mismatch") local bit = require("bit") -- Symbol name prefix for LuaJIT bytecode. @@ -275,7 +275,7 @@ typedef struct { o.sect[2].size = fofs(ofs) o.sect[3].type = f32(3) -- .strtab o.sect[3].ofs = fofs(sofs + ofs) - o.sect[3].size = fofs(#symname+1) + o.sect[3].size = fofs(#symname+2) ffi.copy(o.space+ofs+1, symname) ofs = ofs + #symname + 2 o.sect[4].type = f32(1) -- .rodata diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua deleted file mode 100644 index c2dd776991..0000000000 --- a/src/jit/dis_arm.lua +++ /dev/null @@ -1,689 +0,0 @@ ----------------------------------------------------------------------------- --- LuaJIT ARM disassembler module. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- Released under the MIT license. See Copyright Notice in luajit.h ----------------------------------------------------------------------------- --- This is a helper module used by the LuaJIT machine code dumper module. --- --- It disassembles most user-mode ARMv7 instructions --- NYI: Advanced SIMD and VFP instructions. 
------------------------------------------------------------------------------- - -local type = type -local sub, byte, format = string.sub, string.byte, string.format -local match, gmatch = string.match, string.gmatch -local concat = table.concat -local bit = require("bit") -local band, bor, ror, tohex = bit.band, bit.bor, bit.ror, bit.tohex -local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift - ------------------------------------------------------------------------------- --- Opcode maps ------------------------------------------------------------------------------- - -local map_loadc = { - shift = 8, mask = 15, - [10] = { - shift = 20, mask = 1, - [0] = { - shift = 23, mask = 3, - [0] = "vmovFmDN", "vstmFNdr", - _ = { - shift = 21, mask = 1, - [0] = "vstrFdl", - { shift = 16, mask = 15, [13] = "vpushFdr", _ = "vstmdbFNdr", } - }, - }, - { - shift = 23, mask = 3, - [0] = "vmovFDNm", - { shift = 16, mask = 15, [13] = "vpopFdr", _ = "vldmFNdr", }, - _ = { - shift = 21, mask = 1, - [0] = "vldrFdl", "vldmdbFNdr", - }, - }, - }, - [11] = { - shift = 20, mask = 1, - [0] = { - shift = 23, mask = 3, - [0] = "vmovGmDN", "vstmGNdr", - _ = { - shift = 21, mask = 1, - [0] = "vstrGdl", - { shift = 16, mask = 15, [13] = "vpushGdr", _ = "vstmdbGNdr", } - }, - }, - { - shift = 23, mask = 3, - [0] = "vmovGDNm", - { shift = 16, mask = 15, [13] = "vpopGdr", _ = "vldmGNdr", }, - _ = { - shift = 21, mask = 1, - [0] = "vldrGdl", "vldmdbGNdr", - }, - }, - }, - _ = { - shift = 0, mask = 0 -- NYI ldc, mcrr, mrrc. 
- }, -} - -local map_vfps = { - shift = 6, mask = 0x2c001, - [0] = "vmlaF.dnm", "vmlsF.dnm", - [0x04000] = "vnmlsF.dnm", [0x04001] = "vnmlaF.dnm", - [0x08000] = "vmulF.dnm", [0x08001] = "vnmulF.dnm", - [0x0c000] = "vaddF.dnm", [0x0c001] = "vsubF.dnm", - [0x20000] = "vdivF.dnm", - [0x24000] = "vfnmsF.dnm", [0x24001] = "vfnmaF.dnm", - [0x28000] = "vfmaF.dnm", [0x28001] = "vfmsF.dnm", - [0x2c000] = "vmovF.dY", - [0x2c001] = { - shift = 7, mask = 0x1e01, - [0] = "vmovF.dm", "vabsF.dm", - [0x0200] = "vnegF.dm", [0x0201] = "vsqrtF.dm", - [0x0800] = "vcmpF.dm", [0x0801] = "vcmpeF.dm", - [0x0a00] = "vcmpzF.d", [0x0a01] = "vcmpzeF.d", - [0x0e01] = "vcvtG.dF.m", - [0x1000] = "vcvt.f32.u32Fdm", [0x1001] = "vcvt.f32.s32Fdm", - [0x1800] = "vcvtr.u32F.dm", [0x1801] = "vcvt.u32F.dm", - [0x1a00] = "vcvtr.s32F.dm", [0x1a01] = "vcvt.s32F.dm", - }, -} - -local map_vfpd = { - shift = 6, mask = 0x2c001, - [0] = "vmlaG.dnm", "vmlsG.dnm", - [0x04000] = "vnmlsG.dnm", [0x04001] = "vnmlaG.dnm", - [0x08000] = "vmulG.dnm", [0x08001] = "vnmulG.dnm", - [0x0c000] = "vaddG.dnm", [0x0c001] = "vsubG.dnm", - [0x20000] = "vdivG.dnm", - [0x24000] = "vfnmsG.dnm", [0x24001] = "vfnmaG.dnm", - [0x28000] = "vfmaG.dnm", [0x28001] = "vfmsG.dnm", - [0x2c000] = "vmovG.dY", - [0x2c001] = { - shift = 7, mask = 0x1e01, - [0] = "vmovG.dm", "vabsG.dm", - [0x0200] = "vnegG.dm", [0x0201] = "vsqrtG.dm", - [0x0800] = "vcmpG.dm", [0x0801] = "vcmpeG.dm", - [0x0a00] = "vcmpzG.d", [0x0a01] = "vcmpzeG.d", - [0x0e01] = "vcvtF.dG.m", - [0x1000] = "vcvt.f64.u32GdFm", [0x1001] = "vcvt.f64.s32GdFm", - [0x1800] = "vcvtr.u32FdG.m", [0x1801] = "vcvt.u32FdG.m", - [0x1a00] = "vcvtr.s32FdG.m", [0x1a01] = "vcvt.s32FdG.m", - }, -} - -local map_datac = { - shift = 24, mask = 1, - [0] = { - shift = 4, mask = 1, - [0] = { - shift = 8, mask = 15, - [10] = map_vfps, - [11] = map_vfpd, - -- NYI cdp, mcr, mrc. 
- }, - { - shift = 8, mask = 15, - [10] = { - shift = 20, mask = 15, - [0] = "vmovFnD", "vmovFDn", - [14] = "vmsrD", - [15] = { shift = 12, mask = 15, [15] = "vmrs", _ = "vmrsD", }, - }, - }, - }, - "svcT", -} - -local map_loadcu = { - shift = 0, mask = 0, -- NYI unconditional CP load/store. -} - -local map_datacu = { - shift = 0, mask = 0, -- NYI unconditional CP data. -} - -local map_simddata = { - shift = 0, mask = 0, -- NYI SIMD data. -} - -local map_simdload = { - shift = 0, mask = 0, -- NYI SIMD load/store, preload. -} - -local map_preload = { - shift = 0, mask = 0, -- NYI preload. -} - -local map_media = { - shift = 20, mask = 31, - [0] = false, - { --01 - shift = 5, mask = 7, - [0] = "sadd16DNM", "sasxDNM", "ssaxDNM", "ssub16DNM", - "sadd8DNM", false, false, "ssub8DNM", - }, - { --02 - shift = 5, mask = 7, - [0] = "qadd16DNM", "qasxDNM", "qsaxDNM", "qsub16DNM", - "qadd8DNM", false, false, "qsub8DNM", - }, - { --03 - shift = 5, mask = 7, - [0] = "shadd16DNM", "shasxDNM", "shsaxDNM", "shsub16DNM", - "shadd8DNM", false, false, "shsub8DNM", - }, - false, - { --05 - shift = 5, mask = 7, - [0] = "uadd16DNM", "uasxDNM", "usaxDNM", "usub16DNM", - "uadd8DNM", false, false, "usub8DNM", - }, - { --06 - shift = 5, mask = 7, - [0] = "uqadd16DNM", "uqasxDNM", "uqsaxDNM", "uqsub16DNM", - "uqadd8DNM", false, false, "uqsub8DNM", - }, - { --07 - shift = 5, mask = 7, - [0] = "uhadd16DNM", "uhasxDNM", "uhsaxDNM", "uhsub16DNM", - "uhadd8DNM", false, false, "uhsub8DNM", - }, - { --08 - shift = 5, mask = 7, - [0] = "pkhbtDNMU", false, "pkhtbDNMU", - { shift = 16, mask = 15, [15] = "sxtb16DMU", _ = "sxtab16DNMU", }, - "pkhbtDNMU", "selDNM", "pkhtbDNMU", - }, - false, - { --0a - shift = 5, mask = 7, - [0] = "ssatDxMu", "ssat16DxM", "ssatDxMu", - { shift = 16, mask = 15, [15] = "sxtbDMU", _ = "sxtabDNMU", }, - "ssatDxMu", false, "ssatDxMu", - }, - { --0b - shift = 5, mask = 7, - [0] = "ssatDxMu", "revDM", "ssatDxMu", - { shift = 16, mask = 15, [15] = "sxthDMU", _ = "sxtahDNMU", }, - 
"ssatDxMu", "rev16DM", "ssatDxMu", - }, - { --0c - shift = 5, mask = 7, - [3] = { shift = 16, mask = 15, [15] = "uxtb16DMU", _ = "uxtab16DNMU", }, - }, - false, - { --0e - shift = 5, mask = 7, - [0] = "usatDwMu", "usat16DwM", "usatDwMu", - { shift = 16, mask = 15, [15] = "uxtbDMU", _ = "uxtabDNMU", }, - "usatDwMu", false, "usatDwMu", - }, - { --0f - shift = 5, mask = 7, - [0] = "usatDwMu", "rbitDM", "usatDwMu", - { shift = 16, mask = 15, [15] = "uxthDMU", _ = "uxtahDNMU", }, - "usatDwMu", "revshDM", "usatDwMu", - }, - { --10 - shift = 12, mask = 15, - [15] = { - shift = 5, mask = 7, - "smuadNMS", "smuadxNMS", "smusdNMS", "smusdxNMS", - }, - _ = { - shift = 5, mask = 7, - [0] = "smladNMSD", "smladxNMSD", "smlsdNMSD", "smlsdxNMSD", - }, - }, - false, false, false, - { --14 - shift = 5, mask = 7, - [0] = "smlaldDNMS", "smlaldxDNMS", "smlsldDNMS", "smlsldxDNMS", - }, - { --15 - shift = 5, mask = 7, - [0] = { shift = 12, mask = 15, [15] = "smmulNMS", _ = "smmlaNMSD", }, - { shift = 12, mask = 15, [15] = "smmulrNMS", _ = "smmlarNMSD", }, - false, false, false, false, - "smmlsNMSD", "smmlsrNMSD", - }, - false, false, - { --18 - shift = 5, mask = 7, - [0] = { shift = 12, mask = 15, [15] = "usad8NMS", _ = "usada8NMSD", }, - }, - false, - { --1a - shift = 5, mask = 3, [2] = "sbfxDMvw", - }, - { --1b - shift = 5, mask = 3, [2] = "sbfxDMvw", - }, - { --1c - shift = 5, mask = 3, - [0] = { shift = 0, mask = 15, [15] = "bfcDvX", _ = "bfiDMvX", }, - }, - { --1d - shift = 5, mask = 3, - [0] = { shift = 0, mask = 15, [15] = "bfcDvX", _ = "bfiDMvX", }, - }, - { --1e - shift = 5, mask = 3, [2] = "ubfxDMvw", - }, - { --1f - shift = 5, mask = 3, [2] = "ubfxDMvw", - }, -} - -local map_load = { - shift = 21, mask = 9, - { - shift = 20, mask = 5, - [0] = "strtDL", "ldrtDL", [4] = "strbtDL", [5] = "ldrbtDL", - }, - _ = { - shift = 20, mask = 5, - [0] = "strDL", "ldrDL", [4] = "strbDL", [5] = "ldrbDL", - } -} - -local map_load1 = { - shift = 4, mask = 1, - [0] = map_load, map_media, -} - 
-local map_loadm = { - shift = 20, mask = 1, - [0] = { - shift = 23, mask = 3, - [0] = "stmdaNR", "stmNR", - { shift = 16, mask = 63, [45] = "pushR", _ = "stmdbNR", }, "stmibNR", - }, - { - shift = 23, mask = 3, - [0] = "ldmdaNR", { shift = 16, mask = 63, [61] = "popR", _ = "ldmNR", }, - "ldmdbNR", "ldmibNR", - }, -} - -local map_data = { - shift = 21, mask = 15, - [0] = "andDNPs", "eorDNPs", "subDNPs", "rsbDNPs", - "addDNPs", "adcDNPs", "sbcDNPs", "rscDNPs", - "tstNP", "teqNP", "cmpNP", "cmnNP", - "orrDNPs", "movDPs", "bicDNPs", "mvnDPs", -} - -local map_mul = { - shift = 21, mask = 7, - [0] = "mulNMSs", "mlaNMSDs", "umaalDNMS", "mlsDNMS", - "umullDNMSs", "umlalDNMSs", "smullDNMSs", "smlalDNMSs", -} - -local map_sync = { - shift = 20, mask = 15, -- NYI: brackets around N. R(D+1) for ldrexd/strexd. - [0] = "swpDMN", false, false, false, - "swpbDMN", false, false, false, - "strexDMN", "ldrexDN", "strexdDN", "ldrexdDN", - "strexbDMN", "ldrexbDN", "strexhDN", "ldrexhDN", -} - -local map_mulh = { - shift = 21, mask = 3, - [0] = { shift = 5, mask = 3, - [0] = "smlabbNMSD", "smlatbNMSD", "smlabtNMSD", "smlattNMSD", }, - { shift = 5, mask = 3, - [0] = "smlawbNMSD", "smulwbNMS", "smlawtNMSD", "smulwtNMS", }, - { shift = 5, mask = 3, - [0] = "smlalbbDNMS", "smlaltbDNMS", "smlalbtDNMS", "smlalttDNMS", }, - { shift = 5, mask = 3, - [0] = "smulbbNMS", "smultbNMS", "smulbtNMS", "smulttNMS", }, -} - -local map_misc = { - shift = 4, mask = 7, - -- NYI: decode PSR bits of msr. 
- [0] = { shift = 21, mask = 1, [0] = "mrsD", "msrM", }, - { shift = 21, mask = 3, "bxM", false, "clzDM", }, - { shift = 21, mask = 3, "bxjM", }, - { shift = 21, mask = 3, "blxM", }, - false, - { shift = 21, mask = 3, [0] = "qaddDMN", "qsubDMN", "qdaddDMN", "qdsubDMN", }, - false, - { shift = 21, mask = 3, "bkptK", }, -} - -local map_datar = { - shift = 4, mask = 9, - [9] = { - shift = 5, mask = 3, - [0] = { shift = 24, mask = 1, [0] = map_mul, map_sync, }, - { shift = 20, mask = 1, [0] = "strhDL", "ldrhDL", }, - { shift = 20, mask = 1, [0] = "ldrdDL", "ldrsbDL", }, - { shift = 20, mask = 1, [0] = "strdDL", "ldrshDL", }, - }, - _ = { - shift = 20, mask = 25, - [16] = { shift = 7, mask = 1, [0] = map_misc, map_mulh, }, - _ = { - shift = 0, mask = 0xffffffff, - [bor(0xe1a00000)] = "nop", - _ = map_data, - } - }, -} - -local map_datai = { - shift = 20, mask = 31, -- NYI: decode PSR bits of msr. Decode imm12. - [16] = "movwDW", [20] = "movtDW", - [18] = { shift = 0, mask = 0xf00ff, [0] = "nopv6", _ = "msrNW", }, - [22] = "msrNW", - _ = map_data, -} - -local map_branch = { - shift = 24, mask = 1, - [0] = "bB", "blB" -} - -local map_condins = { - [0] = map_datar, map_datai, map_load, map_load1, - map_loadm, map_branch, map_loadc, map_datac -} - --- NYI: setend. -local map_uncondins = { - [0] = false, map_simddata, map_simdload, map_preload, - false, "blxB", map_loadcu, map_datacu, -} - ------------------------------------------------------------------------------- - -local map_gpr = { - [0] = "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", - "r8", "r9", "r10", "r11", "r12", "sp", "lr", "pc", -} - -local map_cond = { - [0] = "eq", "ne", "hs", "lo", "mi", "pl", "vs", "vc", - "hi", "ls", "ge", "lt", "gt", "le", "al", -} - -local map_shift = { [0] = "lsl", "lsr", "asr", "ror", } - ------------------------------------------------------------------------------- - --- Output a nicely formatted line with an opcode and operands. 
-local function putop(ctx, text, operands) - local pos = ctx.pos - local extra = "" - if ctx.rel then - local sym = ctx.symtab[ctx.rel] - if sym then - extra = "\t->"..sym - elseif band(ctx.op, 0x0e000000) ~= 0x0a000000 then - extra = "\t; 0x"..tohex(ctx.rel) - end - end - if ctx.hexdump > 0 then - ctx.out(format("%08x %s %-5s %s%s\n", - ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)) - else - ctx.out(format("%08x %-5s %s%s\n", - ctx.addr+pos, text, concat(operands, ", "), extra)) - end - ctx.pos = pos + 4 -end - --- Fallback for unknown opcodes. -local function unknown(ctx) - return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) -end - --- Format operand 2 of load/store opcodes. -local function fmtload(ctx, op, pos) - local base = map_gpr[band(rshift(op, 16), 15)] - local x, ofs - local ext = (band(op, 0x04000000) == 0) - if not ext and band(op, 0x02000000) == 0 then - ofs = band(op, 4095) - if band(op, 0x00800000) == 0 then ofs = -ofs end - if base == "pc" then ctx.rel = ctx.addr + pos + 8 + ofs end - ofs = "#"..ofs - elseif ext and band(op, 0x00400000) ~= 0 then - ofs = band(op, 15) + band(rshift(op, 4), 0xf0) - if band(op, 0x00800000) == 0 then ofs = -ofs end - if base == "pc" then ctx.rel = ctx.addr + pos + 8 + ofs end - ofs = "#"..ofs - else - ofs = map_gpr[band(op, 15)] - if ext or band(op, 0xfe0) == 0 then - elseif band(op, 0xfe0) == 0x60 then - ofs = format("%s, rrx", ofs) - else - local sh = band(rshift(op, 7), 31) - if sh == 0 then sh = 32 end - ofs = format("%s, %s #%d", ofs, map_shift[band(rshift(op, 5), 3)], sh) - end - if band(op, 0x00800000) == 0 then ofs = "-"..ofs end - end - if ofs == "#0" then - x = format("[%s]", base) - elseif band(op, 0x01000000) == 0 then - x = format("[%s], %s", base, ofs) - else - x = format("[%s, %s]", base, ofs) - end - if band(op, 0x01200000) == 0x01200000 then x = x.."!" end - return x -end - --- Format operand 2 of vector load/store opcodes. 
-local function fmtvload(ctx, op, pos) - local base = map_gpr[band(rshift(op, 16), 15)] - local ofs = band(op, 255)*4 - if band(op, 0x00800000) == 0 then ofs = -ofs end - if base == "pc" then ctx.rel = ctx.addr + pos + 8 + ofs end - if ofs == 0 then - return format("[%s]", base) - else - return format("[%s, #%d]", base, ofs) - end -end - -local function fmtvr(op, vr, sh0, sh1) - if vr == "s" then - return format("s%d", 2*band(rshift(op, sh0), 15)+band(rshift(op, sh1), 1)) - else - return format("d%d", band(rshift(op, sh0), 15)+band(rshift(op, sh1-4), 16)) - end -end - --- Disassemble a single instruction. -local function disass_ins(ctx) - local pos = ctx.pos - local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) - local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) - local operands = {} - local suffix = "" - local last, name, pat - local vr - ctx.op = op - ctx.rel = nil - - local cond = rshift(op, 28) - local opat - if cond == 15 then - opat = map_uncondins[band(rshift(op, 25), 7)] - else - if cond ~= 14 then suffix = map_cond[cond] end - opat = map_condins[band(rshift(op, 25), 7)] - end - while type(opat) ~= "string" do - if not opat then return unknown(ctx) end - opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ - end - name, pat = match(opat, "^([a-z0-9]*)(.*)") - if sub(pat, 1, 1) == "." 
then - local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)") - suffix = suffix..s2 - pat = p2 - end - - for p in gmatch(pat, ".") do - local x = nil - if p == "D" then - x = map_gpr[band(rshift(op, 12), 15)] - elseif p == "N" then - x = map_gpr[band(rshift(op, 16), 15)] - elseif p == "S" then - x = map_gpr[band(rshift(op, 8), 15)] - elseif p == "M" then - x = map_gpr[band(op, 15)] - elseif p == "d" then - x = fmtvr(op, vr, 12, 22) - elseif p == "n" then - x = fmtvr(op, vr, 16, 7) - elseif p == "m" then - x = fmtvr(op, vr, 0, 5) - elseif p == "P" then - if band(op, 0x02000000) ~= 0 then - x = ror(band(op, 255), 2*band(rshift(op, 8), 15)) - else - x = map_gpr[band(op, 15)] - if band(op, 0xff0) ~= 0 then - operands[#operands+1] = x - local s = map_shift[band(rshift(op, 5), 3)] - local r = nil - if band(op, 0xf90) == 0 then - if s == "ror" then s = "rrx" else r = "#32" end - elseif band(op, 0x10) == 0 then - r = "#"..band(rshift(op, 7), 31) - else - r = map_gpr[band(rshift(op, 8), 15)] - end - if name == "mov" then name = s; x = r - elseif r then x = format("%s %s", s, r) - else x = s end - end - end - elseif p == "L" then - x = fmtload(ctx, op, pos) - elseif p == "l" then - x = fmtvload(ctx, op, pos) - elseif p == "B" then - local addr = ctx.addr + pos + 8 + arshift(lshift(op, 8), 6) - if cond == 15 then addr = addr + band(rshift(op, 23), 2) end - ctx.rel = addr - x = "0x"..tohex(addr) - elseif p == "F" then - vr = "s" - elseif p == "G" then - vr = "d" - elseif p == "." then - suffix = suffix..(vr == "s" and ".f32" or ".f64") - elseif p == "R" then - if band(op, 0x00200000) ~= 0 and #operands == 1 then - operands[1] = operands[1].."!" - end - local t = {} - for i=0,15 do - if band(rshift(op, i), 1) == 1 then t[#t+1] = map_gpr[i] end - end - x = "{"..concat(t, ", ").."}" - elseif p == "r" then - if band(op, 0x00200000) ~= 0 and #operands == 2 then - operands[1] = operands[1].."!" 
- end - local s = tonumber(sub(last, 2)) - local n = band(op, 255) - if vr == "d" then n = rshift(n, 1) end - operands[#operands] = format("{%s-%s%d}", last, vr, s+n-1) - elseif p == "W" then - x = band(op, 0x0fff) + band(rshift(op, 4), 0xf000) - elseif p == "T" then - x = "#0x"..tohex(band(op, 0x00ffffff), 6) - elseif p == "U" then - x = band(rshift(op, 7), 31) - if x == 0 then x = nil end - elseif p == "u" then - x = band(rshift(op, 7), 31) - if band(op, 0x40) == 0 then - if x == 0 then x = nil else x = "lsl #"..x end - else - if x == 0 then x = "asr #32" else x = "asr #"..x end - end - elseif p == "v" then - x = band(rshift(op, 7), 31) - elseif p == "w" then - x = band(rshift(op, 16), 31) - elseif p == "x" then - x = band(rshift(op, 16), 31) + 1 - elseif p == "X" then - x = band(rshift(op, 16), 31) - last + 1 - elseif p == "Y" then - x = band(rshift(op, 12), 0xf0) + band(op, 0x0f) - elseif p == "K" then - x = "#0x"..tohex(band(rshift(op, 4), 0x0000fff0) + band(op, 15), 4) - elseif p == "s" then - if band(op, 0x00100000) ~= 0 then suffix = "s"..suffix end - else - assert(false) - end - if x then - last = x - if type(x) == "number" then x = "#"..x end - operands[#operands+1] = x - end - end - - return putop(ctx, name..suffix, operands) -end - ------------------------------------------------------------------------------- - --- Disassemble a block of code. -local function disass_block(ctx, ofs, len) - if not ofs then ofs = 0 end - local stop = len and ofs+len or #ctx.code - ctx.pos = ofs - ctx.rel = nil - while ctx.pos < stop do disass_ins(ctx) end -end - --- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). -local function create(code, addr, out) - local ctx = {} - ctx.code = code - ctx.addr = addr or 0 - ctx.out = out or io.write - ctx.symtab = {} - ctx.disass = disass_block - ctx.hexdump = 8 - return ctx -end - --- Simple API: disassemble code (a string) at address and output via out. 
-local function disass(code, addr, out) - create(code, addr, out):disass() -end - --- Return register name for RID. -local function regname(r) - if r < 16 then return map_gpr[r] end - return "d"..(r-16) -end - --- Public module functions. -return { - create = create, - disass = disass, - regname = regname -} - diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua deleted file mode 100644 index a7173326ac..0000000000 --- a/src/jit/dis_arm64.lua +++ /dev/null @@ -1,1216 +0,0 @@ ----------------------------------------------------------------------------- --- LuaJIT ARM64 disassembler module. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- Released under the MIT license. See Copyright Notice in luajit.h --- --- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. --- Sponsored by Cisco Systems, Inc. ----------------------------------------------------------------------------- --- This is a helper module used by the LuaJIT machine code dumper module. --- --- It disassembles most user-mode AArch64 instructions. --- NYI: Advanced SIMD and VFP instructions. ------------------------------------------------------------------------------- - -local type = type -local sub, byte, format = string.sub, string.byte, string.format -local match, gmatch, gsub = string.match, string.gmatch, string.gsub -local concat = table.concat -local bit = require("bit") -local band, bor, bxor, tohex = bit.band, bit.bor, bit.bxor, bit.tohex -local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift -local ror = bit.ror - ------------------------------------------------------------------------------- --- Opcode maps ------------------------------------------------------------------------------- - -local map_adr = { -- PC-relative addressing. - shift = 31, mask = 1, - [0] = "adrDBx", "adrpDBx" -} - -local map_addsubi = { -- Add/subtract immediate. 
- shift = 29, mask = 3, - [0] = "add|movDNIg", "adds|cmnD0NIg", "subDNIg", "subs|cmpD0NIg", -} - -local map_logi = { -- Logical immediate. - shift = 31, mask = 1, - [0] = { - shift = 22, mask = 1, - [0] = { - shift = 29, mask = 3, - [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig" - }, - false -- unallocated - }, - { - shift = 29, mask = 3, - [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig" - } -} - -local map_movwi = { -- Move wide immediate. - shift = 31, mask = 1, - [0] = { - shift = 22, mask = 1, - [0] = { - shift = 29, mask = 3, - [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg" - }, false -- unallocated - }, - { - shift = 29, mask = 3, - [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg" - }, -} - -local map_bitf = { -- Bitfield. - shift = 31, mask = 1, - [0] = { - shift = 22, mask = 1, - [0] = { - shift = 29, mask = 3, - [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12w", - "bfm|bfi|bfxilDN13w", - "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12w" - } - }, - { - shift = 22, mask = 1, - { - shift = 29, mask = 3, - [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12x", - "bfm|bfi|bfxilDN13x", - "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12x" - } - } -} - -local map_datai = { -- Data processing - immediate. - shift = 23, mask = 7, - [0] = map_adr, map_adr, map_addsubi, false, - map_logi, map_movwi, map_bitf, - { - shift = 15, mask = 0x1c0c1, - [0] = "extr|rorDNM4w", [0x10080] = "extr|rorDNM4x", - [0x10081] = "extr|rorDNM4x" - } -} - -local map_logsr = { -- Logical, shifted register. 
- shift = 31, mask = 1, - [0] = { - shift = 15, mask = 1, - [0] = { - shift = 29, mask = 3, - [0] = { - shift = 21, mask = 7, - [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", - "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" - }, - { - shift = 21, mask = 7, - [0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", - "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" - }, - { - shift = 21, mask = 7, - [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", - "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" - }, - { - shift = 21, mask = 7, - [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", - "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" - } - }, - false -- unallocated - }, - { - shift = 29, mask = 3, - [0] = { - shift = 21, mask = 7, - [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", - "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" - }, - { - shift = 21, mask = 7, - [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", - "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" - }, - { - shift = 21, mask = 7, - [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", - "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" - }, - { - shift = 21, mask = 7, - [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", - "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" - } - } -} - -local map_assh = { - shift = 31, mask = 1, - [0] = { - shift = 15, mask = 1, - [0] = { - shift = 29, mask = 3, - [0] = { - shift = 22, mask = 3, - [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg" - }, - { - shift = 22, mask = 3, - [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", - "adds|cmnD0NMSg", "adds|cmnD0NMg" - }, - { - shift = 22, mask = 3, - [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg" - }, - { - shift = 22, mask = 3, - [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg", - "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg" - }, - }, - false -- unallocated - }, - { - 
shift = 29, mask = 3, - [0] = { - shift = 22, mask = 3, - [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg" - }, - { - shift = 22, mask = 3, - [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", "adds|cmnD0NMSg", - "adds|cmnD0NMg" - }, - { - shift = 22, mask = 3, - [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg" - }, - { - shift = 22, mask = 3, - [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg", - "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg" - } - } -} - -local map_addsubsh = { -- Add/subtract, shifted register. - shift = 22, mask = 3, - [0] = map_assh, map_assh, map_assh -} - -local map_addsubex = { -- Add/subtract, extended register. - shift = 22, mask = 3, - [0] = { - shift = 29, mask = 3, - [0] = "addDNMXg", "adds|cmnD0NMXg", "subDNMXg", "subs|cmpD0NMzXg", - } -} - -local map_addsubc = { -- Add/subtract, with carry. - shift = 10, mask = 63, - [0] = { - shift = 29, mask = 3, - [0] = "adcDNMg", "adcsDNMg", "sbc|ngcDN0Mg", "sbcs|ngcsDN0Mg", - } -} - -local map_ccomp = { - shift = 4, mask = 1, - [0] = { - shift = 10, mask = 3, - [0] = { -- Conditional compare register. - shift = 29, mask = 3, - "ccmnNMVCg", false, "ccmpNMVCg", - }, - [2] = { -- Conditional compare immediate. - shift = 29, mask = 3, - "ccmnN5VCg", false, "ccmpN5VCg", - } - } -} - -local map_csel = { -- Conditional select. - shift = 11, mask = 1, - [0] = { - shift = 10, mask = 1, - [0] = { - shift = 29, mask = 3, - [0] = "cselDNMzCg", false, "csinv|cinv|csetmDNMcg", false, - }, - { - shift = 29, mask = 3, - [0] = "csinc|cinc|csetDNMcg", false, "csneg|cnegDNMcg", false, - } - } -} - -local map_data1s = { -- Data processing, 1 source. - shift = 29, mask = 1, - [0] = { - shift = 31, mask = 1, - [0] = { - shift = 10, mask = 0x7ff, - [0] = "rbitDNg", "rev16DNg", "revDNw", false, "clzDNg", "clsDNg" - }, - { - shift = 10, mask = 0x7ff, - [0] = "rbitDNg", "rev16DNg", "rev32DNx", "revDNx", "clzDNg", "clsDNg" - } - } -} - -local map_data2s = { -- Data processing, 2 sources. 
- shift = 29, mask = 1, - [0] = { - shift = 10, mask = 63, - false, "udivDNMg", "sdivDNMg", false, false, false, false, "lslDNMg", - "lsrDNMg", "asrDNMg", "rorDNMg" - } -} - -local map_data3s = { -- Data processing, 3 sources. - shift = 29, mask = 7, - [0] = { - shift = 21, mask = 7, - [0] = { - shift = 15, mask = 1, - [0] = "madd|mulDNMA0g", "msub|mnegDNMA0g" - } - }, false, false, false, - { - shift = 15, mask = 1, - [0] = { - shift = 21, mask = 7, - [0] = "madd|mulDNMA0g", "smaddl|smullDxNMwA0x", "smulhDNMx", false, - false, "umaddl|umullDxNMwA0x", "umulhDNMx" - }, - { - shift = 21, mask = 7, - [0] = "msub|mnegDNMA0g", "smsubl|smneglDxNMwA0x", false, false, - false, "umsubl|umneglDxNMwA0x" - } - } -} - -local map_datar = { -- Data processing, register. - shift = 28, mask = 1, - [0] = { - shift = 24, mask = 1, - [0] = map_logsr, - { - shift = 21, mask = 1, - [0] = map_addsubsh, map_addsubex - } - }, - { - shift = 21, mask = 15, - [0] = map_addsubc, false, map_ccomp, false, map_csel, false, - { - shift = 30, mask = 1, - [0] = map_data2s, map_data1s - }, - false, map_data3s, map_data3s, map_data3s, map_data3s, map_data3s, - map_data3s, map_data3s, map_data3s - } -} - -local map_lrl = { -- Load register, literal. - shift = 26, mask = 1, - [0] = { - shift = 30, mask = 3, - [0] = "ldrDwB", "ldrDxB", "ldrswDxB" - }, - { - shift = 30, mask = 3, - [0] = "ldrDsB", "ldrDdB" - } -} - -local map_lsriind = { -- Load/store register, immediate pre/post-indexed. 
- shift = 30, mask = 3, - [0] = { - shift = 26, mask = 1, - [0] = { - shift = 22, mask = 3, - [0] = "strbDwzL", "ldrbDwzL", "ldrsbDxzL", "ldrsbDwzL" - } - }, - { - shift = 26, mask = 1, - [0] = { - shift = 22, mask = 3, - [0] = "strhDwzL", "ldrhDwzL", "ldrshDxzL", "ldrshDwzL" - } - }, - { - shift = 26, mask = 1, - [0] = { - shift = 22, mask = 3, - [0] = "strDwzL", "ldrDwzL", "ldrswDxzL" - }, - { - shift = 22, mask = 3, - [0] = "strDszL", "ldrDszL" - } - }, - { - shift = 26, mask = 1, - [0] = { - shift = 22, mask = 3, - [0] = "strDxzL", "ldrDxzL" - }, - { - shift = 22, mask = 3, - [0] = "strDdzL", "ldrDdzL" - } - } -} - -local map_lsriro = { - shift = 21, mask = 1, - [0] = { -- Load/store register immediate. - shift = 10, mask = 3, - [0] = { -- Unscaled immediate. - shift = 26, mask = 1, - [0] = { - shift = 30, mask = 3, - [0] = { - shift = 22, mask = 3, - [0] = "sturbDwK", "ldurbDwK" - }, - { - shift = 22, mask = 3, - [0] = "sturhDwK", "ldurhDwK" - }, - { - shift = 22, mask = 3, - [0] = "sturDwK", "ldurDwK" - }, - { - shift = 22, mask = 3, - [0] = "sturDxK", "ldurDxK" - } - } - }, map_lsriind, false, map_lsriind - }, - { -- Load/store register, register offset. - shift = 10, mask = 3, - [2] = { - shift = 26, mask = 1, - [0] = { - shift = 30, mask = 3, - [0] = { - shift = 22, mask = 3, - [0] = "strbDwO", "ldrbDwO", "ldrsbDxO", "ldrsbDwO" - }, - { - shift = 22, mask = 3, - [0] = "strhDwO", "ldrhDwO", "ldrshDxO", "ldrshDwO" - }, - { - shift = 22, mask = 3, - [0] = "strDwO", "ldrDwO", "ldrswDxO" - }, - { - shift = 22, mask = 3, - [0] = "strDxO", "ldrDxO" - } - }, - { - shift = 30, mask = 3, - [2] = { - shift = 22, mask = 3, - [0] = "strDsO", "ldrDsO" - }, - [3] = { - shift = 22, mask = 3, - [0] = "strDdO", "ldrDdO" - } - } - } - } -} - -local map_lsp = { -- Load/store register pair, offset. 
- shift = 22, mask = 1, - [0] = { - shift = 30, mask = 3, - [0] = { - shift = 26, mask = 1, - [0] = "stpDzAzwP", "stpDzAzsP", - }, - { - shift = 26, mask = 1, - "stpDzAzdP" - }, - { - shift = 26, mask = 1, - [0] = "stpDzAzxP" - } - }, - { - shift = 30, mask = 3, - [0] = { - shift = 26, mask = 1, - [0] = "ldpDzAzwP", "ldpDzAzsP", - }, - { - shift = 26, mask = 1, - [0] = "ldpswDAxP", "ldpDzAzdP" - }, - { - shift = 26, mask = 1, - [0] = "ldpDzAzxP" - } - } -} - -local map_ls = { -- Loads and stores. - shift = 24, mask = 0x31, - [0x10] = map_lrl, [0x30] = map_lsriro, - [0x20] = { - shift = 23, mask = 3, - map_lsp, map_lsp, map_lsp - }, - [0x21] = { - shift = 23, mask = 3, - map_lsp, map_lsp, map_lsp - }, - [0x31] = { - shift = 26, mask = 1, - [0] = { - shift = 30, mask = 3, - [0] = { - shift = 22, mask = 3, - [0] = "strbDwzU", "ldrbDwzU" - }, - { - shift = 22, mask = 3, - [0] = "strhDwzU", "ldrhDwzU" - }, - { - shift = 22, mask = 3, - [0] = "strDwzU", "ldrDwzU" - }, - { - shift = 22, mask = 3, - [0] = "strDxzU", "ldrDxzU" - } - }, - { - shift = 30, mask = 3, - [2] = { - shift = 22, mask = 3, - [0] = "strDszU", "ldrDszU" - }, - [3] = { - shift = 22, mask = 3, - [0] = "strDdzU", "ldrDdzU" - } - } - }, -} - -local map_datafp = { -- Data processing, SIMD and FP. - shift = 28, mask = 7, - { -- 001 - shift = 24, mask = 1, - [0] = { - shift = 21, mask = 1, - { - shift = 10, mask = 3, - [0] = { - shift = 12, mask = 1, - [0] = { - shift = 13, mask = 1, - [0] = { - shift = 14, mask = 1, - [0] = { - shift = 15, mask = 1, - [0] = { -- FP/int conversion. 
- shift = 31, mask = 1, - [0] = { - shift = 16, mask = 0xff, - [0x20] = "fcvtnsDwNs", [0x21] = "fcvtnuDwNs", - [0x22] = "scvtfDsNw", [0x23] = "ucvtfDsNw", - [0x24] = "fcvtasDwNs", [0x25] = "fcvtauDwNs", - [0x26] = "fmovDwNs", [0x27] = "fmovDsNw", - [0x28] = "fcvtpsDwNs", [0x29] = "fcvtpuDwNs", - [0x30] = "fcvtmsDwNs", [0x31] = "fcvtmuDwNs", - [0x38] = "fcvtzsDwNs", [0x39] = "fcvtzuDwNs", - [0x60] = "fcvtnsDwNd", [0x61] = "fcvtnuDwNd", - [0x62] = "scvtfDdNw", [0x63] = "ucvtfDdNw", - [0x64] = "fcvtasDwNd", [0x65] = "fcvtauDwNd", - [0x68] = "fcvtpsDwNd", [0x69] = "fcvtpuDwNd", - [0x70] = "fcvtmsDwNd", [0x71] = "fcvtmuDwNd", - [0x78] = "fcvtzsDwNd", [0x79] = "fcvtzuDwNd" - }, - { - shift = 16, mask = 0xff, - [0x20] = "fcvtnsDxNs", [0x21] = "fcvtnuDxNs", - [0x22] = "scvtfDsNx", [0x23] = "ucvtfDsNx", - [0x24] = "fcvtasDxNs", [0x25] = "fcvtauDxNs", - [0x28] = "fcvtpsDxNs", [0x29] = "fcvtpuDxNs", - [0x30] = "fcvtmsDxNs", [0x31] = "fcvtmuDxNs", - [0x38] = "fcvtzsDxNs", [0x39] = "fcvtzuDxNs", - [0x60] = "fcvtnsDxNd", [0x61] = "fcvtnuDxNd", - [0x62] = "scvtfDdNx", [0x63] = "ucvtfDdNx", - [0x64] = "fcvtasDxNd", [0x65] = "fcvtauDxNd", - [0x66] = "fmovDxNd", [0x67] = "fmovDdNx", - [0x68] = "fcvtpsDxNd", [0x69] = "fcvtpuDxNd", - [0x70] = "fcvtmsDxNd", [0x71] = "fcvtmuDxNd", - [0x78] = "fcvtzsDxNd", [0x79] = "fcvtzuDxNd" - } - } - }, - { -- FP data-processing, 1 source. - shift = 31, mask = 1, - [0] = { - shift = 22, mask = 3, - [0] = { - shift = 15, mask = 63, - [0] = "fmovDNf", "fabsDNf", "fnegDNf", - "fsqrtDNf", false, "fcvtDdNs", false, false, - "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf", - "frintaDNf", false, "frintxDNf", "frintiDNf", - }, - { - shift = 15, mask = 63, - [0] = "fmovDNf", "fabsDNf", "fnegDNf", - "fsqrtDNf", "fcvtDsNd", false, false, false, - "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf", - "frintaDNf", false, "frintxDNf", "frintiDNf", - } - } - } - }, - { -- FP compare. 
- shift = 31, mask = 1, - [0] = { - shift = 14, mask = 3, - [0] = { - shift = 23, mask = 1, - [0] = { - shift = 0, mask = 31, - [0] = "fcmpNMf", [8] = "fcmpNZf", - [16] = "fcmpeNMf", [24] = "fcmpeNZf", - } - } - } - } - }, - { -- FP immediate. - shift = 31, mask = 1, - [0] = { - shift = 5, mask = 31, - [0] = { - shift = 23, mask = 1, - [0] = "fmovDFf" - } - } - } - }, - { -- FP conditional compare. - shift = 31, mask = 1, - [0] = { - shift = 23, mask = 1, - [0] = { - shift = 4, mask = 1, - [0] = "fccmpNMVCf", "fccmpeNMVCf" - } - } - }, - { -- FP data-processing, 2 sources. - shift = 31, mask = 1, - [0] = { - shift = 23, mask = 1, - [0] = { - shift = 12, mask = 15, - [0] = "fmulDNMf", "fdivDNMf", "faddDNMf", "fsubDNMf", - "fmaxDNMf", "fminDNMf", "fmaxnmDNMf", "fminnmDNMf", - "fnmulDNMf" - } - } - }, - { -- FP conditional select. - shift = 31, mask = 1, - [0] = { - shift = 23, mask = 1, - [0] = "fcselDNMCf" - } - } - } - }, - { -- FP data-processing, 3 sources. - shift = 31, mask = 1, - [0] = { - shift = 15, mask = 1, - [0] = { - shift = 21, mask = 5, - [0] = "fmaddDNMAf", "fnmaddDNMAf" - }, - { - shift = 21, mask = 5, - [0] = "fmsubDNMAf", "fnmsubDNMAf" - } - } - } - } -} - -local map_br = { -- Branches, exception generating and system instructions. - shift = 29, mask = 7, - [0] = "bB", - { -- Compare & branch, immediate. - shift = 24, mask = 3, - [0] = "cbzDBg", "cbnzDBg", "tbzDTBw", "tbnzDTBw" - }, - { -- Conditional branch, immediate. - shift = 24, mask = 3, - [0] = { - shift = 4, mask = 1, - [0] = { - shift = 0, mask = 15, - [0] = "beqB", "bneB", "bhsB", "bloB", "bmiB", "bplB", "bvsB", "bvcB", - "bhiB", "blsB", "bgeB", "bltB", "bgtB", "bleB", "balB" - } - } - }, false, "blB", - { -- Compare & branch, immediate. - shift = 24, mask = 3, - [0] = "cbzDBg", "cbnzDBg", "tbzDTBx", "tbnzDTBx" - }, - { - shift = 24, mask = 3, - [0] = { -- Exception generation. - shift = 0, mask = 0xe0001f, - [0x200000] = "brkW" - }, - { -- System instructions. 
- shift = 0, mask = 0x3fffff, - [0x03201f] = "nop" - }, - { -- Unconditional branch, register. - shift = 0, mask = 0xfffc1f, - [0x1f0000] = "brNx", [0x3f0000] = "blrNx", - [0x5f0000] = "retNx" - }, - } -} - -local map_init = { - shift = 25, mask = 15, - [0] = false, false, false, false, map_ls, map_datar, map_ls, map_datafp, - map_datai, map_datai, map_br, map_br, map_ls, map_datar, map_ls, map_datafp -} - ------------------------------------------------------------------------------- - -local map_regs = { x = {}, w = {}, d = {}, s = {} } - -for i=0,30 do - map_regs.x[i] = "x"..i - map_regs.w[i] = "w"..i - map_regs.d[i] = "d"..i - map_regs.s[i] = "s"..i -end -map_regs.x[31] = "sp" -map_regs.w[31] = "wsp" -map_regs.d[31] = "d31" -map_regs.s[31] = "s31" - -local map_cond = { - [0] = "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", - "hi", "ls", "ge", "lt", "gt", "le", "al", -} - -local map_shift = { [0] = "lsl", "lsr", "asr", } - -local map_extend = { - [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx", -} - ------------------------------------------------------------------------------- - --- Output a nicely formatted line with an opcode and operands. -local function putop(ctx, text, operands) - local pos = ctx.pos - local extra = "" - if ctx.rel then - local sym = ctx.symtab[ctx.rel] - if sym then - extra = "\t->"..sym - end - end - if ctx.hexdump > 0 then - ctx.out(format("%08x %s %-5s %s%s\n", - ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)) - else - ctx.out(format("%08x %-5s %s%s\n", - ctx.addr+pos, text, concat(operands, ", "), extra)) - end - ctx.pos = pos + 4 -end - --- Fallback for unknown opcodes. 
-local function unknown(ctx) - return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) -end - -local function match_reg(p, pat, regnum) - return map_regs[match(pat, p.."%w-([xwds])")][regnum] -end - -local function fmt_hex32(x) - if x < 0 then - return tohex(x) - else - return format("%x", x) - end -end - -local imm13_rep = { 0x55555555, 0x11111111, 0x01010101, 0x00010001, 0x00000001 } - -local function decode_imm13(op) - local imms = band(rshift(op, 10), 63) - local immr = band(rshift(op, 16), 63) - if band(op, 0x00400000) == 0 then - local len = 5 - if imms >= 56 then - if imms >= 60 then len = 1 else len = 2 end - elseif imms >= 48 then len = 3 elseif imms >= 32 then len = 4 end - local l = lshift(1, len)-1 - local s = band(imms, l) - local r = band(immr, l) - local imm = ror(rshift(-1, 31-s), r) - if len ~= 5 then imm = band(imm, lshift(1, l)-1) + rshift(imm, 31-l) end - imm = imm * imm13_rep[len] - local ix = fmt_hex32(imm) - if rshift(op, 31) ~= 0 then - return ix..tohex(imm) - else - return ix - end - else - local lo, hi = -1, 0 - if imms < 32 then lo = rshift(-1, 31-imms) else hi = rshift(-1, 63-imms) end - if immr ~= 0 then - lo, hi = ror(lo, immr), ror(hi, immr) - local x = immr == 32 and 0 or band(bxor(lo, hi), lshift(-1, 32-immr)) - lo, hi = bxor(lo, x), bxor(hi, x) - if immr >= 32 then lo, hi = hi, lo end - end - if hi ~= 0 then - return fmt_hex32(hi)..tohex(lo) - else - return fmt_hex32(lo) - end - end -end - -local function parse_immpc(op, name) - if name == "b" or name == "bl" then - return arshift(lshift(op, 6), 4) - elseif name == "adr" or name == "adrp" then - local immlo = band(rshift(op, 29), 3) - local immhi = lshift(arshift(lshift(op, 8), 13), 2) - return bor(immhi, immlo) - elseif name == "tbz" or name == "tbnz" then - return lshift(arshift(lshift(op, 13), 18), 2) - else - return lshift(arshift(lshift(op, 8), 13), 2) - end -end - -local function parse_fpimm8(op) - local sign = band(op, 0x100000) == 0 and 1 or -1 - local exp = 
bxor(rshift(arshift(lshift(op, 12), 5), 24), 0x80) - 131 - local frac = 16+band(rshift(op, 13), 15) - return sign * frac * 2^exp -end - -local function prefer_bfx(sf, uns, imms, immr) - if imms < immr or imms == 31 or imms == 63 then - return false - end - if immr == 0 then - if sf == 0 and (imms == 7 or imms == 15) then - return false - end - if sf ~= 0 and uns == 0 and (imms == 7 or imms == 15 or imms == 31) then - return false - end - end - return true -end - --- Disassemble a single instruction. -local function disass_ins(ctx) - local pos = ctx.pos - local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) - local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) - local operands = {} - local suffix = "" - local last, name, pat - local map_reg - ctx.op = op - ctx.rel = nil - last = nil - local opat - opat = map_init[band(rshift(op, 25), 15)] - while type(opat) ~= "string" do - if not opat then return unknown(ctx) end - opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ - end - name, pat = match(opat, "^([a-z0-9]*)(.*)") - local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") - if altname then pat = pat2 end - if sub(pat, 1, 1) == "." 
then - local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)") - suffix = suffix..s2 - pat = p2 - end - - local rt = match(pat, "[gf]") - if rt then - if rt == "g" then - map_reg = band(op, 0x80000000) ~= 0 and map_regs.x or map_regs.w - else - map_reg = band(op, 0x400000) ~= 0 and map_regs.d or map_regs.s - end - end - - local second0, immr - - for p in gmatch(pat, ".") do - local x = nil - if p == "D" then - local regnum = band(op, 31) - x = rt and map_reg[regnum] or match_reg(p, pat, regnum) - elseif p == "N" then - local regnum = band(rshift(op, 5), 31) - x = rt and map_reg[regnum] or match_reg(p, pat, regnum) - elseif p == "M" then - local regnum = band(rshift(op, 16), 31) - x = rt and map_reg[regnum] or match_reg(p, pat, regnum) - elseif p == "A" then - local regnum = band(rshift(op, 10), 31) - x = rt and map_reg[regnum] or match_reg(p, pat, regnum) - elseif p == "B" then - local addr = ctx.addr + pos + parse_immpc(op, name) - ctx.rel = addr - x = "0x"..tohex(addr) - elseif p == "T" then - x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31)) - elseif p == "V" then - x = band(op, 15) - elseif p == "C" then - x = map_cond[band(rshift(op, 12), 15)] - elseif p == "c" then - local rn = band(rshift(op, 5), 31) - local rm = band(rshift(op, 16), 31) - local cond = band(rshift(op, 12), 15) - local invc = bxor(cond, 1) - x = map_cond[cond] - if altname and cond ~= 14 and cond ~= 15 then - local a1, a2 = match(altname, "([^|]*)|(.*)") - if rn == rm then - local n = #operands - operands[n] = nil - x = map_cond[invc] - if rn ~= 31 then - if a1 then name = a1 else name = altname end - else - operands[n-1] = nil - name = a2 - end - end - end - elseif p == "W" then - x = band(rshift(op, 5), 0xffff) - elseif p == "Y" then - x = band(rshift(op, 5), 0xffff) - local hw = band(rshift(op, 21), 3) - if altname and (hw == 0 or x ~= 0) then - name = altname - end - elseif p == "L" then - local rn = map_regs.x[band(rshift(op, 5), 31)] - local imm9 = arshift(lshift(op, 11), 23) - if 
band(op, 0x800) ~= 0 then - x = "["..rn..", #"..imm9.."]!" - else - x = "["..rn.."], #"..imm9 - end - elseif p == "U" then - local rn = map_regs.x[band(rshift(op, 5), 31)] - local sz = band(rshift(op, 30), 3) - local imm12 = lshift(arshift(lshift(op, 10), 20), sz) - if imm12 ~= 0 then - x = "["..rn..", #"..imm12.."]" - else - x = "["..rn.."]" - end - elseif p == "K" then - local rn = map_regs.x[band(rshift(op, 5), 31)] - local imm9 = arshift(lshift(op, 11), 23) - if imm9 ~= 0 then - x = "["..rn..", #"..imm9.."]" - else - x = "["..rn.."]" - end - elseif p == "O" then - local rn, rm = map_regs.x[band(rshift(op, 5), 31)] - local m = band(rshift(op, 13), 1) - if m == 0 then - rm = map_regs.w[band(rshift(op, 16), 31)] - else - rm = map_regs.x[band(rshift(op, 16), 31)] - end - x = "["..rn..", "..rm - local opt = band(rshift(op, 13), 7) - local s = band(rshift(op, 12), 1) - local sz = band(rshift(op, 30), 3) - -- extension to be applied - if opt == 3 then - if s == 0 then x = x.."]" - else x = x..", lsl #"..sz.."]" end - elseif opt == 2 or opt == 6 or opt == 7 then - if s == 0 then x = x..", "..map_extend[opt].."]" - else x = x..", "..map_extend[opt].." #"..sz.."]" end - else - x = x.."]" - end - elseif p == "P" then - local opcv, sh = rshift(op, 26), 2 - if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end - local imm7 = lshift(arshift(lshift(op, 10), 25), sh) - local rn = map_regs.x[band(rshift(op, 5), 31)] - local ind = band(rshift(op, 23), 3) - if ind == 1 then - x = "["..rn.."], #"..imm7 - elseif ind == 2 then - if imm7 == 0 then - x = "["..rn.."]" - else - x = "["..rn..", #"..imm7.."]" - end - elseif ind == 3 then - x = "["..rn..", #"..imm7.."]!" 
- end - elseif p == "I" then - local shf = band(rshift(op, 22), 3) - local imm12 = band(rshift(op, 10), 0x0fff) - local rn, rd = band(rshift(op, 5), 31), band(op, 31) - if altname == "mov" and shf == 0 and imm12 == 0 and (rn == 31 or rd == 31) then - name = altname - x = nil - elseif shf == 0 then - x = imm12 - elseif shf == 1 then - x = imm12..", lsl #12" - end - elseif p == "i" then - x = "#0x"..decode_imm13(op) - elseif p == "1" then - immr = band(rshift(op, 16), 63) - x = immr - elseif p == "2" then - x = band(rshift(op, 10), 63) - if altname then - local a1, a2, a3, a4, a5, a6 = - match(altname, "([^|]*)|([^|]*)|([^|]*)|([^|]*)|([^|]*)|(.*)") - local sf = band(rshift(op, 26), 32) - local uns = band(rshift(op, 30), 1) - if prefer_bfx(sf, uns, x, immr) then - name = a2 - x = x - immr + 1 - elseif immr == 0 and x == 7 then - local n = #operands - operands[n] = nil - if sf ~= 0 then - operands[n-1] = gsub(operands[n-1], "x", "w") - end - last = operands[n-1] - name = a6 - x = nil - elseif immr == 0 and x == 15 then - local n = #operands - operands[n] = nil - if sf ~= 0 then - operands[n-1] = gsub(operands[n-1], "x", "w") - end - last = operands[n-1] - name = a5 - x = nil - elseif x == 31 or x == 63 then - if x == 31 and immr == 0 and name == "sbfm" then - name = a4 - local n = #operands - operands[n] = nil - if sf ~= 0 then - operands[n-1] = gsub(operands[n-1], "x", "w") - end - last = operands[n-1] - else - name = a3 - end - x = nil - elseif band(x, 31) ~= 31 and immr == x+1 and name == "ubfm" then - name = a4 - last = "#"..(sf+32 - immr) - operands[#operands] = last - x = nil - elseif x < immr then - name = a1 - last = "#"..(sf+32 - immr) - operands[#operands] = last - x = x + 1 - end - end - elseif p == "3" then - x = band(rshift(op, 10), 63) - if altname then - local a1, a2 = match(altname, "([^|]*)|(.*)") - if x < immr then - name = a1 - local sf = band(rshift(op, 26), 32) - last = "#"..(sf+32 - immr) - operands[#operands] = last - x = x + 1 - elseif x >= 
immr then - name = a2 - x = x - immr + 1 - end - end - elseif p == "4" then - x = band(rshift(op, 10), 63) - local rn = band(rshift(op, 5), 31) - local rm = band(rshift(op, 16), 31) - if altname and rn == rm then - local n = #operands - operands[n] = nil - last = operands[n-1] - name = altname - end - elseif p == "5" then - x = band(rshift(op, 16), 31) - elseif p == "S" then - x = band(rshift(op, 10), 63) - if x == 0 then x = nil - else x = map_shift[band(rshift(op, 22), 3)].." #"..x end - elseif p == "X" then - local opt = band(rshift(op, 13), 7) - -- Width specifier . - if opt ~= 3 and opt ~= 7 then - last = map_regs.w[band(rshift(op, 16), 31)] - operands[#operands] = last - end - x = band(rshift(op, 10), 7) - -- Extension. - if opt == 2 + band(rshift(op, 31), 1) and - band(rshift(op, second0 and 5 or 0), 31) == 31 then - if x == 0 then x = nil - else x = "lsl #"..x end - else - if x == 0 then x = map_extend[band(rshift(op, 13), 7)] - else x = map_extend[band(rshift(op, 13), 7)].." #"..x end - end - elseif p == "R" then - x = band(rshift(op,21), 3) - if x == 0 then x = nil - else x = "lsl #"..x*16 end - elseif p == "z" then - local n = #operands - if operands[n] == "sp" then operands[n] = "xzr" - elseif operands[n] == "wsp" then operands[n] = "wzr" - end - elseif p == "Z" then - x = 0 - elseif p == "F" then - x = parse_fpimm8(op) - elseif p == "g" or p == "f" or p == "x" or p == "w" or - p == "d" or p == "s" then - -- These are handled in D/N/M/A. 
- elseif p == "0" then - if last == "sp" or last == "wsp" then - local n = #operands - operands[n] = nil - last = operands[n-1] - if altname then - local a1, a2 = match(altname, "([^|]*)|(.*)") - if not a1 then - name = altname - elseif second0 then - name, altname = a2, a1 - else - name, altname = a1, a2 - end - end - end - second0 = true - else - assert(false) - end - if x then - last = x - if type(x) == "number" then x = "#"..x end - operands[#operands+1] = x - end - end - - return putop(ctx, name..suffix, operands) -end - ------------------------------------------------------------------------------- - --- Disassemble a block of code. -local function disass_block(ctx, ofs, len) - if not ofs then ofs = 0 end - local stop = len and ofs+len or #ctx.code - ctx.pos = ofs - ctx.rel = nil - while ctx.pos < stop do disass_ins(ctx) end -end - --- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). -local function create(code, addr, out) - local ctx = {} - ctx.code = code - ctx.addr = addr or 0 - ctx.out = out or io.write - ctx.symtab = {} - ctx.disass = disass_block - ctx.hexdump = 8 - return ctx -end - --- Simple API: disassemble code (a string) at address and output via out. -local function disass(code, addr, out) - create(code, addr, out):disass() -end - --- Return register name for RID. -local function regname(r) - if r < 32 then return map_regs.x[r] end - return map_regs.d[r-32] -end - --- Public module functions. -return { - create = create, - disass = disass, - regname = regname -} - diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua deleted file mode 100644 index a12b8e62f3..0000000000 --- a/src/jit/dis_mips.lua +++ /dev/null @@ -1,443 +0,0 @@ ----------------------------------------------------------------------------- --- LuaJIT MIPS disassembler module. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- Released under the MIT/X license. 
See Copyright Notice in luajit.h ----------------------------------------------------------------------------- --- This is a helper module used by the LuaJIT machine code dumper module. --- --- It disassembles all standard MIPS32R1/R2 instructions. --- Default mode is big-endian, but see: dis_mipsel.lua ------------------------------------------------------------------------------- - -local type = type -local byte, format = string.byte, string.format -local match, gmatch = string.match, string.gmatch -local concat = table.concat -local bit = require("bit") -local band, bor, tohex = bit.band, bit.bor, bit.tohex -local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift - ------------------------------------------------------------------------------- --- Primary and extended opcode maps ------------------------------------------------------------------------------- - -local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", } -local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", } -local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", } - -local map_special = { - shift = 0, mask = 63, - [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, - map_movci, map_srl, "sraDTA", - "sllvDTS", false, map_srlv, "sravDTS", - "jrS", "jalrD1S", "movzDST", "movnDST", - "syscallY", "breakY", false, "sync", - "mfhiD", "mthiS", "mfloD", "mtloS", - "dsllvDST", false, "dsrlvDST", "dsravDST", - "multST", "multuST", "divST", "divuST", - "dmultST", "dmultuST", "ddivST", "ddivuST", - "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", - "andDST", "or|moveDST0", "xorDST", "nor|notDST0", - false, false, "sltDST", "sltuDST", - "daddDST", "dadduDST", "dsubDST", "dsubuDST", - "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", - "teqSTZ", false, "tneSTZ", false, - "dsllDTA", false, "dsrlDTA", "dsraDTA", - "dsll32DTA", false, "dsrl32DTA", "dsra32DTA", -} - -local map_special2 = { - shift = 0, mask = 63, - [0] = "maddST", "madduST", "mulDST", false, 
- "msubST", "msubuST", - [32] = "clzDS", [33] = "cloDS", - [63] = "sdbbpY", -} - -local map_bshfl = { - shift = 6, mask = 31, - [2] = "wsbhDT", - [16] = "sebDT", - [24] = "sehDT", -} - -local map_dbshfl = { - shift = 6, mask = 31, - [2] = "dsbhDT", - [5] = "dshdDT", -} - -local map_special3 = { - shift = 0, mask = 63, - [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK", - [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL", - [32] = map_bshfl, [36] = map_dbshfl, [59] = "rdhwrTD", -} - -local map_regimm = { - shift = 16, mask = 31, - [0] = "bltzSB", "bgezSB", "bltzlSB", "bgezlSB", - false, false, false, false, - "tgeiSI", "tgeiuSI", "tltiSI", "tltiuSI", - "teqiSI", false, "tneiSI", false, - "bltzalSB", "bgezalSB", "bltzallSB", "bgezallSB", - false, false, false, false, - false, false, false, false, - false, false, false, "synciSO", -} - -local map_cop0 = { - shift = 25, mask = 1, - [0] = { - shift = 21, mask = 15, - [0] = "mfc0TDW", [4] = "mtc0TDW", - [10] = "rdpgprDT", - [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", }, - [14] = "wrpgprDT", - }, { - shift = 0, mask = 63, - [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp", - [24] = "eret", [31] = "deret", - [32] = "wait", - }, -} - -local map_cop1s = { - shift = 0, mask = 63, - [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", - "sqrt.sFG", "abs.sFG", "mov.sFG", "neg.sFG", - "round.l.sFG", "trunc.l.sFG", "ceil.l.sFG", "floor.l.sFG", - "round.w.sFG", "trunc.w.sFG", "ceil.w.sFG", "floor.w.sFG", - false, - { shift = 16, mask = 1, [0] = "movf.sFGC", "movt.sFGC" }, - "movz.sFGT", "movn.sFGT", - false, "recip.sFG", "rsqrt.sFG", false, - false, false, false, false, - false, false, false, false, - false, "cvt.d.sFG", false, false, - "cvt.w.sFG", "cvt.l.sFG", "cvt.ps.sFGH", false, - false, false, false, false, - false, false, false, false, - "c.f.sVGH", "c.un.sVGH", "c.eq.sVGH", "c.ueq.sVGH", - "c.olt.sVGH", "c.ult.sVGH", "c.ole.sVGH", "c.ule.sVGH", - "c.sf.sVGH", "c.ngle.sVGH", "c.seq.sVGH", 
"c.ngl.sVGH", - "c.lt.sVGH", "c.nge.sVGH", "c.le.sVGH", "c.ngt.sVGH", -} - -local map_cop1d = { - shift = 0, mask = 63, - [0] = "add.dFGH", "sub.dFGH", "mul.dFGH", "div.dFGH", - "sqrt.dFG", "abs.dFG", "mov.dFG", "neg.dFG", - "round.l.dFG", "trunc.l.dFG", "ceil.l.dFG", "floor.l.dFG", - "round.w.dFG", "trunc.w.dFG", "ceil.w.dFG", "floor.w.dFG", - false, - { shift = 16, mask = 1, [0] = "movf.dFGC", "movt.dFGC" }, - "movz.dFGT", "movn.dFGT", - false, "recip.dFG", "rsqrt.dFG", false, - false, false, false, false, - false, false, false, false, - "cvt.s.dFG", false, false, false, - "cvt.w.dFG", "cvt.l.dFG", false, false, - false, false, false, false, - false, false, false, false, - "c.f.dVGH", "c.un.dVGH", "c.eq.dVGH", "c.ueq.dVGH", - "c.olt.dVGH", "c.ult.dVGH", "c.ole.dVGH", "c.ule.dVGH", - "c.df.dVGH", "c.ngle.dVGH", "c.deq.dVGH", "c.ngl.dVGH", - "c.lt.dVGH", "c.nge.dVGH", "c.le.dVGH", "c.ngt.dVGH", -} - -local map_cop1ps = { - shift = 0, mask = 63, - [0] = "add.psFGH", "sub.psFGH", "mul.psFGH", false, - false, "abs.psFG", "mov.psFG", "neg.psFG", - false, false, false, false, - false, false, false, false, - false, - { shift = 16, mask = 1, [0] = "movf.psFGC", "movt.psFGC" }, - "movz.psFGT", "movn.psFGT", - false, false, false, false, - false, false, false, false, - false, false, false, false, - "cvt.s.puFG", false, false, false, - false, false, false, false, - "cvt.s.plFG", false, false, false, - "pll.psFGH", "plu.psFGH", "pul.psFGH", "puu.psFGH", - "c.f.psVGH", "c.un.psVGH", "c.eq.psVGH", "c.ueq.psVGH", - "c.olt.psVGH", "c.ult.psVGH", "c.ole.psVGH", "c.ule.psVGH", - "c.psf.psVGH", "c.ngle.psVGH", "c.pseq.psVGH", "c.ngl.psVGH", - "c.lt.psVGH", "c.nge.psVGH", "c.le.psVGH", "c.ngt.psVGH", -} - -local map_cop1w = { - shift = 0, mask = 63, - [32] = "cvt.s.wFG", [33] = "cvt.d.wFG", -} - -local map_cop1l = { - shift = 0, mask = 63, - [32] = "cvt.s.lFG", [33] = "cvt.d.lFG", -} - -local map_cop1bc = { - shift = 16, mask = 3, - [0] = "bc1fCB", "bc1tCB", "bc1flCB", "bc1tlCB", -} 
- -local map_cop1 = { - shift = 21, mask = 31, - [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG", - "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG", - map_cop1bc, false, false, false, - false, false, false, false, - map_cop1s, map_cop1d, false, false, - map_cop1w, map_cop1l, map_cop1ps, -} - -local map_cop1x = { - shift = 0, mask = 63, - [0] = "lwxc1FSX", "ldxc1FSX", false, false, - false, "luxc1FSX", false, false, - "swxc1FSX", "sdxc1FSX", false, false, - false, "suxc1FSX", false, "prefxMSX", - false, false, false, false, - false, false, false, false, - false, false, false, false, - false, false, "alnv.psFGHS", false, - "madd.sFRGH", "madd.dFRGH", false, false, - false, false, "madd.psFRGH", false, - "msub.sFRGH", "msub.dFRGH", false, false, - false, false, "msub.psFRGH", false, - "nmadd.sFRGH", "nmadd.dFRGH", false, false, - false, false, "nmadd.psFRGH", false, - "nmsub.sFRGH", "nmsub.dFRGH", false, false, - false, false, "nmsub.psFRGH", false, -} - -local map_pri = { - [0] = map_special, map_regimm, "jJ", "jalJ", - "beq|beqz|bST00B", "bne|bnezST0B", "blezSB", "bgtzSB", - "addiTSI", "addiu|liTS0I", "sltiTSI", "sltiuTSI", - "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU", - map_cop0, map_cop1, false, map_cop1x, - "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB", - "daddiTSI", "daddiuTSI", false, false, - map_special2, "jalxJ", false, map_special3, - "lbTSO", "lhTSO", "lwlTSO", "lwTSO", - "lbuTSO", "lhuTSO", "lwrTSO", false, - "sbTSO", "shTSO", "swlTSO", "swTSO", - false, false, "swrTSO", "cacheNSO", - "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO", - false, "ldc1HSO", "ldc2TSO", "ldTSO", - "scTSO", "swc1HSO", "swc2TSO", false, - false, "sdc1HSO", "sdc2TSO", "sdTSO", -} - ------------------------------------------------------------------------------- - -local map_gpr = { - [0] = "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", - "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", - "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", - "r24", "r25", "r26", "r27", "r28", 
"sp", "r30", "ra", -} - ------------------------------------------------------------------------------- - --- Output a nicely formatted line with an opcode and operands. -local function putop(ctx, text, operands) - local pos = ctx.pos - local extra = "" - if ctx.rel then - local sym = ctx.symtab[ctx.rel] - if sym then extra = "\t->"..sym end - end - if ctx.hexdump > 0 then - ctx.out(format("%08x %s %-7s %s%s\n", - ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)) - else - ctx.out(format("%08x %-7s %s%s\n", - ctx.addr+pos, text, concat(operands, ", "), extra)) - end - ctx.pos = pos + 4 -end - --- Fallback for unknown opcodes. -local function unknown(ctx) - return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) -end - -local function get_be(ctx) - local pos = ctx.pos - local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) - return bor(lshift(b0, 24), lshift(b1, 16), lshift(b2, 8), b3) -end - -local function get_le(ctx) - local pos = ctx.pos - local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) - return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) -end - --- Disassemble a single instruction. 
-local function disass_ins(ctx) - local op = ctx:get() - local operands = {} - local last = nil - ctx.op = op - ctx.rel = nil - - local opat = map_pri[rshift(op, 26)] - while type(opat) ~= "string" do - if not opat then return unknown(ctx) end - opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ - end - local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") - local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") - if altname then pat = pat2 end - - for p in gmatch(pat, ".") do - local x = nil - if p == "S" then - x = map_gpr[band(rshift(op, 21), 31)] - elseif p == "T" then - x = map_gpr[band(rshift(op, 16), 31)] - elseif p == "D" then - x = map_gpr[band(rshift(op, 11), 31)] - elseif p == "F" then - x = "f"..band(rshift(op, 6), 31) - elseif p == "G" then - x = "f"..band(rshift(op, 11), 31) - elseif p == "H" then - x = "f"..band(rshift(op, 16), 31) - elseif p == "R" then - x = "f"..band(rshift(op, 21), 31) - elseif p == "A" then - x = band(rshift(op, 6), 31) - elseif p == "E" then - x = band(rshift(op, 6), 31) + 32 - elseif p == "M" then - x = band(rshift(op, 11), 31) - elseif p == "N" then - x = band(rshift(op, 16), 31) - elseif p == "C" then - x = band(rshift(op, 18), 7) - if x == 0 then x = nil end - elseif p == "K" then - x = band(rshift(op, 11), 31) + 1 - elseif p == "P" then - x = band(rshift(op, 11), 31) + 33 - elseif p == "L" then - x = band(rshift(op, 11), 31) - last + 1 - elseif p == "Q" then - x = band(rshift(op, 11), 31) - last + 33 - elseif p == "I" then - x = arshift(lshift(op, 16), 16) - elseif p == "U" then - x = band(op, 0xffff) - elseif p == "O" then - local disp = arshift(lshift(op, 16), 16) - operands[#operands] = format("%d(%s)", disp, last) - elseif p == "X" then - local index = map_gpr[band(rshift(op, 16), 31)] - operands[#operands] = format("%s(%s)", index, last) - elseif p == "B" then - x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4 - ctx.rel = x - x = format("0x%08x", x) - elseif p == "J" then - local a = ctx.addr + 
ctx.pos - x = a - band(a, 0x0fffffff) + band(op, 0x03ffffff)*4 - ctx.rel = x - x = format("0x%08x", x) - elseif p == "V" then - x = band(rshift(op, 8), 7) - if x == 0 then x = nil end - elseif p == "W" then - x = band(op, 7) - if x == 0 then x = nil end - elseif p == "Y" then - x = band(rshift(op, 6), 0x000fffff) - if x == 0 then x = nil end - elseif p == "Z" then - x = band(rshift(op, 6), 1023) - if x == 0 then x = nil end - elseif p == "0" then - if last == "r0" or last == 0 then - local n = #operands - operands[n] = nil - last = operands[n-1] - if altname then - local a1, a2 = match(altname, "([^|]*)|(.*)") - if a1 then name, altname = a1, a2 - else name = altname end - end - end - elseif p == "1" then - if last == "ra" then - operands[#operands] = nil - end - else - assert(false) - end - if x then operands[#operands+1] = x; last = x end - end - - return putop(ctx, name, operands) -end - ------------------------------------------------------------------------------- - --- Disassemble a block of code. -local function disass_block(ctx, ofs, len) - if not ofs then ofs = 0 end - local stop = len and ofs+len or #ctx.code - stop = stop - stop % 4 - ctx.pos = ofs - ofs % 4 - ctx.rel = nil - while ctx.pos < stop do disass_ins(ctx) end -end - --- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). -local function create(code, addr, out) - local ctx = {} - ctx.code = code - ctx.addr = addr or 0 - ctx.out = out or io.write - ctx.symtab = {} - ctx.disass = disass_block - ctx.hexdump = 8 - ctx.get = get_be - return ctx -end - -local function create_el(code, addr, out) - local ctx = create(code, addr, out) - ctx.get = get_le - return ctx -end - --- Simple API: disassemble code (a string) at address and output via out. -local function disass(code, addr, out) - create(code, addr, out):disass() -end - -local function disass_el(code, addr, out) - create_el(code, addr, out):disass() -end - --- Return register name for RID. 
-local function regname(r) - if r < 32 then return map_gpr[r] end - return "f"..(r-32) -end - --- Public module functions. -return { - create = create, - create_el = create_el, - disass = disass, - disass_el = disass_el, - regname = regname -} - diff --git a/src/jit/dis_mips64.lua b/src/jit/dis_mips64.lua deleted file mode 100644 index c4374928ab..0000000000 --- a/src/jit/dis_mips64.lua +++ /dev/null @@ -1,17 +0,0 @@ ----------------------------------------------------------------------------- --- LuaJIT MIPS64 disassembler wrapper module. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- Released under the MIT license. See Copyright Notice in luajit.h ----------------------------------------------------------------------------- --- This module just exports the big-endian functions from the --- MIPS disassembler module. All the interesting stuff is there. ------------------------------------------------------------------------------- - -local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") -return { - create = dis_mips.create, - disass = dis_mips.disass, - regname = dis_mips.regname -} - diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64el.lua deleted file mode 100644 index 2b1470af50..0000000000 --- a/src/jit/dis_mips64el.lua +++ /dev/null @@ -1,17 +0,0 @@ ----------------------------------------------------------------------------- --- LuaJIT MIPS64EL disassembler wrapper module. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- Released under the MIT license. See Copyright Notice in luajit.h ----------------------------------------------------------------------------- --- This module just exports the little-endian functions from the --- MIPS disassembler module. All the interesting stuff is there. 
------------------------------------------------------------------------------- - -local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") -return { - create = dis_mips.create_el, - disass = dis_mips.disass_el, - regname = dis_mips.regname -} - diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua deleted file mode 100644 index f69b11f01f..0000000000 --- a/src/jit/dis_mipsel.lua +++ /dev/null @@ -1,17 +0,0 @@ ----------------------------------------------------------------------------- --- LuaJIT MIPSEL disassembler wrapper module. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- Released under the MIT license. See Copyright Notice in luajit.h ----------------------------------------------------------------------------- --- This module just exports the little-endian functions from the --- MIPS disassembler module. All the interesting stuff is there. ------------------------------------------------------------------------------- - -local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") -return { - create = dis_mips.create_el, - disass = dis_mips.disass_el, - regname = dis_mips.regname -} - diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua deleted file mode 100644 index 2aeb1b2924..0000000000 --- a/src/jit/dis_ppc.lua +++ /dev/null @@ -1,591 +0,0 @@ ----------------------------------------------------------------------------- --- LuaJIT PPC disassembler module. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- Released under the MIT/X license. See Copyright Notice in luajit.h ----------------------------------------------------------------------------- --- This is a helper module used by the LuaJIT machine code dumper module. --- --- It disassembles all common, non-privileged 32/64 bit PowerPC instructions --- plus the e500 SPE instructions and some Cell/Xenon extensions. 
--- --- NYI: VMX, VMX128 ------------------------------------------------------------------------------- - -local type = type -local byte, format = string.byte, string.format -local match, gmatch, gsub = string.match, string.gmatch, string.gsub -local concat = table.concat -local bit = require("bit") -local band, bor, tohex = bit.band, bit.bor, bit.tohex -local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift - ------------------------------------------------------------------------------- --- Primary and extended opcode maps ------------------------------------------------------------------------------- - -local map_crops = { - shift = 1, mask = 1023, - [0] = "mcrfXX", - [33] = "crnor|crnotCCC=", [129] = "crandcCCC", - [193] = "crxor|crclrCCC%", [225] = "crnandCCC", - [257] = "crandCCC", [289] = "creqv|crsetCCC%", - [417] = "crorcCCC", [449] = "cror|crmoveCCC=", - [16] = "b_lrKB", [528] = "b_ctrKB", - [150] = "isync", -} - -local map_rlwinm = setmetatable({ - shift = 0, mask = -1, -}, -{ __index = function(t, x) - local rot = band(rshift(x, 11), 31) - local mb = band(rshift(x, 6), 31) - local me = band(rshift(x, 1), 31) - if mb == 0 and me == 31-rot then - return "slwiRR~A." - elseif me == 31 and mb == 32-rot then - return "srwiRR~-A." - else - return "rlwinmRR~AAA." 
- end - end -}) - -local map_rld = { - shift = 2, mask = 7, - [0] = "rldiclRR~HM.", "rldicrRR~HM.", "rldicRR~HM.", "rldimiRR~HM.", - { - shift = 1, mask = 1, - [0] = "rldclRR~RM.", "rldcrRR~RM.", - }, -} - -local map_ext = setmetatable({ - shift = 1, mask = 1023, - - [0] = "cmp_YLRR", [32] = "cmpl_YLRR", - [4] = "twARR", [68] = "tdARR", - - [8] = "subfcRRR.", [40] = "subfRRR.", - [104] = "negRR.", [136] = "subfeRRR.", - [200] = "subfzeRR.", [232] = "subfmeRR.", - [520] = "subfcoRRR.", [552] = "subfoRRR.", - [616] = "negoRR.", [648] = "subfeoRRR.", - [712] = "subfzeoRR.", [744] = "subfmeoRR.", - - [9] = "mulhduRRR.", [73] = "mulhdRRR.", [233] = "mulldRRR.", - [457] = "divduRRR.", [489] = "divdRRR.", - [745] = "mulldoRRR.", - [969] = "divduoRRR.", [1001] = "divdoRRR.", - - [10] = "addcRRR.", [138] = "addeRRR.", - [202] = "addzeRR.", [234] = "addmeRR.", [266] = "addRRR.", - [522] = "addcoRRR.", [650] = "addeoRRR.", - [714] = "addzeoRR.", [746] = "addmeoRR.", [778] = "addoRRR.", - - [11] = "mulhwuRRR.", [75] = "mulhwRRR.", [235] = "mullwRRR.", - [459] = "divwuRRR.", [491] = "divwRRR.", - [747] = "mullwoRRR.", - [971] = "divwouRRR.", [1003] = "divwoRRR.", - - [15] = "iselltRRR", [47] = "iselgtRRR", [79] = "iseleqRRR", - - [144] = { shift = 20, mask = 1, [0] = "mtcrfRZ~", "mtocrfRZ~", }, - [19] = { shift = 20, mask = 1, [0] = "mfcrR", "mfocrfRZ", }, - [371] = { shift = 11, mask = 1023, [392] = "mftbR", [424] = "mftbuR", }, - [339] = { - shift = 11, mask = 1023, - [32] = "mferR", [256] = "mflrR", [288] = "mfctrR", [16] = "mfspefscrR", - }, - [467] = { - shift = 11, mask = 1023, - [32] = "mtxerR", [256] = "mtlrR", [288] = "mtctrR", [16] = "mtspefscrR", - }, - - [20] = "lwarxRR0R", [84] = "ldarxRR0R", - - [21] = "ldxRR0R", [53] = "lduxRRR", - [149] = "stdxRR0R", [181] = "stduxRRR", - [341] = "lwaxRR0R", [373] = "lwauxRRR", - - [23] = "lwzxRR0R", [55] = "lwzuxRRR", - [87] = "lbzxRR0R", [119] = "lbzuxRRR", - [151] = "stwxRR0R", [183] = "stwuxRRR", - [215] = "stbxRR0R", [247] 
= "stbuxRRR", - [279] = "lhzxRR0R", [311] = "lhzuxRRR", - [343] = "lhaxRR0R", [375] = "lhauxRRR", - [407] = "sthxRR0R", [439] = "sthuxRRR", - - [54] = "dcbst-R0R", [86] = "dcbf-R0R", - [150] = "stwcxRR0R.", [214] = "stdcxRR0R.", - [246] = "dcbtst-R0R", [278] = "dcbt-R0R", - [310] = "eciwxRR0R", [438] = "ecowxRR0R", - [470] = "dcbi-RR", - - [598] = { - shift = 21, mask = 3, - [0] = "sync", "lwsync", "ptesync", - }, - [758] = "dcba-RR", - [854] = "eieio", [982] = "icbi-R0R", [1014] = "dcbz-R0R", - - [26] = "cntlzwRR~", [58] = "cntlzdRR~", - [122] = "popcntbRR~", - [154] = "prtywRR~", [186] = "prtydRR~", - - [28] = "andRR~R.", [60] = "andcRR~R.", [124] = "nor|notRR~R=.", - [284] = "eqvRR~R.", [316] = "xorRR~R.", - [412] = "orcRR~R.", [444] = "or|mrRR~R=.", [476] = "nandRR~R.", - [508] = "cmpbRR~R", - - [512] = "mcrxrX", - - [532] = "ldbrxRR0R", [660] = "stdbrxRR0R", - - [533] = "lswxRR0R", [597] = "lswiRR0A", - [661] = "stswxRR0R", [725] = "stswiRR0A", - - [534] = "lwbrxRR0R", [662] = "stwbrxRR0R", - [790] = "lhbrxRR0R", [918] = "sthbrxRR0R", - - [535] = "lfsxFR0R", [567] = "lfsuxFRR", - [599] = "lfdxFR0R", [631] = "lfduxFRR", - [663] = "stfsxFR0R", [695] = "stfsuxFRR", - [727] = "stfdxFR0R", [759] = "stfduxFR0R", - [855] = "lfiwaxFR0R", - [983] = "stfiwxFR0R", - - [24] = "slwRR~R.", - - [27] = "sldRR~R.", [536] = "srwRR~R.", - [792] = "srawRR~R.", [824] = "srawiRR~A.", - - [794] = "sradRR~R.", [826] = "sradiRR~H.", [827] = "sradiRR~H.", - [922] = "extshRR~.", [954] = "extsbRR~.", [986] = "extswRR~.", - - [539] = "srdRR~R.", -}, -{ __index = function(t, x) - if band(x, 31) == 15 then return "iselRRRC" end - end -}) - -local map_ld = { - shift = 0, mask = 3, - [0] = "ldRRE", "lduRRE", "lwaRRE", -} - -local map_std = { - shift = 0, mask = 3, - [0] = "stdRRE", "stduRRE", -} - -local map_fps = { - shift = 5, mask = 1, - { - shift = 1, mask = 15, - [0] = false, false, "fdivsFFF.", false, - "fsubsFFF.", "faddsFFF.", "fsqrtsF-F.", false, - "fresF-F.", "fmulsFF-F.", 
"frsqrtesF-F.", false, - "fmsubsFFFF~.", "fmaddsFFFF~.", "fnmsubsFFFF~.", "fnmaddsFFFF~.", - } -} - -local map_fpd = { - shift = 5, mask = 1, - [0] = { - shift = 1, mask = 1023, - [0] = "fcmpuXFF", [32] = "fcmpoXFF", [64] = "mcrfsXX", - [38] = "mtfsb1A.", [70] = "mtfsb0A.", [134] = "mtfsfiA>>-A>", - [8] = "fcpsgnFFF.", [40] = "fnegF-F.", [72] = "fmrF-F.", - [136] = "fnabsF-F.", [264] = "fabsF-F.", - [12] = "frspF-F.", - [14] = "fctiwF-F.", [15] = "fctiwzF-F.", - [583] = "mffsF.", [711] = "mtfsfZF.", - [392] = "frinF-F.", [424] = "frizF-F.", - [456] = "fripF-F.", [488] = "frimF-F.", - [814] = "fctidF-F.", [815] = "fctidzF-F.", [846] = "fcfidF-F.", - }, - { - shift = 1, mask = 15, - [0] = false, false, "fdivFFF.", false, - "fsubFFF.", "faddFFF.", "fsqrtF-F.", "fselFFFF~.", - "freF-F.", "fmulFF-F.", "frsqrteF-F.", false, - "fmsubFFFF~.", "fmaddFFFF~.", "fnmsubFFFF~.", "fnmaddFFFF~.", - } -} - -local map_spe = { - shift = 0, mask = 2047, - - [512] = "evaddwRRR", [514] = "evaddiwRAR~", - [516] = "evsubwRRR~", [518] = "evsubiwRAR~", - [520] = "evabsRR", [521] = "evnegRR", - [522] = "evextsbRR", [523] = "evextshRR", [524] = "evrndwRR", - [525] = "evcntlzwRR", [526] = "evcntlswRR", - - [527] = "brincRRR", - - [529] = "evandRRR", [530] = "evandcRRR", [534] = "evxorRRR", - [535] = "evor|evmrRRR=", [536] = "evnor|evnotRRR=", - [537] = "eveqvRRR", [539] = "evorcRRR", [542] = "evnandRRR", - - [544] = "evsrwuRRR", [545] = "evsrwsRRR", - [546] = "evsrwiuRRA", [547] = "evsrwisRRA", - [548] = "evslwRRR", [550] = "evslwiRRA", - [552] = "evrlwRRR", [553] = "evsplatiRS", - [554] = "evrlwiRRA", [555] = "evsplatfiRS", - [556] = "evmergehiRRR", [557] = "evmergeloRRR", - [558] = "evmergehiloRRR", [559] = "evmergelohiRRR", - - [560] = "evcmpgtuYRR", [561] = "evcmpgtsYRR", - [562] = "evcmpltuYRR", [563] = "evcmpltsYRR", - [564] = "evcmpeqYRR", - - [632] = "evselRRR", [633] = "evselRRRW", - [634] = "evselRRRW", [635] = "evselRRRW", - [636] = "evselRRRW", [637] = "evselRRRW", - [638] = 
"evselRRRW", [639] = "evselRRRW", - - [640] = "evfsaddRRR", [641] = "evfssubRRR", - [644] = "evfsabsRR", [645] = "evfsnabsRR", [646] = "evfsnegRR", - [648] = "evfsmulRRR", [649] = "evfsdivRRR", - [652] = "evfscmpgtYRR", [653] = "evfscmpltYRR", [654] = "evfscmpeqYRR", - [656] = "evfscfuiR-R", [657] = "evfscfsiR-R", - [658] = "evfscfufR-R", [659] = "evfscfsfR-R", - [660] = "evfsctuiR-R", [661] = "evfsctsiR-R", - [662] = "evfsctufR-R", [663] = "evfsctsfR-R", - [664] = "evfsctuizR-R", [666] = "evfsctsizR-R", - [668] = "evfststgtYRR", [669] = "evfststltYRR", [670] = "evfststeqYRR", - - [704] = "efsaddRRR", [705] = "efssubRRR", - [708] = "efsabsRR", [709] = "efsnabsRR", [710] = "efsnegRR", - [712] = "efsmulRRR", [713] = "efsdivRRR", - [716] = "efscmpgtYRR", [717] = "efscmpltYRR", [718] = "efscmpeqYRR", - [719] = "efscfdR-R", - [720] = "efscfuiR-R", [721] = "efscfsiR-R", - [722] = "efscfufR-R", [723] = "efscfsfR-R", - [724] = "efsctuiR-R", [725] = "efsctsiR-R", - [726] = "efsctufR-R", [727] = "efsctsfR-R", - [728] = "efsctuizR-R", [730] = "efsctsizR-R", - [732] = "efststgtYRR", [733] = "efststltYRR", [734] = "efststeqYRR", - - [736] = "efdaddRRR", [737] = "efdsubRRR", - [738] = "efdcfuidR-R", [739] = "efdcfsidR-R", - [740] = "efdabsRR", [741] = "efdnabsRR", [742] = "efdnegRR", - [744] = "efdmulRRR", [745] = "efddivRRR", - [746] = "efdctuidzR-R", [747] = "efdctsidzR-R", - [748] = "efdcmpgtYRR", [749] = "efdcmpltYRR", [750] = "efdcmpeqYRR", - [751] = "efdcfsR-R", - [752] = "efdcfuiR-R", [753] = "efdcfsiR-R", - [754] = "efdcfufR-R", [755] = "efdcfsfR-R", - [756] = "efdctuiR-R", [757] = "efdctsiR-R", - [758] = "efdctufR-R", [759] = "efdctsfR-R", - [760] = "efdctuizR-R", [762] = "efdctsizR-R", - [764] = "efdtstgtYRR", [765] = "efdtstltYRR", [766] = "efdtsteqYRR", - - [768] = "evlddxRR0R", [769] = "evlddRR8", - [770] = "evldwxRR0R", [771] = "evldwRR8", - [772] = "evldhxRR0R", [773] = "evldhRR8", - [776] = "evlhhesplatxRR0R", [777] = "evlhhesplatRR2", - [780] = 
"evlhhousplatxRR0R", [781] = "evlhhousplatRR2", - [782] = "evlhhossplatxRR0R", [783] = "evlhhossplatRR2", - [784] = "evlwhexRR0R", [785] = "evlwheRR4", - [788] = "evlwhouxRR0R", [789] = "evlwhouRR4", - [790] = "evlwhosxRR0R", [791] = "evlwhosRR4", - [792] = "evlwwsplatxRR0R", [793] = "evlwwsplatRR4", - [796] = "evlwhsplatxRR0R", [797] = "evlwhsplatRR4", - - [800] = "evstddxRR0R", [801] = "evstddRR8", - [802] = "evstdwxRR0R", [803] = "evstdwRR8", - [804] = "evstdhxRR0R", [805] = "evstdhRR8", - [816] = "evstwhexRR0R", [817] = "evstwheRR4", - [820] = "evstwhoxRR0R", [821] = "evstwhoRR4", - [824] = "evstwwexRR0R", [825] = "evstwweRR4", - [828] = "evstwwoxRR0R", [829] = "evstwwoRR4", - - [1027] = "evmhessfRRR", [1031] = "evmhossfRRR", [1032] = "evmheumiRRR", - [1033] = "evmhesmiRRR", [1035] = "evmhesmfRRR", [1036] = "evmhoumiRRR", - [1037] = "evmhosmiRRR", [1039] = "evmhosmfRRR", [1059] = "evmhessfaRRR", - [1063] = "evmhossfaRRR", [1064] = "evmheumiaRRR", [1065] = "evmhesmiaRRR", - [1067] = "evmhesmfaRRR", [1068] = "evmhoumiaRRR", [1069] = "evmhosmiaRRR", - [1071] = "evmhosmfaRRR", [1095] = "evmwhssfRRR", [1096] = "evmwlumiRRR", - [1100] = "evmwhumiRRR", [1101] = "evmwhsmiRRR", [1103] = "evmwhsmfRRR", - [1107] = "evmwssfRRR", [1112] = "evmwumiRRR", [1113] = "evmwsmiRRR", - [1115] = "evmwsmfRRR", [1127] = "evmwhssfaRRR", [1128] = "evmwlumiaRRR", - [1132] = "evmwhumiaRRR", [1133] = "evmwhsmiaRRR", [1135] = "evmwhsmfaRRR", - [1139] = "evmwssfaRRR", [1144] = "evmwumiaRRR", [1145] = "evmwsmiaRRR", - [1147] = "evmwsmfaRRR", - - [1216] = "evaddusiaawRR", [1217] = "evaddssiaawRR", - [1218] = "evsubfusiaawRR", [1219] = "evsubfssiaawRR", - [1220] = "evmraRR", - [1222] = "evdivwsRRR", [1223] = "evdivwuRRR", - [1224] = "evaddumiaawRR", [1225] = "evaddsmiaawRR", - [1226] = "evsubfumiaawRR", [1227] = "evsubfsmiaawRR", - - [1280] = "evmheusiaawRRR", [1281] = "evmhessiaawRRR", - [1283] = "evmhessfaawRRR", [1284] = "evmhousiaawRRR", - [1285] = "evmhossiaawRRR", [1287] = 
"evmhossfaawRRR", - [1288] = "evmheumiaawRRR", [1289] = "evmhesmiaawRRR", - [1291] = "evmhesmfaawRRR", [1292] = "evmhoumiaawRRR", - [1293] = "evmhosmiaawRRR", [1295] = "evmhosmfaawRRR", - [1320] = "evmhegumiaaRRR", [1321] = "evmhegsmiaaRRR", - [1323] = "evmhegsmfaaRRR", [1324] = "evmhogumiaaRRR", - [1325] = "evmhogsmiaaRRR", [1327] = "evmhogsmfaaRRR", - [1344] = "evmwlusiaawRRR", [1345] = "evmwlssiaawRRR", - [1352] = "evmwlumiaawRRR", [1353] = "evmwlsmiaawRRR", - [1363] = "evmwssfaaRRR", [1368] = "evmwumiaaRRR", - [1369] = "evmwsmiaaRRR", [1371] = "evmwsmfaaRRR", - [1408] = "evmheusianwRRR", [1409] = "evmhessianwRRR", - [1411] = "evmhessfanwRRR", [1412] = "evmhousianwRRR", - [1413] = "evmhossianwRRR", [1415] = "evmhossfanwRRR", - [1416] = "evmheumianwRRR", [1417] = "evmhesmianwRRR", - [1419] = "evmhesmfanwRRR", [1420] = "evmhoumianwRRR", - [1421] = "evmhosmianwRRR", [1423] = "evmhosmfanwRRR", - [1448] = "evmhegumianRRR", [1449] = "evmhegsmianRRR", - [1451] = "evmhegsmfanRRR", [1452] = "evmhogumianRRR", - [1453] = "evmhogsmianRRR", [1455] = "evmhogsmfanRRR", - [1472] = "evmwlusianwRRR", [1473] = "evmwlssianwRRR", - [1480] = "evmwlumianwRRR", [1481] = "evmwlsmianwRRR", - [1491] = "evmwssfanRRR", [1496] = "evmwumianRRR", - [1497] = "evmwsmianRRR", [1499] = "evmwsmfanRRR", -} - -local map_pri = { - [0] = false, false, "tdiARI", "twiARI", - map_spe, false, false, "mulliRRI", - "subficRRI", false, "cmpl_iYLRU", "cmp_iYLRI", - "addicRRI", "addic.RRI", "addi|liRR0I", "addis|lisRR0I", - "b_KBJ", "sc", "bKJ", map_crops, - "rlwimiRR~AAA.", map_rlwinm, false, "rlwnmRR~RAA.", - "oriNRR~U", "orisRR~U", "xoriRR~U", "xorisRR~U", - "andi.RR~U", "andis.RR~U", map_rld, map_ext, - "lwzRRD", "lwzuRRD", "lbzRRD", "lbzuRRD", - "stwRRD", "stwuRRD", "stbRRD", "stbuRRD", - "lhzRRD", "lhzuRRD", "lhaRRD", "lhauRRD", - "sthRRD", "sthuRRD", "lmwRRD", "stmwRRD", - "lfsFRD", "lfsuFRD", "lfdFRD", "lfduFRD", - "stfsFRD", "stfsuFRD", "stfdFRD", "stfduFRD", - false, false, map_ld, map_fps, - false, 
false, map_std, map_fpd, -} - ------------------------------------------------------------------------------- - -local map_gpr = { - [0] = "r0", "sp", "r2", "r3", "r4", "r5", "r6", "r7", - "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", - "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", - "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", -} - -local map_cond = { [0] = "lt", "gt", "eq", "so", "ge", "le", "ne", "ns", } - --- Format a condition bit. -local function condfmt(cond) - if cond <= 3 then - return map_cond[band(cond, 3)] - else - return format("4*cr%d+%s", rshift(cond, 2), map_cond[band(cond, 3)]) - end -end - ------------------------------------------------------------------------------- - --- Output a nicely formatted line with an opcode and operands. -local function putop(ctx, text, operands) - local pos = ctx.pos - local extra = "" - if ctx.rel then - local sym = ctx.symtab[ctx.rel] - if sym then extra = "\t->"..sym end - end - if ctx.hexdump > 0 then - ctx.out(format("%08x %s %-7s %s%s\n", - ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)) - else - ctx.out(format("%08x %-7s %s%s\n", - ctx.addr+pos, text, concat(operands, ", "), extra)) - end - ctx.pos = pos + 4 -end - --- Fallback for unknown opcodes. -local function unknown(ctx) - return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) -end - --- Disassemble a single instruction. 
-local function disass_ins(ctx) - local pos = ctx.pos - local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) - local op = bor(lshift(b0, 24), lshift(b1, 16), lshift(b2, 8), b3) - local operands = {} - local last = nil - local rs = 21 - ctx.op = op - ctx.rel = nil - - local opat = map_pri[rshift(b0, 2)] - while type(opat) ~= "string" do - if not opat then return unknown(ctx) end - opat = opat[band(rshift(op, opat.shift), opat.mask)] - end - local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") - local altname, pat2 = match(pat, "|([a-z0-9_.]*)(.*)") - if altname then pat = pat2 end - - for p in gmatch(pat, ".") do - local x = nil - if p == "R" then - x = map_gpr[band(rshift(op, rs), 31)] - rs = rs - 5 - elseif p == "F" then - x = "f"..band(rshift(op, rs), 31) - rs = rs - 5 - elseif p == "A" then - x = band(rshift(op, rs), 31) - rs = rs - 5 - elseif p == "S" then - x = arshift(lshift(op, 27-rs), 27) - rs = rs - 5 - elseif p == "I" then - x = arshift(lshift(op, 16), 16) - elseif p == "U" then - x = band(op, 0xffff) - elseif p == "D" or p == "E" then - local disp = arshift(lshift(op, 16), 16) - if p == "E" then disp = band(disp, -4) end - if last == "r0" then last = "0" end - operands[#operands] = format("%d(%s)", disp, last) - elseif p >= "2" and p <= "8" then - local disp = band(rshift(op, rs), 31) * p - if last == "r0" then last = "0" end - operands[#operands] = format("%d(%s)", disp, last) - elseif p == "H" then - x = band(rshift(op, rs), 31) + lshift(band(op, 2), 4) - rs = rs - 5 - elseif p == "M" then - x = band(rshift(op, rs), 31) + band(op, 0x20) - elseif p == "C" then - x = condfmt(band(rshift(op, rs), 31)) - rs = rs - 5 - elseif p == "B" then - local bo = rshift(op, 21) - local cond = band(rshift(op, 16), 31) - local cn = "" - rs = rs - 10 - if band(bo, 4) == 0 then - cn = band(bo, 2) == 0 and "dnz" or "dz" - if band(bo, 0x10) == 0 then - cn = cn..(band(bo, 8) == 0 and "f" or "t") - end - if band(bo, 0x10) == 0 then x = condfmt(cond) end - name = name..(band(bo, 
1) == band(rshift(op, 15), 1) and "-" or "+") - elseif band(bo, 0x10) == 0 then - cn = map_cond[band(cond, 3) + (band(bo, 8) == 0 and 4 or 0)] - if cond > 3 then x = "cr"..rshift(cond, 2) end - name = name..(band(bo, 1) == band(rshift(op, 15), 1) and "-" or "+") - end - name = gsub(name, "_", cn) - elseif p == "J" then - x = arshift(lshift(op, 27-rs), 29-rs)*4 - if band(op, 2) == 0 then x = ctx.addr + pos + x end - ctx.rel = x - x = "0x"..tohex(x) - elseif p == "K" then - if band(op, 1) ~= 0 then name = name.."l" end - if band(op, 2) ~= 0 then name = name.."a" end - elseif p == "X" or p == "Y" then - x = band(rshift(op, rs+2), 7) - if x == 0 and p == "Y" then x = nil else x = "cr"..x end - rs = rs - 5 - elseif p == "W" then - x = "cr"..band(op, 7) - elseif p == "Z" then - x = band(rshift(op, rs-4), 255) - rs = rs - 10 - elseif p == ">" then - operands[#operands] = rshift(operands[#operands], 1) - elseif p == "0" then - if last == "r0" then - operands[#operands] = nil - if altname then name = altname end - end - elseif p == "L" then - name = gsub(name, "_", band(op, 0x00200000) ~= 0 and "d" or "w") - elseif p == "." then - if band(op, 1) == 1 then name = name.."." end - elseif p == "N" then - if op == 0x60000000 then name = "nop"; break end - elseif p == "~" then - local n = #operands - operands[n-1], operands[n] = operands[n], operands[n-1] - elseif p == "=" then - local n = #operands - if last == operands[n-1] then - operands[n] = nil - name = altname - end - elseif p == "%" then - local n = #operands - if last == operands[n-1] and last == operands[n-2] then - operands[n] = nil - operands[n-1] = nil - name = altname - end - elseif p == "-" then - rs = rs - 5 - else - assert(false) - end - if x then operands[#operands+1] = x; last = x end - end - - return putop(ctx, name, operands) -end - ------------------------------------------------------------------------------- - --- Disassemble a block of code. 
-local function disass_block(ctx, ofs, len) - if not ofs then ofs = 0 end - local stop = len and ofs+len or #ctx.code - stop = stop - stop % 4 - ctx.pos = ofs - ofs % 4 - ctx.rel = nil - while ctx.pos < stop do disass_ins(ctx) end -end - --- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). -local function create(code, addr, out) - local ctx = {} - ctx.code = code - ctx.addr = addr or 0 - ctx.out = out or io.write - ctx.symtab = {} - ctx.disass = disass_block - ctx.hexdump = 8 - return ctx -end - --- Simple API: disassemble code (a string) at address and output via out. -local function disass(code, addr, out) - create(code, addr, out):disass() -end - --- Return register name for RID. -local function regname(r) - if r < 32 then return map_gpr[r] end - return "f"..(r-32) -end - --- Public module functions. -return { - create = create, - disass = disass, - regname = regname -} - diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua deleted file mode 100644 index d5714ee1f7..0000000000 --- a/src/jit/dis_x64.lua +++ /dev/null @@ -1,17 +0,0 @@ ----------------------------------------------------------------------------- --- LuaJIT x64 disassembler wrapper module. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- Released under the MIT license. See Copyright Notice in luajit.h ----------------------------------------------------------------------------- --- This module just exports the 64 bit functions from the combined --- x86/x64 disassembler module. All the interesting stuff is there. 
------------------------------------------------------------------------------- - -local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86") -return { - create = dis_x86.create64, - disass = dis_x86.disass64, - regname = dis_x86.regname64 -} - diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua deleted file mode 100644 index 4371233d2b..0000000000 --- a/src/jit/dis_x86.lua +++ /dev/null @@ -1,931 +0,0 @@ ----------------------------------------------------------------------------- --- LuaJIT x86/x64 disassembler module. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- Released under the MIT license. See Copyright Notice in luajit.h ----------------------------------------------------------------------------- --- This is a helper module used by the LuaJIT machine code dumper module. --- --- Sending small code snippets to an external disassembler and mixing the --- output with our own stuff was too fragile. So I had to bite the bullet --- and write yet another x86 disassembler. Oh well ... --- --- The output format is very similar to what ndisasm generates. But it has --- been developed independently by looking at the opcode tables from the --- Intel and AMD manuals. The supported instruction set is quite extensive --- and reflects what a current generation Intel or AMD CPU implements in --- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, --- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor --- (VMX/SVM) instructions. --- --- Notes: --- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. --- * No attempt at optimization has been made -- it's fast enough for my needs. 
------------------------------------------------------------------------------- - -local type = type -local sub, byte, format = string.sub, string.byte, string.format -local match, gmatch, gsub = string.match, string.gmatch, string.gsub -local lower, rep = string.lower, string.rep -local bit = require("bit") -local tohex = bit.tohex - --- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. -local map_opc1_32 = { ---0x -[0]="addBmr","addVmr","addBrm","addVrm","addBai","addVai","push es","pop es", -"orBmr","orVmr","orBrm","orVrm","orBai","orVai","push cs","opc2*", ---1x -"adcBmr","adcVmr","adcBrm","adcVrm","adcBai","adcVai","push ss","pop ss", -"sbbBmr","sbbVmr","sbbBrm","sbbVrm","sbbBai","sbbVai","push ds","pop ds", ---2x -"andBmr","andVmr","andBrm","andVrm","andBai","andVai","es:seg","daa", -"subBmr","subVmr","subBrm","subVrm","subBai","subVai","cs:seg","das", ---3x -"xorBmr","xorVmr","xorBrm","xorVrm","xorBai","xorVai","ss:seg","aaa", -"cmpBmr","cmpVmr","cmpBrm","cmpVrm","cmpBai","cmpVai","ds:seg","aas", ---4x -"incVR","incVR","incVR","incVR","incVR","incVR","incVR","incVR", -"decVR","decVR","decVR","decVR","decVR","decVR","decVR","decVR", ---5x -"pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR", -"popUR","popUR","popUR","popUR","popUR","popUR","popUR","popUR", ---6x -"sz*pushaw,pusha","sz*popaw,popa","boundVrm","arplWmr", -"fs:seg","gs:seg","o16:","a16", -"pushUi","imulVrmi","pushBs","imulVrms", -"insb","insVS","outsb","outsVS", ---7x -"joBj","jnoBj","jbBj","jnbBj","jzBj","jnzBj","jbeBj","jaBj", -"jsBj","jnsBj","jpeBj","jpoBj","jlBj","jgeBj","jleBj","jgBj", ---8x -"arith!Bmi","arith!Vmi","arith!Bmi","arith!Vms", -"testBmr","testVmr","xchgBrm","xchgVrm", -"movBmr","movVmr","movBrm","movVrm", -"movVmg","leaVrm","movWgm","popUm", ---9x -"nop*xchgVaR|pause|xchgWaR|repne nop","xchgVaR","xchgVaR","xchgVaR", -"xchgVaR","xchgVaR","xchgVaR","xchgVaR", -"sz*cbw,cwde,cdqe","sz*cwd,cdq,cqo","call farViw","wait", 
-"sz*pushfw,pushf","sz*popfw,popf","sahf","lahf", ---Ax -"movBao","movVao","movBoa","movVoa", -"movsb","movsVS","cmpsb","cmpsVS", -"testBai","testVai","stosb","stosVS", -"lodsb","lodsVS","scasb","scasVS", ---Bx -"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", -"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", ---Cx -"shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi", -"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", ---Dx -"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", -"fp*0","fp*1","fp*2","fp*3","fp*4","fp*5","fp*6","fp*7", ---Ex -"loopneBj","loopeBj","loopBj","sz*jcxzBj,jecxzBj,jrcxzBj", -"inBau","inVau","outBua","outVua", -"callVj","jmpVj","jmp farViw","jmpBj","inBad","inVad","outBda","outVda", ---Fx -"lock:","int1","repne:rep","rep:","hlt","cmc","testb!Bm","testv!Vm", -"clc","stc","cli","sti","cld","std","incb!Bm","incd!Vm", -} -assert(#map_opc1_32 == 255) - --- Map for 1st opcode byte in 64 bit mode (overrides only). -local map_opc1_64 = setmetatable({ - [0x06]=false, [0x07]=false, [0x0e]=false, - [0x16]=false, [0x17]=false, [0x1e]=false, [0x1f]=false, - [0x27]=false, [0x2f]=false, [0x37]=false, [0x3f]=false, - [0x60]=false, [0x61]=false, [0x62]=false, [0x63]="movsxdVrDmt", [0x67]="a32:", - [0x40]="rex*", [0x41]="rex*b", [0x42]="rex*x", [0x43]="rex*xb", - [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", - [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", - [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", - [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false, - [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, -}, { __index = map_opc1_32 }) - --- Map for 2nd opcode byte (0F xx). True CISC hell. Hey, I told you. 
--- Prefix dependent MMX/SSE opcodes: (none)|rep|o16|repne, -|F3|66|F2 -local map_opc2 = { ---0x -[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", -"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", ---1x -"movupsXrm|movssXrvm|movupdXrm|movsdXrvm", -"movupsXmr|movssXmvr|movupdXmr|movsdXmvr", -"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", -"movlpsXmr||movlpdXmr", -"unpcklpsXrvm||unpcklpdXrvm", -"unpckhpsXrvm||unpckhpdXrvm", -"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", -"movhpsXmr||movhpdXmr", -"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", -"hintnopVm","hintnopVm","hintnopVm","hintnopVm", ---2x -"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, -"movapsXrm||movapdXrm", -"movapsXmr||movapdXmr", -"cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt", -"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", -"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", -"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", -"ucomissXrm||ucomisdXrm", -"comissXrm||comisdXrm", ---3x -"wrmsr","rdtsc","rdmsr","rdpmc","sysenter","sysexit",nil,"getsec", -"opc3*38",nil,"opc3*3a",nil,nil,nil,nil,nil, ---4x -"cmovoVrm","cmovnoVrm","cmovbVrm","cmovnbVrm", -"cmovzVrm","cmovnzVrm","cmovbeVrm","cmovaVrm", -"cmovsVrm","cmovnsVrm","cmovpeVrm","cmovpoVrm", -"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", ---5x -"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", -"rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm", -"andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm", -"orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm", -"addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm", -"cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm", -"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", -"subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm", -"divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm", ---6x 
-"punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm", -"pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm", -"punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm", -"||punpcklqdqXrvm","||punpckhqdqXrvm", -"movPrVSm","movqMrm|movdquXrm|movdqaXrm", ---7x -"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu", -"pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu", -"pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|", -"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", -nil,nil, -"||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm", -"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", ---8x -"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", -"jsVj","jnsVj","jpeVj","jpoVj","jlVj","jgeVj","jleVj","jgVj", ---9x -"setoBm","setnoBm","setbBm","setnbBm","setzBm","setnzBm","setbeBm","setaBm", -"setsBm","setnsBm","setpeBm","setpoBm","setlBm","setgeBm","setleBm","setgBm", ---Ax -"push fs","pop fs","cpuid","btVmr","shldVmru","shldVmrc",nil,nil, -"push gs","pop gs","rsm","btsVmr","shrdVmru","shrdVmrc","fxsave!Dmp","imulVrm", ---Bx -"cmpxchgBmr","cmpxchgVmr","$lssVrm","btrVmr", -"$lfsVrm","$lgsVrm","movzxVrBmt","movzxVrWmt", -"|popcntVrm","ud2Dp","bt!Vmu","btcVmr", -"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", ---Cx -"xaddBmr","xaddVmr", -"cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|", -"pinsrwPrvWmu","pextrwDrPmu", -"shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp", -"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", ---Dx -"||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm", -"paddqPrvm","pmullwPrvm", -"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", -"psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm", -"paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm", ---Ex -"pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm", -"pmulhuwPrvm","pmulhwPrvm", -"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", 
-"psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm", -"paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm", ---Fx -"|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm", -"pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$", -"psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm", -"paddbPrvm","paddwPrvm","padddPrvm","ud", -} -assert(map_opc2[255] == "ud") - --- Map for three-byte opcodes. Can't wait for their next invention. -local map_opc3 = { -["38"] = { -- [66] 0f 38 xx ---0x -[0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm", -"pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm", -"psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm", -"||permilpsXrvm","||permilpdXrvm",nil,nil, ---1x -"||pblendvbXrma",nil,nil,nil, -"||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm", -"||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil, -"pabsbPrm","pabswPrm","pabsdPrm",nil, ---2x -"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", -"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, -"||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm", -"||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr", ---3x -"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", -"||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm", -"||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm", -"||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm", ---4x -"||pmulddXrvm","||phminposuwXrm",nil,nil, -nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm", ---5x -[0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm", -[0x5a] = "||broadcasti128XrlXm", ---7x -[0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm", ---8x -[0x8c] = "||pmaskmovXrvVSm", -[0x8e] = "||pmaskmovVSmXvr", ---Dx -[0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm", -[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm", ---Fx -[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", -[0xf7] = "| sarxVrmv| shlxVrmv| shrxVrmv", -}, - -["3a"] = { 
-- [66] 0f 3a xx ---0x -[0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil, -"||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil, -"||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu", -"||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu", ---1x -nil,nil,nil,nil, -"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", -"||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil, -nil,nil,nil,nil, ---2x -"||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil, ---3x -[0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru", ---4x -[0x40] = "||dppsXrvmu", -[0x41] = "||dppdXrvmu", -[0x42] = "||mpsadbwXrvmu", -[0x44] = "||pclmulqdqXrvmu", -[0x46] = "||perm2i128Xrvmu", -[0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb", -[0x4c] = "||pblendvbXrvmb", ---6x -[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", -[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", -[0xdf] = "||aeskeygenassistXrmu", ---Fx -[0xf0] = "||| rorxVrmu", -}, -} - --- Map for VMX/SVM opcodes 0F 01 C0-FF (sgdt group with register operands). -local map_opcvm = { -[0xc1]="vmcall",[0xc2]="vmlaunch",[0xc3]="vmresume",[0xc4]="vmxoff", -[0xc8]="monitor",[0xc9]="mwait", -[0xd8]="vmrun",[0xd9]="vmmcall",[0xda]="vmload",[0xdb]="vmsave", -[0xdc]="stgi",[0xdd]="clgi",[0xde]="skinit",[0xdf]="invlpga", -[0xf8]="swapgs",[0xf9]="rdtscp", -} - --- Map for FP opcodes. And you thought stack machines are simple? -local map_opcfp = { --- D8-DF 00-BF: opcodes with a memory operand. 
--- D8 -[0]="faddFm","fmulFm","fcomFm","fcompFm","fsubFm","fsubrFm","fdivFm","fdivrFm", -"fldFm",nil,"fstFm","fstpFm","fldenvVm","fldcwWm","fnstenvVm","fnstcwWm", --- DA -"fiaddDm","fimulDm","ficomDm","ficompDm", -"fisubDm","fisubrDm","fidivDm","fidivrDm", --- DB -"fildDm","fisttpDm","fistDm","fistpDm",nil,"fld twordFmp",nil,"fstp twordFmp", --- DC -"faddGm","fmulGm","fcomGm","fcompGm","fsubGm","fsubrGm","fdivGm","fdivrGm", --- DD -"fldGm","fisttpQm","fstGm","fstpGm","frstorDmp",nil,"fnsaveDmp","fnstswWm", --- DE -"fiaddWm","fimulWm","ficomWm","ficompWm", -"fisubWm","fisubrWm","fidivWm","fidivrWm", --- DF -"fildWm","fisttpWm","fistWm","fistpWm", -"fbld twordFmp","fildQm","fbstp twordFmp","fistpQm", --- xx C0-FF: opcodes with a pseudo-register operand. --- D8 -"faddFf","fmulFf","fcomFf","fcompFf","fsubFf","fsubrFf","fdivFf","fdivrFf", --- D9 -"fldFf","fxchFf",{"fnop"},nil, -{"fchs","fabs",nil,nil,"ftst","fxam"}, -{"fld1","fldl2t","fldl2e","fldpi","fldlg2","fldln2","fldz"}, -{"f2xm1","fyl2x","fptan","fpatan","fxtract","fprem1","fdecstp","fincstp"}, -{"fprem","fyl2xp1","fsqrt","fsincos","frndint","fscale","fsin","fcos"}, --- DA -"fcmovbFf","fcmoveFf","fcmovbeFf","fcmovuFf",nil,{nil,"fucompp"},nil,nil, --- DB -"fcmovnbFf","fcmovneFf","fcmovnbeFf","fcmovnuFf", -{nil,nil,"fnclex","fninit"},"fucomiFf","fcomiFf",nil, --- DC -"fadd toFf","fmul toFf",nil,nil, -"fsub toFf","fsubr toFf","fdivr toFf","fdiv toFf", --- DD -"ffreeFf",nil,"fstFf","fstpFf","fucomFf","fucompFf",nil,nil, --- DE -"faddpFf","fmulpFf",nil,{nil,"fcompp"}, -"fsubrpFf","fsubpFf","fdivrpFf","fdivpFf", --- DF -nil,nil,nil,nil,{"fnstsw ax"},"fucomipFf","fcomipFf",nil, -} -assert(map_opcfp[126] == "fcomipFf") - --- Map for opcode groups. The subkey is sp from the ModRM byte. 
-local map_opcgroup = { - arith = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" }, - shift = { "rol", "ror", "rcl", "rcr", "shl", "shr", "sal", "sar" }, - testb = { "testBmi", "testBmi", "not", "neg", "mul", "imul", "div", "idiv" }, - testv = { "testVmi", "testVmi", "not", "neg", "mul", "imul", "div", "idiv" }, - incb = { "inc", "dec" }, - incd = { "inc", "dec", "callUmp", "$call farDmp", - "jmpUmp", "$jmp farDmp", "pushUm" }, - sldt = { "sldt", "str", "lldt", "ltr", "verr", "verw" }, - sgdt = { "vm*$sgdt", "vm*$sidt", "$lgdt", "vm*$lidt", - "smsw", nil, "lmsw", "vm*$invlpg" }, - bt = { nil, nil, nil, nil, "bt", "bts", "btr", "btc" }, - cmpxchg = { nil, "sz*,cmpxchg8bQmp,cmpxchg16bXmp", nil, nil, - nil, nil, "vmptrld|vmxon|vmclear", "vmptrst" }, - pshiftw = { nil, nil, "psrlw", nil, "psraw", nil, "psllw" }, - pshiftd = { nil, nil, "psrld", nil, "psrad", nil, "pslld" }, - pshiftq = { nil, nil, "psrlq", nil, nil, nil, "psllq" }, - pshiftdq = { nil, nil, "psrlq", "psrldq", nil, nil, "psllq", "pslldq" }, - fxsave = { "$fxsave", "$fxrstor", "$ldmxcsr", "$stmxcsr", - nil, "lfenceDp$", "mfenceDp$", "sfenceDp$clflush" }, - prefetch = { "prefetch", "prefetchw" }, - prefetcht = { "prefetchnta", "prefetcht0", "prefetcht1", "prefetcht2" }, -} - ------------------------------------------------------------------------------- - --- Maps for register names. 
-local map_regs = { - B = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", - "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" }, - B64 = { "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", - "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" }, - W = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", - "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" }, - D = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", - "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" }, - Q = { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", - "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }, - M = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", - "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext! - X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", - "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" }, - Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", - "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" }, -} -local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" } - --- Maps for size names. -local map_sz2n = { - B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32, -} -local map_sz2prefix = { - B = "byte", W = "word", D = "dword", - Q = "qword", - M = "qword", X = "xword", Y = "yword", - F = "dword", G = "qword", -- No need for sizes/register names for these two. -} - ------------------------------------------------------------------------------- - --- Output a nicely formatted line with an opcode and operands. -local function putop(ctx, text, operands) - local code, pos, hex = ctx.code, ctx.pos, "" - local hmax = ctx.hexdump - if hmax > 0 then - for i=ctx.start,pos-1 do - hex = hex..format("%02X", byte(code, i, i)) - end - if #hex > hmax then hex = sub(hex, 1, hmax)..". " - else hex = hex..rep(" ", hmax-#hex+2) end - end - if operands then text = text.." 
"..operands end - if ctx.o16 then text = "o16 "..text; ctx.o16 = false end - if ctx.a32 then text = "a32 "..text; ctx.a32 = false end - if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end - if ctx.rex then - local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "").. - (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "").. - (ctx.vexl and "l" or "") - if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end - if t ~= "" then text = ctx.rex.."."..t.." "..gsub(text, "^ ", "") - elseif ctx.rex == "vex" then text = gsub("v"..text, "^v ", "") end - ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false - ctx.rex = false; ctx.vexl = false; ctx.vexv = false - end - if ctx.seg then - local text2, n = gsub(text, "%[", "["..ctx.seg..":") - if n == 0 then text = ctx.seg.." "..text else text = text2 end - ctx.seg = false - end - if ctx.lock then text = "lock "..text; ctx.lock = false end - local imm = ctx.imm - if imm then - local sym = ctx.symtab[imm] - if sym then text = text.."\t->"..sym end - end - ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text)) - ctx.mrm = false - ctx.vexv = false - ctx.start = pos - ctx.imm = nil -end - --- Clear all prefix flags. -local function clearprefixes(ctx) - ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false - ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false - ctx.rex = false; ctx.a32 = false; ctx.vexl = false -end - --- Fallback for incomplete opcodes at the end. -local function incomplete(ctx) - ctx.pos = ctx.stop+1 - clearprefixes(ctx) - return putop(ctx, "(incomplete)") -end - --- Fallback for unknown opcodes. -local function unknown(ctx) - clearprefixes(ctx) - return putop(ctx, "(unknown)") -end - --- Return an immediate of the specified size. 
-local function getimm(ctx, pos, n) - if pos+n-1 > ctx.stop then return incomplete(ctx) end - local code = ctx.code - if n == 1 then - local b1 = byte(code, pos, pos) - return b1 - elseif n == 2 then - local b1, b2 = byte(code, pos, pos+1) - return b1+b2*256 - else - local b1, b2, b3, b4 = byte(code, pos, pos+3) - local imm = b1+b2*256+b3*65536+b4*16777216 - ctx.imm = imm - return imm - end -end - --- Process pattern string and generate the operands. -local function putpat(ctx, name, pat) - local operands, regs, sz, mode, sp, rm, sc, rx, sdisp - local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl - - -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz - for p in gmatch(pat, ".") do - local x = nil - if p == "V" or p == "U" then - if ctx.rexw then sz = "Q"; ctx.rexw = false - elseif ctx.o16 then sz = "W"; ctx.o16 = false - elseif p == "U" and ctx.x64 then sz = "Q" - else sz = "D" end - regs = map_regs[sz] - elseif p == "T" then - if ctx.rexw then sz = "Q"; ctx.rexw = false else sz = "D" end - regs = map_regs[sz] - elseif p == "B" then - sz = "B" - regs = ctx.rex and map_regs.B64 or map_regs.B - elseif match(p, "[WDQMXYFG]") then - sz = p - if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end - regs = map_regs[sz] - elseif p == "P" then - sz = ctx.o16 and "X" or "M"; ctx.o16 = false - if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end - regs = map_regs[sz] - elseif p == "S" then - name = name..lower(sz) - elseif p == "s" then - local imm = getimm(ctx, pos, 1); if not imm then return end - x = imm <= 127 and format("+0x%02x", imm) - or format("-0x%02x", 256-imm) - pos = pos+1 - elseif p == "u" then - local imm = getimm(ctx, pos, 1); if not imm then return end - x = format("0x%02x", imm) - pos = pos+1 - elseif p == "b" then - local imm = getimm(ctx, pos, 1); if not imm then return end - x = regs[imm/16+1] - pos = pos+1 - elseif p == "w" then - local imm = getimm(ctx, pos, 2); if not imm then return end - x = format("0x%x", imm) - pos = 
pos+2 - elseif p == "o" then -- [offset] - if ctx.x64 then - local imm1 = getimm(ctx, pos, 4); if not imm1 then return end - local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end - x = format("[0x%08x%08x]", imm2, imm1) - pos = pos+8 - else - local imm = getimm(ctx, pos, 4); if not imm then return end - x = format("[0x%08x]", imm) - pos = pos+4 - end - elseif p == "i" or p == "I" then - local n = map_sz2n[sz] - if n == 8 and ctx.x64 and p == "I" then - local imm1 = getimm(ctx, pos, 4); if not imm1 then return end - local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end - x = format("0x%08x%08x", imm2, imm1) - else - if n == 8 then n = 4 end - local imm = getimm(ctx, pos, n); if not imm then return end - if sz == "Q" and (imm < 0 or imm > 0x7fffffff) then - imm = (0xffffffff+1)-imm - x = format(imm > 65535 and "-0x%08x" or "-0x%x", imm) - else - x = format(imm > 65535 and "0x%08x" or "0x%x", imm) - end - end - pos = pos+n - elseif p == "j" then - local n = map_sz2n[sz] - if n == 8 then n = 4 end - local imm = getimm(ctx, pos, n); if not imm then return end - if sz == "B" and imm > 127 then imm = imm-256 - elseif imm > 2147483647 then imm = imm-4294967296 end - pos = pos+n - imm = imm + pos + ctx.addr - if imm > 4294967295 and not ctx.x64 then imm = imm-4294967296 end - ctx.imm = imm - if sz == "W" then - x = format("word 0x%04x", imm%65536) - elseif ctx.x64 then - local lo = imm % 0x1000000 - x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) - else - x = "0x"..tohex(imm) - end - elseif p == "R" then - local r = byte(code, pos-1, pos-1)%8 - if ctx.rexb then r = r + 8; ctx.rexb = false end - x = regs[r+1] - elseif p == "a" then x = regs[1] - elseif p == "c" then x = "cl" - elseif p == "d" then x = "dx" - elseif p == "1" then x = "1" - else - if not mode then - mode = ctx.mrm - if not mode then - if pos > stop then return incomplete(ctx) end - mode = byte(code, pos, pos) - pos = pos+1 - end - rm = mode%8; mode = (mode-rm)/8 - sp = mode%8; mode = 
(mode-sp)/8 - sdisp = "" - if mode < 3 then - if rm == 4 then - if pos > stop then return incomplete(ctx) end - sc = byte(code, pos, pos) - pos = pos+1 - rm = sc%8; sc = (sc-rm)/8 - rx = sc%8; sc = (sc-rx)/8 - if ctx.rexx then rx = rx + 8; ctx.rexx = false end - if rx == 4 then rx = nil end - end - if mode > 0 or rm == 5 then - local dsz = mode - if dsz ~= 1 then dsz = 4 end - local disp = getimm(ctx, pos, dsz); if not disp then return end - if mode == 0 then rm = nil end - if rm or rx or (not sc and ctx.x64 and not ctx.a32) then - if dsz == 1 and disp > 127 then - sdisp = format("-0x%x", 256-disp) - elseif disp >= 0 and disp <= 0x7fffffff then - sdisp = format("+0x%x", disp) - else - sdisp = format("-0x%x", (0xffffffff+1)-disp) - end - else - sdisp = format(ctx.x64 and not ctx.a32 and - not (disp >= 0 and disp <= 0x7fffffff) - and "0xffffffff%08x" or "0x%08x", disp) - end - pos = pos+dsz - end - end - if rm and ctx.rexb then rm = rm + 8; ctx.rexb = false end - if ctx.rexr then sp = sp + 8; ctx.rexr = false end - end - if p == "m" then - if mode == 3 then x = regs[rm+1] - else - local aregs = ctx.a32 and map_regs.D or ctx.aregs - local srm, srx = "", "" - if rm then srm = aregs[rm+1] - elseif not sc and ctx.x64 and not ctx.a32 then srm = "rip" end - ctx.a32 = false - if rx then - if rm then srm = srm.."+" end - srx = aregs[rx+1] - if sc > 0 then srx = srx.."*"..(2^sc) end - end - x = format("[%s%s%s]", srm, srx, sdisp) - end - if mode < 3 and - (not match(pat, "[aRrgp]") or match(pat, "t")) then -- Yuck. - x = map_sz2prefix[sz].." "..x - end - elseif p == "r" then x = regs[sp+1] - elseif p == "g" then x = map_segregs[sp+1] - elseif p == "p" then -- Suppress prefix. 
- elseif p == "f" then x = "st"..rm - elseif p == "x" then - if sp == 0 and ctx.lock and not ctx.x64 then - x = "CR8"; ctx.lock = false - else - x = "CR"..sp - end - elseif p == "v" then - if ctx.vexv then - x = regs[ctx.vexv+1]; ctx.vexv = false - end - elseif p == "y" then x = "DR"..sp - elseif p == "z" then x = "TR"..sp - elseif p == "l" then vexl = false - elseif p == "t" then - else - error("bad pattern `"..pat.."'") - end - end - if x then operands = operands and operands..", "..x or x end - end - ctx.pos = pos - return putop(ctx, name, operands) -end - --- Forward declaration. -local map_act - --- Fetch and cache MRM byte. -local function getmrm(ctx) - local mrm = ctx.mrm - if not mrm then - local pos = ctx.pos - if pos > ctx.stop then return nil end - mrm = byte(ctx.code, pos, pos) - ctx.pos = pos+1 - ctx.mrm = mrm - end - return mrm -end - --- Dispatch to handler depending on pattern. -local function dispatch(ctx, opat, patgrp) - if not opat then return unknown(ctx) end - if match(opat, "%|") then -- MMX/SSE variants depending on prefix. - local p - if ctx.rep then - p = ctx.rep=="rep" and "%|([^%|]*)" or "%|[^%|]*%|[^%|]*%|([^%|]*)" - ctx.rep = false - elseif ctx.o16 then p = "%|[^%|]*%|([^%|]*)"; ctx.o16 = false - else p = "^[^%|]*" end - opat = match(opat, p) - if not opat then return unknown(ctx) end --- ctx.rep = false; ctx.o16 = false - --XXX fails for 66 f2 0f 38 f1 06 crc32 eax,WORD PTR [esi] - --XXX remove in branches? - end - if match(opat, "%$") then -- reg$mem variants. - local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end - opat = match(opat, mrm >= 192 and "^[^%$]*" or "%$(.*)") - if opat == "" then return unknown(ctx) end - end - if opat == "" then return unknown(ctx) end - local name, pat = match(opat, "^([a-z0-9 ]*)(.*)") - if pat == "" and patgrp then pat = patgrp end - return map_act[sub(pat, 1, 1)](ctx, name, pat) -end - --- Get a pattern from an opcode map and dispatch to handler. 
-local function dispatchmap(ctx, opcmap) - local pos = ctx.pos - local opat = opcmap[byte(ctx.code, pos, pos)] - pos = pos + 1 - ctx.pos = pos - return dispatch(ctx, opat) -end - --- Map for action codes. The key is the first char after the name. -map_act = { - -- Simple opcodes without operands. - [""] = function(ctx, name, pat) - return putop(ctx, name) - end, - - -- Operand size chars fall right through. - B = putpat, W = putpat, D = putpat, Q = putpat, - V = putpat, U = putpat, T = putpat, - M = putpat, X = putpat, P = putpat, - F = putpat, G = putpat, Y = putpat, - - -- Collect prefixes. - [":"] = function(ctx, name, pat) - ctx[pat == ":" and name or sub(pat, 2)] = name - if ctx.pos - ctx.start > 5 then return unknown(ctx) end -- Limit #prefixes. - end, - - -- Chain to special handler specified by name. - ["*"] = function(ctx, name, pat) - return map_act[name](ctx, name, sub(pat, 2)) - end, - - -- Use named subtable for opcode group. - ["!"] = function(ctx, name, pat) - local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end - return dispatch(ctx, map_opcgroup[name][((mrm-(mrm%8))/8)%8+1], sub(pat, 2)) - end, - - -- o16,o32[,o64] variants. - sz = function(ctx, name, pat) - if ctx.o16 then ctx.o16 = false - else - pat = match(pat, ",(.*)") - if ctx.rexw then - local p = match(pat, ",(.*)") - if p then pat = p; ctx.rexw = false end - end - end - pat = match(pat, "^[^,]*") - return dispatch(ctx, pat) - end, - - -- Two-byte opcode dispatch. - opc2 = function(ctx, name, pat) - return dispatchmap(ctx, map_opc2) - end, - - -- Three-byte opcode dispatch. - opc3 = function(ctx, name, pat) - return dispatchmap(ctx, map_opc3[pat]) - end, - - -- VMX/SVM dispatch. - vm = function(ctx, name, pat) - return dispatch(ctx, map_opcvm[ctx.mrm]) - end, - - -- Floating point opcode dispatch. 
- fp = function(ctx, name, pat) - local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end - local rm = mrm%8 - local idx = pat*8 + ((mrm-rm)/8)%8 - if mrm >= 192 then idx = idx + 64 end - local opat = map_opcfp[idx] - if type(opat) == "table" then opat = opat[rm+1] end - return dispatch(ctx, opat) - end, - - -- REX prefix. - rex = function(ctx, name, pat) - if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed. - for p in gmatch(pat, ".") do ctx["rex"..p] = true end - ctx.rex = "rex" - end, - - -- VEX prefix. - vex = function(ctx, name, pat) - if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed. - ctx.rex = "vex" - local pos = ctx.pos - if ctx.mrm then - ctx.mrm = nil - pos = pos-1 - end - local b = byte(ctx.code, pos, pos) - if not b then return incomplete(ctx) end - pos = pos+1 - if b < 128 then ctx.rexr = true end - local m = 1 - if pat == "3" then - m = b%32; b = (b-m)/32 - local nb = b%2; b = (b-nb)/2 - if nb == 0 then ctx.rexb = true end - local nx = b%2 - if nx == 0 then ctx.rexx = true end - b = byte(ctx.code, pos, pos) - if not b then return incomplete(ctx) end - pos = pos+1 - if b >= 128 then ctx.rexw = true end - end - ctx.pos = pos - local map - if m == 1 then map = map_opc2 - elseif m == 2 then map = map_opc3["38"] - elseif m == 3 then map = map_opc3["3a"] - else return unknown(ctx) end - local p = b%4; b = (b-p)/4 - if p == 1 then ctx.o16 = "o16" - elseif p == 2 then ctx.rep = "rep" - elseif p == 3 then ctx.rep = "repne" end - local l = b%2; b = (b-l)/2 - if l ~= 0 then ctx.vexl = true end - ctx.vexv = (-1-b)%16 - return dispatchmap(ctx, map) - end, - - -- Special case for nop with REX prefix. - nop = function(ctx, name, pat) - return dispatch(ctx, ctx.rex and pat or "nop") - end, - - -- Special case for 0F 77. 
- emms = function(ctx, name, pat) - if ctx.rex ~= "vex" then - return putop(ctx, "emms") - elseif ctx.vexl then - ctx.vexl = false - return putop(ctx, "zeroall") - else - return putop(ctx, "zeroupper") - end - end, -} - ------------------------------------------------------------------------------- - --- Disassemble a block of code. -local function disass_block(ctx, ofs, len) - if not ofs then ofs = 0 end - local stop = len and ofs+len or #ctx.code - ofs = ofs + 1 - ctx.start = ofs - ctx.pos = ofs - ctx.stop = stop - ctx.imm = nil - ctx.mrm = false - clearprefixes(ctx) - while ctx.pos <= stop do dispatchmap(ctx, ctx.map1) end - if ctx.pos ~= ctx.start then incomplete(ctx) end -end - --- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). -local function create(code, addr, out) - local ctx = {} - ctx.code = code - ctx.addr = (addr or 0) - 1 - ctx.out = out or io.write - ctx.symtab = {} - ctx.disass = disass_block - ctx.hexdump = 16 - ctx.x64 = false - ctx.map1 = map_opc1_32 - ctx.aregs = map_regs.D - return ctx -end - -local function create64(code, addr, out) - local ctx = create(code, addr, out) - ctx.x64 = true - ctx.map1 = map_opc1_64 - ctx.aregs = map_regs.Q - return ctx -end - --- Simple API: disassemble code (a string) at address and output via out. -local function disass(code, addr, out) - create(code, addr, out):disass() -end - -local function disass64(code, addr, out) - create64(code, addr, out):disass() -end - --- Return register name for RID. -local function regname(r) - if r < 8 then return map_regs.D[r+1] end - return map_regs.X[r-7] -end - -local function regname64(r) - if r < 16 then return map_regs.Q[r+1] end - return map_regs.X[r-15] -end - --- Public module functions. 
-return { - create = create, - create64 = create64, - disass = disass, - disass64 = disass64, - regname = regname, - regname64 = regname64 -} - diff --git a/src/jit/dump.lua b/src/jit/dump.lua deleted file mode 100644 index 2bea652bf8..0000000000 --- a/src/jit/dump.lua +++ /dev/null @@ -1,712 +0,0 @@ ----------------------------------------------------------------------------- --- LuaJIT compiler dump module. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- Released under the MIT license. See Copyright Notice in luajit.h ----------------------------------------------------------------------------- --- --- This module can be used to debug the JIT compiler itself. It dumps the --- code representations and structures used in various compiler stages. --- --- Example usage: --- --- luajit -jdump -e "local x=0; for i=1,1e6 do x=x+i end; print(x)" --- luajit -jdump=im -e "for i=1,1000 do for j=1,1000 do end end" | less -R --- luajit -jdump=is myapp.lua | less -R --- luajit -jdump=-b myapp.lua --- luajit -jdump=+aH,myapp.html myapp.lua --- luajit -jdump=ixT,myapp.dump myapp.lua --- --- The first argument specifies the dump mode. The second argument gives --- the output file name. Default output is to stdout, unless the environment --- variable LUAJIT_DUMPFILE is set. The file is overwritten every time the --- module is started. --- --- Different features can be turned on or off with the dump mode. If the --- mode starts with a '+', the following features are added to the default --- set of features; a '-' removes them. Otherwise the features are replaced. --- --- The following dump features are available (* marks the default): --- --- * t Print a line for each started, ended or aborted trace (see also -jv). --- * b Dump the traced bytecode. --- * i Dump the IR (intermediate representation). --- r Augment the IR with register/stack slots. --- s Dump the snapshot map. --- * m Dump the generated machine code. --- x Print each taken trace exit. 
--- X Print each taken trace exit and the contents of all registers. --- a Print the IR of aborted traces, too. --- --- The output format can be set with the following characters: --- --- T Plain text output. --- A ANSI-colored text output --- H Colorized HTML + CSS output. --- --- The default output format is plain text. It's set to ANSI-colored text --- if the COLORTERM variable is set. Note: this is independent of any output --- redirection, which is actually considered a feature. --- --- You probably want to use less -R to enjoy viewing ANSI-colored text from --- a pipe or a file. Add this to your ~/.bashrc: export LESS="-R" --- ------------------------------------------------------------------------------- - --- Cache some library functions and objects. -local jit = require("jit") -assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") -local jutil = require("jit.util") -local vmdef = require("jit.vmdef") -local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc -local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek -local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap -local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr -local bit = require("bit") -local band, shr, tohex = bit.band, bit.rshift, bit.tohex -local sub, gsub, format = string.sub, string.gsub, string.format -local byte, rep = string.byte, string.rep -local type, tostring = type, tostring -local stdout, stderr = io.stdout, io.stderr - --- Load other modules on-demand. -local bcline, disass - --- Active flag, output file handle and dump mode. -local active, out, dumpmode - ------------------------------------------------------------------------------- - -local symtabmt = { __index = false } -local symtab = {} -local nexitsym = 0 - --- Fill nested symbol table with per-trace exit stub addresses. 
-local function fillsymtab_tr(tr, nexit) - local t = {} - symtabmt.__index = t - if jit.arch:sub(1, 4) == "mips" then - t[traceexitstub(tr, 0)] = "exit" - return - end - for i=0,nexit-1 do - local addr = traceexitstub(tr, i) - if addr < 0 then addr = addr + 2^32 end - t[addr] = tostring(i) - end - local addr = traceexitstub(tr, nexit) - if addr then t[addr] = "stack_check" end -end - --- Fill symbol table with trace exit stub addresses. -local function fillsymtab(tr, nexit) - local t = symtab - if nexitsym == 0 then - local ircall = vmdef.ircall - for i=0,#ircall do - local addr = ircalladdr(i) - if addr ~= 0 then - if addr < 0 then addr = addr + 2^32 end - t[addr] = ircall[i] - end - end - end - if nexitsym == 1000000 then -- Per-trace exit stubs. - fillsymtab_tr(tr, nexit) - elseif nexit > nexitsym then -- Shared exit stubs. - for i=nexitsym,nexit-1 do - local addr = traceexitstub(i) - if addr == nil then -- Fall back to per-trace exit stubs. - fillsymtab_tr(tr, nexit) - setmetatable(symtab, symtabmt) - nexit = 1000000 - break - end - if addr < 0 then addr = addr + 2^32 end - t[addr] = tostring(i) - end - nexitsym = nexit - end - return t -end - -local function dumpwrite(s) - out:write(s) -end - --- Disassemble machine code. 
-local function dump_mcode(tr) - local info = traceinfo(tr) - if not info then return end - local mcode, addr, loop = tracemc(tr) - if not mcode then return end - if not disass then disass = require("jit.dis_"..jit.arch) end - if addr < 0 then addr = addr + 2^32 end - out:write("---- TRACE ", tr, " mcode ", #mcode, "\n") - local ctx = disass.create(mcode, addr, dumpwrite) - ctx.hexdump = 0 - ctx.symtab = fillsymtab(tr, info.nexit) - if loop ~= 0 then - symtab[addr+loop] = "LOOP" - ctx:disass(0, loop) - out:write("->LOOP:\n") - ctx:disass(loop, #mcode-loop) - symtab[addr+loop] = nil - else - ctx:disass(0, #mcode) - end -end - ------------------------------------------------------------------------------- - -local irtype_text = { - [0] = "nil", - "fal", - "tru", - "lud", - "str", - "p32", - "thr", - "pro", - "fun", - "p64", - "cdt", - "tab", - "udt", - "flt", - "num", - "i8 ", - "u8 ", - "i16", - "u16", - "int", - "u32", - "i64", - "u64", - "sfp", -} - -local colortype_ansi = { - [0] = "%s", - "%s", - "%s", - "\027[36m%s\027[m", - "\027[32m%s\027[m", - "%s", - "\027[1m%s\027[m", - "%s", - "\027[1m%s\027[m", - "%s", - "\027[33m%s\027[m", - "\027[31m%s\027[m", - "\027[36m%s\027[m", - "\027[34m%s\027[m", - "\027[34m%s\027[m", - "\027[35m%s\027[m", - "\027[35m%s\027[m", - "\027[35m%s\027[m", - "\027[35m%s\027[m", - "\027[35m%s\027[m", - "\027[35m%s\027[m", - "\027[35m%s\027[m", - "\027[35m%s\027[m", - "\027[35m%s\027[m", -} - -local function colorize_text(s) - return s -end - -local function colorize_ansi(s, t) - return format(colortype_ansi[t], s) -end - -local irtype_ansi = setmetatable({}, - { __index = function(tab, t) - local s = colorize_ansi(irtype_text[t], t); tab[t] = s; return s; end }) - -local html_escape = { ["<"] = "<", [">"] = ">", ["&"] = "&", } - -local function colorize_html(s, t) - s = gsub(s, "[<>&]", html_escape) - return format('%s', irtype_text[t], s) -end - -local irtype_html = setmetatable({}, - { __index = function(tab, t) - local s = 
colorize_html(irtype_text[t], t); tab[t] = s; return s; end }) - -local header_html = [[ - -]] - -local colorize, irtype - --- Lookup tables to convert some literals into names. -local litname = { - ["SLOAD "] = setmetatable({}, { __index = function(t, mode) - local s = "" - if band(mode, 1) ~= 0 then s = s.."P" end - if band(mode, 2) ~= 0 then s = s.."F" end - if band(mode, 4) ~= 0 then s = s.."T" end - if band(mode, 8) ~= 0 then s = s.."C" end - if band(mode, 16) ~= 0 then s = s.."R" end - if band(mode, 32) ~= 0 then s = s.."I" end - t[mode] = s - return s - end}), - ["XLOAD "] = { [0] = "", "R", "V", "RV", "U", "RU", "VU", "RVU", }, - ["CONV "] = setmetatable({}, { __index = function(t, mode) - local s = irtype[band(mode, 31)] - s = irtype[band(shr(mode, 5), 31)].."."..s - if band(mode, 0x800) ~= 0 then s = s.." sext" end - local c = shr(mode, 14) - if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end - t[mode] = s - return s - end}), - ["FLOAD "] = vmdef.irfield, - ["FREF "] = vmdef.irfield, - ["FPMATH"] = vmdef.irfpm, - ["BUFHDR"] = { [0] = "RESET", "APPEND" }, - ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" }, -} - -local function ctlsub(c) - if c == "\n" then return "\\n" - elseif c == "\r" then return "\\r" - elseif c == "\t" then return "\\t" - else return format("\\%03d", byte(c)) - end -end - -local function fmtfunc(func, pc) - local fi = funcinfo(func, pc) - if fi.loc then - return fi.loc - elseif fi.ffid then - return vmdef.ffnames[fi.ffid] - elseif fi.addr then - return format("C:%x", fi.addr) - else - return "(?)" - end -end - -local function formatk(tr, idx, sn) - local k, t, slot = tracek(tr, idx) - local tn = type(k) - local s - if tn == "number" then - if band(sn or 0, 0x30000) ~= 0 then - s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz" - elseif k == 2^52+2^51 then - s = "bias" - else - s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k) - end - elseif tn == "string" then - s = format(#k > 20 and '"%.20s"~' or '"%s"', 
gsub(k, "%c", ctlsub)) - elseif tn == "function" then - s = fmtfunc(k) - elseif tn == "table" then - s = format("{%p}", k) - elseif tn == "userdata" then - if t == 12 then - s = format("userdata:%p", k) - else - s = format("[%p]", k) - if s == "[NULL]" then s = "NULL" end - end - elseif t == 21 then -- int64_t - s = sub(tostring(k), 1, -3) - if sub(s, 1, 1) ~= "-" then s = "+"..s end - elseif sn == 0x1057fff then -- SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL) - return "----" -- Special case for LJ_FR2 slot 1. - else - s = tostring(k) -- For primitives. - end - s = colorize(format("%-4s", s), t) - if slot then - s = format("%s @%d", s, slot) - end - return s -end - -local function printsnap(tr, snap) - local n = 2 - for s=0,snap[1]-1 do - local sn = snap[n] - if shr(sn, 24) == s then - n = n + 1 - local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS - if ref < 0 then - out:write(formatk(tr, ref, sn)) - elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM - out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) - else - local m, ot, op1, op2 = traceir(tr, ref) - out:write(colorize(format("%04d", ref), band(ot, 31))) - end - out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME - else - out:write("---- ") - end - end - out:write("]\n") -end - --- Dump snapshots (not interleaved with IR). -local function dump_snap(tr) - out:write("---- TRACE ", tr, " snapshots\n") - for i=0,1000000000 do - local snap = tracesnap(tr, i) - if not snap then break end - out:write(format("#%-3d %04d [ ", i, snap[0])) - printsnap(tr, snap) - end -end - --- Return a register name or stack slot for a rid/sp location. 
-local function ridsp_name(ridsp, ins) - if not disass then disass = require("jit.dis_"..jit.arch) end - local rid, slot = band(ridsp, 0xff), shr(ridsp, 8) - if rid == 253 or rid == 254 then - return (slot == 0 or slot == 255) and " {sink" or format(" {%04d", ins-slot) - end - if ridsp > 255 then return format("[%x]", slot*4) end - if rid < 128 then return disass.regname(rid) end - return "" -end - --- Dump CALL* function ref and return optional ctype. -local function dumpcallfunc(tr, ins) - local ctype - if ins > 0 then - local m, ot, op1, op2 = traceir(tr, ins) - if band(ot, 31) == 0 then -- nil type means CARG(func, ctype). - ins = op1 - ctype = formatk(tr, op2) - end - end - if ins < 0 then - out:write(format("[0x%x](", tonumber((tracek(tr, ins))))) - else - out:write(format("%04d (", ins)) - end - return ctype -end - --- Recursively gather CALL* args and dump them. -local function dumpcallargs(tr, ins) - if ins < 0 then - out:write(formatk(tr, ins)) - else - local m, ot, op1, op2 = traceir(tr, ins) - local oidx = 6*shr(ot, 8) - local op = sub(vmdef.irnames, oidx+1, oidx+6) - if op == "CARG " then - dumpcallargs(tr, op1) - if op2 < 0 then - out:write(" ", formatk(tr, op2)) - else - out:write(" ", format("%04d", op2)) - end - else - out:write(format("%04d", ins)) - end - end -end - --- Dump IR and interleaved snapshots. -local function dump_ir(tr, dumpsnap, dumpreg) - local info = traceinfo(tr) - if not info then return end - local nins = info.nins - out:write("---- TRACE ", tr, " IR\n") - local irnames = vmdef.irnames - local snapref = 65536 - local snap, snapno - if dumpsnap then - snap = tracesnap(tr, 0) - snapref = snap[0] - snapno = 0 - end - for ins=1,nins do - if ins >= snapref then - if dumpreg then - out:write(format(".... SNAP #%-3d [ ", snapno)) - else - out:write(format(".... 
SNAP #%-3d [ ", snapno)) - end - printsnap(tr, snap) - snapno = snapno + 1 - snap = tracesnap(tr, snapno) - snapref = snap and snap[0] or 65536 - end - local m, ot, op1, op2, ridsp = traceir(tr, ins) - local oidx, t = 6*shr(ot, 8), band(ot, 31) - local op = sub(irnames, oidx+1, oidx+6) - if op == "LOOP " then - if dumpreg then - out:write(format("%04d ------------ LOOP ------------\n", ins)) - else - out:write(format("%04d ------ LOOP ------------\n", ins)) - end - elseif op ~= "NOP " and op ~= "CARG " and - (dumpreg or op ~= "RENAME") then - local rid = band(ridsp, 255) - if dumpreg then - out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins))) - else - out:write(format("%04d ", ins)) - end - out:write(format("%s%s %s %s ", - (rid == 254 or rid == 253) and "}" or - (band(ot, 128) == 0 and " " or ">"), - band(ot, 64) == 0 and " " or "+", - irtype[t], op)) - local m1, m2 = band(m, 3), band(m, 3*4) - if sub(op, 1, 4) == "CALL" then - local ctype - if m2 == 1*4 then -- op2 == IRMlit - out:write(format("%-10s (", vmdef.ircall[op2])) - else - ctype = dumpcallfunc(tr, op2) - end - if op1 ~= -1 then dumpcallargs(tr, op1) end - out:write(")") - if ctype then out:write(" ctype ", ctype) end - elseif op == "CNEW " and op2 == -1 then - out:write(formatk(tr, op1)) - elseif m1 ~= 3 then -- op1 != IRMnone - if op1 < 0 then - out:write(formatk(tr, op1)) - else - out:write(format(m1 == 0 and "%04d" or "#%-3d", op1)) - end - if m2 ~= 3*4 then -- op2 != IRMnone - if m2 == 1*4 then -- op2 == IRMlit - local litn = litname[op] - if litn and litn[op2] then - out:write(" ", litn[op2]) - elseif op == "UREFO " or op == "UREFC " then - out:write(format(" #%-3d", shr(op2, 8))) - else - out:write(format(" #%-3d", op2)) - end - elseif op2 < 0 then - out:write(" ", formatk(tr, op2)) - else - out:write(format(" %04d", op2)) - end - end - end - out:write("\n") - end - end - if snap then - if dumpreg then - out:write(format(".... SNAP #%-3d [ ", snapno)) - else - out:write(format(".... 
SNAP #%-3d [ ", snapno)) - end - printsnap(tr, snap) - end -end - ------------------------------------------------------------------------------- - -local recprefix = "" -local recdepth = 0 - --- Format trace error message. -local function fmterr(err, info) - if type(err) == "number" then - if type(info) == "function" then info = fmtfunc(info) end - err = format(vmdef.traceerr[err], info) - end - return err -end - --- Dump trace states. -local function dump_trace(what, tr, func, pc, otr, oex) - if what == "stop" or (what == "abort" and dumpmode.a) then - if dumpmode.i then dump_ir(tr, dumpmode.s, dumpmode.r and what == "stop") - elseif dumpmode.s then dump_snap(tr) end - if dumpmode.m then dump_mcode(tr) end - end - if what == "start" then - if dumpmode.H then out:write('
    \n') end
    -    out:write("---- TRACE ", tr, " ", what)
    -    if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end
    -    out:write(" ", fmtfunc(func, pc), "\n")
    -  elseif what == "stop" or what == "abort" then
    -    out:write("---- TRACE ", tr, " ", what)
    -    if what == "abort" then
    -      out:write(" ", fmtfunc(func, pc), " -- ", fmterr(otr, oex), "\n")
    -    else
    -      local info = traceinfo(tr)
    -      local link, ltype = info.link, info.linktype
    -      if link == tr or link == 0 then
    -	out:write(" -> ", ltype, "\n")
    -      elseif ltype == "root" then
    -	out:write(" -> ", link, "\n")
    -      else
    -	out:write(" -> ", link, " ", ltype, "\n")
    -      end
    -    end
    -    if dumpmode.H then out:write("
    \n\n") else out:write("\n") end - else - if what == "flush" then symtab, nexitsym = {}, 0 end - out:write("---- TRACE ", what, "\n\n") - end - out:flush() -end - --- Dump recorded bytecode. -local function dump_record(tr, func, pc, depth, callee) - if depth ~= recdepth then - recdepth = depth - recprefix = rep(" .", depth) - end - local line - if pc >= 0 then - line = bcline(func, pc, recprefix) - if dumpmode.H then line = gsub(line, "[<>&]", html_escape) end - else - line = "0000 "..recprefix.." FUNCC \n" - callee = func - end - if pc <= 0 then - out:write(sub(line, 1, -2), " ; ", fmtfunc(func), "\n") - else - out:write(line) - end - if pc >= 0 and band(funcbc(func, pc), 0xff) < 16 then -- ORDER BC - out:write(bcline(func, pc+1, recprefix)) -- Write JMP for cond. - end -end - ------------------------------------------------------------------------------- - --- Dump taken trace exits. -local function dump_texit(tr, ex, ngpr, nfpr, ...) - out:write("---- TRACE ", tr, " exit ", ex, "\n") - if dumpmode.X then - local regs = {...} - if jit.arch == "x64" then - for i=1,ngpr do - out:write(format(" %016x", regs[i])) - if i % 4 == 0 then out:write("\n") end - end - else - for i=1,ngpr do - out:write(" ", tohex(regs[i])) - if i % 8 == 0 then out:write("\n") end - end - end - if jit.arch == "mips" or jit.arch == "mipsel" then - for i=1,nfpr,2 do - out:write(format(" %+17.14g", regs[ngpr+i])) - if i % 8 == 7 then out:write("\n") end - end - else - for i=1,nfpr do - out:write(format(" %+17.14g", regs[ngpr+i])) - if i % 4 == 0 then out:write("\n") end - end - end - end -end - ------------------------------------------------------------------------------- - --- Detach dump handlers. -local function dumpoff() - if active then - active = false - jit.attach(dump_texit) - jit.attach(dump_record) - jit.attach(dump_trace) - if out and out ~= stdout and out ~= stderr then out:close() end - out = nil - end -end - --- Open the output file and attach dump handlers. 
-local function dumpon(opt, outfile) - if active then dumpoff() end - - local term = os.getenv("TERM") - local colormode = (term and term:match("color") or os.getenv("COLORTERM")) and "A" or "T" - if opt then - opt = gsub(opt, "[TAH]", function(mode) colormode = mode; return ""; end) - end - - local m = { t=true, b=true, i=true, m=true, } - if opt and opt ~= "" then - local o = sub(opt, 1, 1) - if o ~= "+" and o ~= "-" then m = {} end - for i=1,#opt do m[sub(opt, i, i)] = (o ~= "-") end - end - dumpmode = m - - if m.t or m.b or m.i or m.s or m.m then - jit.attach(dump_trace, "trace") - end - if m.b then - jit.attach(dump_record, "record") - if not bcline then bcline = require("jit.bc").line end - end - if m.x or m.X then - jit.attach(dump_texit, "texit") - end - - if not outfile then outfile = os.getenv("LUAJIT_DUMPFILE") end - if outfile then - out = outfile == "-" and stdout or assert(io.open(outfile, "w")) - else - out = stdout - end - - m[colormode] = true - if colormode == "A" then - colorize = colorize_ansi - irtype = irtype_ansi - elseif colormode == "H" then - colorize = colorize_html - irtype = irtype_html - out:write(header_html) - else - colorize = colorize_text - irtype = irtype_text - end - - active = true -end - --- Public module functions. -return { - on = dumpon, - off = dumpoff, - start = dumpon -- For -j command line option. -} - diff --git a/src/jit/p.lua b/src/jit/p.lua deleted file mode 100644 index 7be105863d..0000000000 --- a/src/jit/p.lua +++ /dev/null @@ -1,311 +0,0 @@ ----------------------------------------------------------------------------- --- LuaJIT profiler. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- Released under the MIT license. See Copyright Notice in luajit.h ----------------------------------------------------------------------------- --- --- This module is a simple command line interface to the built-in --- low-overhead profiler of LuaJIT. 
--- --- The lower-level API of the profiler is accessible via the "jit.profile" --- module or the luaJIT_profile_* C API. --- --- Example usage: --- --- luajit -jp myapp.lua --- luajit -jp=s myapp.lua --- luajit -jp=-s myapp.lua --- luajit -jp=vl myapp.lua --- luajit -jp=G,profile.txt myapp.lua --- --- The following dump features are available: --- --- f Stack dump: function name, otherwise module:line. Default mode. --- F Stack dump: ditto, but always prepend module. --- l Stack dump: module:line. --- stack dump depth (callee < caller). Default: 1. --- - Inverse stack dump depth (caller > callee). --- s Split stack dump after first stack level. Implies abs(depth) >= 2. --- p Show full path for module names. --- v Show VM states. Can be combined with stack dumps, e.g. vf or fv. --- z Show zones. Can be combined with stack dumps, e.g. zf or fz. --- r Show raw sample counts. Default: show percentages. --- a Annotate excerpts from source code files. --- A Annotate complete source code files. --- G Produce raw output suitable for graphical tools (e.g. flame graphs). --- m Minimum sample percentage to be shown. Default: 3. --- i Sampling interval in milliseconds. Default: 10. --- ----------------------------------------------------------------------------- - --- Cache some library functions and objects. -local jit = require("jit") -assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") -local profile = require("jit.profile") -local vmdef = require("jit.vmdef") -local math = math -local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor -local sort, format = table.sort, string.format -local stdout = io.stdout -local zone -- Load jit.zone module on demand. - --- Output file handle. 
-local out - ------------------------------------------------------------------------------- - -local prof_ud -local prof_states, prof_split, prof_min, prof_raw, prof_fmt, prof_depth -local prof_ann, prof_count1, prof_count2, prof_samples - -local map_vmmode = { - N = "Compiled", - I = "Interpreted", - C = "C code", - G = "Garbage Collector", - J = "JIT Compiler", -} - --- Profiler callback. -local function prof_cb(th, samples, vmmode) - prof_samples = prof_samples + samples - local key_stack, key_stack2, key_state - -- Collect keys for sample. - if prof_states then - if prof_states == "v" then - key_state = map_vmmode[vmmode] or vmmode - else - key_state = zone:get() or "(none)" - end - end - if prof_fmt then - key_stack = profile.dumpstack(th, prof_fmt, prof_depth) - key_stack = key_stack:gsub("%[builtin#(%d+)%]", function(x) - return vmdef.ffnames[tonumber(x)] - end) - if prof_split == 2 then - local k1, k2 = key_stack:match("(.-) [<>] (.*)") - if k2 then key_stack, key_stack2 = k1, k2 end - elseif prof_split == 3 then - key_stack2 = profile.dumpstack(th, "l", 1) - end - end - -- Order keys. - local k1, k2 - if prof_split == 1 then - if key_state then - k1 = key_state - if key_stack then k2 = key_stack end - end - elseif key_stack then - k1 = key_stack - if key_stack2 then k2 = key_stack2 elseif key_state then k2 = key_state end - end - -- Coalesce samples in one or two levels. - if k1 then - local t1 = prof_count1 - t1[k1] = (t1[k1] or 0) + samples - if k2 then - local t2 = prof_count2 - local t3 = t2[k1] - if not t3 then t3 = {}; t2[k1] = t3 end - t3[k2] = (t3[k2] or 0) + samples - end - end -end - ------------------------------------------------------------------------------- - --- Show top N list. 
-local function prof_top(count1, count2, samples, indent) - local t, n = {}, 0 - for k in pairs(count1) do - n = n + 1 - t[n] = k - end - sort(t, function(a, b) return count1[a] > count1[b] end) - for i=1,n do - local k = t[i] - local v = count1[k] - local pct = floor(v*100/samples + 0.5) - if pct < prof_min then break end - if not prof_raw then - out:write(format("%s%2d%% %s\n", indent, pct, k)) - elseif prof_raw == "r" then - out:write(format("%s%5d %s\n", indent, v, k)) - else - out:write(format("%s %d\n", k, v)) - end - if count2 then - local r = count2[k] - if r then - prof_top(r, nil, v, (prof_split == 3 or prof_split == 1) and " -- " or - (prof_depth < 0 and " -> " or " <- ")) - end - end - end -end - --- Annotate source code -local function prof_annotate(count1, samples) - local files = {} - local ms = 0 - for k, v in pairs(count1) do - local pct = floor(v*100/samples + 0.5) - ms = math.max(ms, v) - if pct >= prof_min then - local file, line = k:match("^(.*):(%d+)$") - if not file then file = k; line = 0 end - local fl = files[file] - if not fl then fl = {}; files[file] = fl; files[#files+1] = file end - line = tonumber(line) - fl[line] = prof_raw and v or pct - end - end - sort(files) - local fmtv, fmtn = " %3d%% | %s\n", " | %s\n" - if prof_raw then - local n = math.max(5, math.ceil(math.log10(ms))) - fmtv = "%"..n.."d | %s\n" - fmtn = (" "):rep(n).." 
| %s\n" - end - local ann = prof_ann - for _, file in ipairs(files) do - local f0 = file:byte() - if f0 == 40 or f0 == 91 then - out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file)) - break - end - local fp, err = io.open(file) - if not fp then - out:write(format("====== ERROR: %s: %s\n", file, err)) - break - end - out:write(format("\n====== %s ======\n", file)) - local fl = files[file] - local n, show = 1, false - if ann ~= 0 then - for i=1,ann do - if fl[i] then show = true; out:write("@@ 1 @@\n"); break end - end - end - for line in fp:lines() do - if line:byte() == 27 then - out:write("[Cannot annotate bytecode file]\n") - break - end - local v = fl[n] - if ann ~= 0 then - local v2 = fl[n+ann] - if show then - if v2 then show = n+ann elseif v then show = n - elseif show+ann < n then show = false end - elseif v2 then - show = n+ann - out:write(format("@@ %d @@\n", n)) - end - if not show then goto next end - end - if v then - out:write(format(fmtv, v, line)) - else - out:write(format(fmtn, line)) - end - ::next:: - n = n + 1 - end - fp:close() - end -end - ------------------------------------------------------------------------------- - --- Finish profiling and dump result. -local function prof_finish() - if prof_ud then - profile.stop() - local samples = prof_samples - if samples == 0 then - if prof_raw ~= true then out:write("[No samples collected]\n") end - return - end - if prof_ann then - prof_annotate(prof_count1, samples) - else - prof_top(prof_count1, prof_count2, samples, "") - end - prof_count1 = nil - prof_count2 = nil - prof_ud = nil - end -end - --- Start profiling. 
-local function prof_start(mode) - local interval = "" - mode = mode:gsub("i%d*", function(s) interval = s; return "" end) - prof_min = 3 - mode = mode:gsub("m(%d+)", function(s) prof_min = tonumber(s); return "" end) - prof_depth = 1 - mode = mode:gsub("%-?%d+", function(s) prof_depth = tonumber(s); return "" end) - local m = {} - for c in mode:gmatch(".") do m[c] = c end - prof_states = m.z or m.v - if prof_states == "z" then zone = require("jit.zone") end - local scope = m.l or m.f or m.F or (prof_states and "" or "f") - local flags = (m.p or "") - prof_raw = m.r - if m.s then - prof_split = 2 - if prof_depth == -1 or m["-"] then prof_depth = -2 - elseif prof_depth == 1 then prof_depth = 2 end - elseif mode:find("[fF].*l") then - scope = "l" - prof_split = 3 - else - prof_split = (scope == "" or mode:find("[zv].*[lfF]")) and 1 or 0 - end - prof_ann = m.A and 0 or (m.a and 3) - if prof_ann then - scope = "l" - prof_fmt = "pl" - prof_split = 0 - prof_depth = 1 - elseif m.G and scope ~= "" then - prof_fmt = flags..scope.."Z;" - prof_depth = -100 - prof_raw = true - prof_min = 0 - elseif scope == "" then - prof_fmt = false - else - local sc = prof_split == 3 and m.f or m.F or scope - prof_fmt = flags..sc..(prof_depth >= 0 and "Z < " or "Z > ") - end - prof_count1 = {} - prof_count2 = {} - prof_samples = 0 - profile.start(scope:lower()..interval, prof_cb) - prof_ud = newproxy(true) - getmetatable(prof_ud).__gc = prof_finish -end - ------------------------------------------------------------------------------- - -local function start(mode, outfile) - if not outfile then outfile = os.getenv("LUAJIT_PROFILEFILE") end - if outfile then - out = outfile == "-" and stdout or assert(io.open(outfile, "w")) - else - out = stdout - end - prof_start(mode or "f") -end - --- Public module functions. -return { - start = start, -- For -j command line option. 
- stop = prof_finish -} - diff --git a/src/jit/v.lua b/src/jit/v.lua deleted file mode 100644 index 934de9859c..0000000000 --- a/src/jit/v.lua +++ /dev/null @@ -1,170 +0,0 @@ ----------------------------------------------------------------------------- --- Verbose mode of the LuaJIT compiler. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- Released under the MIT license. See Copyright Notice in luajit.h ----------------------------------------------------------------------------- --- --- This module shows verbose information about the progress of the --- JIT compiler. It prints one line for each generated trace. This module --- is useful to see which code has been compiled or where the compiler --- punts and falls back to the interpreter. --- --- Example usage: --- --- luajit -jv -e "for i=1,1000 do for j=1,1000 do end end" --- luajit -jv=myapp.out myapp.lua --- --- Default output is to stderr. To redirect the output to a file, pass a --- filename as an argument (use '-' for stdout) or set the environment --- variable LUAJIT_VERBOSEFILE. The file is overwritten every time the --- module is started. --- --- The output from the first example should look like this: --- --- [TRACE 1 (command line):1 loop] --- [TRACE 2 (1/3) (command line):1 -> 1] --- --- The first number in each line is the internal trace number. Next are --- the file name ('(command line)') and the line number (':1') where the --- trace has started. Side traces also show the parent trace number and --- the exit number where they are attached to in parentheses ('(1/3)'). --- An arrow at the end shows where the trace links to ('-> 1'), unless --- it loops to itself. --- --- In this case the inner loop gets hot and is traced first, generating --- a root trace. Then the last exit from the 1st trace gets hot, too, --- and triggers generation of the 2nd trace. 
The side trace follows the --- path along the outer loop and *around* the inner loop, back to its --- start, and then links to the 1st trace. Yes, this may seem unusual, --- if you know how traditional compilers work. Trace compilers are full --- of surprises like this -- have fun! :-) --- --- Aborted traces are shown like this: --- --- [TRACE --- foo.lua:44 -- leaving loop in root trace at foo:lua:50] --- --- Don't worry -- trace aborts are quite common, even in programs which --- can be fully compiled. The compiler may retry several times until it --- finds a suitable trace. --- --- Of course this doesn't work with features that are not-yet-implemented --- (NYI error messages). The VM simply falls back to the interpreter. This --- may not matter at all if the particular trace is not very high up in --- the CPU usage profile. Oh, and the interpreter is quite fast, too. --- --- Also check out the -jdump module, which prints all the gory details. --- ------------------------------------------------------------------------------- - --- Cache some library functions and objects. -local jit = require("jit") -assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") -local jutil = require("jit.util") -local vmdef = require("jit.vmdef") -local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo -local type, format = type, string.format -local stdout, stderr = io.stdout, io.stderr - --- Active flag and output file handle. -local active, out - ------------------------------------------------------------------------------- - -local startloc, startex - -local function fmtfunc(func, pc) - local fi = funcinfo(func, pc) - if fi.loc then - return fi.loc - elseif fi.ffid then - return vmdef.ffnames[fi.ffid] - elseif fi.addr then - return format("C:%x", fi.addr) - else - return "(?)" - end -end - --- Format trace error message. 
-local function fmterr(err, info) - if type(err) == "number" then - if type(info) == "function" then info = fmtfunc(info) end - err = format(vmdef.traceerr[err], info) - end - return err -end - --- Dump trace states. -local function dump_trace(what, tr, func, pc, otr, oex) - if what == "start" then - startloc = fmtfunc(func, pc) - startex = otr and "("..otr.."/"..(oex == -1 and "stitch" or oex)..") " or "" - else - if what == "abort" then - local loc = fmtfunc(func, pc) - if loc ~= startloc then - out:write(format("[TRACE --- %s%s -- %s at %s]\n", - startex, startloc, fmterr(otr, oex), loc)) - else - out:write(format("[TRACE --- %s%s -- %s]\n", - startex, startloc, fmterr(otr, oex))) - end - elseif what == "stop" then - local info = traceinfo(tr) - local link, ltype = info.link, info.linktype - if ltype == "interpreter" then - out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n", - tr, startex, startloc)) - elseif ltype == "stitch" then - out:write(format("[TRACE %3s %s%s %s %s]\n", - tr, startex, startloc, ltype, fmtfunc(func, pc))) - elseif link == tr or link == 0 then - out:write(format("[TRACE %3s %s%s %s]\n", - tr, startex, startloc, ltype)) - elseif ltype == "root" then - out:write(format("[TRACE %3s %s%s -> %d]\n", - tr, startex, startloc, link)) - else - out:write(format("[TRACE %3s %s%s -> %d %s]\n", - tr, startex, startloc, link, ltype)) - end - else - out:write(format("[TRACE %s]\n", what)) - end - out:flush() - end -end - ------------------------------------------------------------------------------- - --- Detach dump handlers. -local function dumpoff() - if active then - active = false - jit.attach(dump_trace) - if out and out ~= stdout and out ~= stderr then out:close() end - out = nil - end -end - --- Open the output file and attach dump handlers. 
-local function dumpon(outfile) - if active then dumpoff() end - if not outfile then outfile = os.getenv("LUAJIT_VERBOSEFILE") end - if outfile then - out = outfile == "-" and stdout or assert(io.open(outfile, "w")) - else - out = stderr - end - jit.attach(dump_trace, "trace") - active = true -end - --- Public module functions. -return { - on = dumpon, - off = dumpoff, - start = dumpon -- For -j command line option. -} - diff --git a/src/jit/vmdef.lua b/src/jit/vmdef.lua new file mode 100644 index 0000000000..a911267004 --- /dev/null +++ b/src/jit/vmdef.lua @@ -0,0 +1,348 @@ +-- This is a generated file. DO NOT EDIT! + +return { + +bcnames = "ISLT ISGE ISLE ISGT ISEQV ISNEV ISEQS ISNES ISEQN ISNEN ISEQP ISNEP ISTC ISFC IST ISF ISTYPEISNUM MOV NOT UNM LEN ADDVN SUBVN MULVN DIVVN MODVN ADDNV SUBNV MULNV DIVNV MODNV ADDVV SUBVV MULVV DIVVV MODVV POW CAT KSTR KCDATAKSHORTKNUM KPRI KNIL UGET USETV USETS USETN USETP UCLO FNEW TNEW TDUP GGET GSET TGETV TGETS TGETB TGETR TSETV TSETS TSETB TSETM TSETR CALLM CALL CALLMTCALLT ITERC ITERN VARG ISNEXTRETM RET RET0 RET1 FORI JFORI FORL IFORL JFORL ITERL IITERLJITERLLOOP ILOOP JLOOP JMP FUNCF IFUNCFJFUNCFFUNCV IFUNCVJFUNCVFUNCC FUNCCW", + +irnames = "LT GE LE GT ULT UGE ULE UGT EQ NE ABC RETF NOP BASE PVAL GCSTEPHIOP LOOP USE PHI RENAMEKPRI KINT KGC KPTR KKPTR KNULL KNUM KINT64KSLOT BNOT BSWAP BAND BOR BXOR BSHL BSHR BSAR BROL BROR ADD SUB MUL DIV MOD POW NEG ABS ATAN2 LDEXP MIN MAX FPMATHADDOV SUBOV MULOV AREF HREFK HREF NEWREFUREFO UREFC FREF STRREFLREF ALOAD HLOAD ULOAD FLOAD XLOAD SLOAD VLOAD ASTOREHSTOREUSTOREFSTOREXSTORESNEW XSNEW TNEW TDUP CNEW CNEWI BUFHDRBUFPUTBUFSTRTBAR OBAR XBAR CONV TOBIT TOSTR STRTO CALLN CALLA CALLL CALLS CALLXSCARG ", + +irfpm = { [0]="floor", "ceil", "trunc", "sqrt", "exp", "exp2", "log", "log2", "log10", "sin", "cos", "tan", "other", }, + +irfield = { [0]="str.len", "func.env", "func.pc", "func.ffid", "thread.env", "tab.meta", "tab.array", "tab.node", "tab.asize", "tab.hmask", "tab.nomm", 
"udata.meta", "udata.udtype", "udata.file", "cdata.ctypeid", "cdata.ptr", "cdata.int", "cdata.int64", "cdata.int64_4", }, + +ircall = { +[0]="lj_str_cmp", +"lj_str_find", +"lj_str_new", +"lj_strscan_num", +"lj_strfmt_int", +"lj_strfmt_num", +"lj_strfmt_char", +"lj_strfmt_putint", +"lj_strfmt_putnum", +"lj_strfmt_putquoted", +"lj_strfmt_putfxint", +"lj_strfmt_putfnum_int", +"lj_strfmt_putfnum_uint", +"lj_strfmt_putfnum", +"lj_strfmt_putfstr", +"lj_strfmt_putfchar", +"lj_buf_putmem", +"lj_buf_putstr", +"lj_buf_putchar", +"lj_buf_putstr_reverse", +"lj_buf_putstr_lower", +"lj_buf_putstr_upper", +"lj_buf_putstr_rep", +"lj_buf_puttab", +"lj_buf_tostr", +"lj_tab_new_ah", +"lj_tab_new1", +"lj_tab_dup", +"lj_tab_clear", +"lj_tab_newkey", +"lj_tab_len", +"lj_gc_step_jit", +"lj_gc_barrieruv", +"lj_mem_newgco", +"lj_math_random_step", +"lj_vm_modi", +"sinh", +"cosh", +"tanh", +"fputc", +"fwrite", +"fflush", +"lj_vm_floor", +"lj_vm_ceil", +"lj_vm_trunc", +"sqrt", +"exp", +"lj_vm_exp2", +"log", +"lj_vm_log2", +"log10", +"sin", +"cos", +"tan", +"lj_vm_powi", +"pow", +"atan2", +"ldexp", +"lj_vm_tobit", +"softfp_add", +"softfp_sub", +"softfp_mul", +"softfp_div", +"softfp_cmp", +"softfp_i2d", +"softfp_d2i", +"lj_vm_sfmin", +"lj_vm_sfmax", +"softfp_ui2d", +"softfp_f2d", +"softfp_d2ui", +"softfp_d2f", +"softfp_i2f", +"softfp_ui2f", +"softfp_f2i", +"softfp_f2ui", +"fp64_l2d", +"fp64_ul2d", +"fp64_l2f", +"fp64_ul2f", +"fp64_d2l", +"fp64_d2ul", +"fp64_f2l", +"fp64_f2ul", +"lj_carith_divi64", +"lj_carith_divu64", +"lj_carith_modi64", +"lj_carith_modu64", +"lj_carith_powi64", +"lj_carith_powu64", +"lj_cdata_newv", +"lj_cdata_setfin", +"strlen", +"memcpy", +"memset", +"lj_vm_errno", +"lj_carith_mul64", +"lj_carith_shl64", +"lj_carith_shr64", +"lj_carith_sar64", +"lj_carith_rol64", +"lj_carith_ror64", +}, + +traceerr = { +[0]="error thrown or hook called during recording", +"trace too short", +"trace too long", +"trace too deep", +"too many snapshots", +"blacklisted", +"retry recording", 
+"NYI: bytecode %d", +"leaving loop in root trace", +"inner loop in root trace", +"loop unroll limit reached", +"bad argument type", +"JIT compilation disabled for function", +"call unroll limit reached", +"down-recursion, restarting", +"NYI: unsupported variant of FastFunc %s", +"NYI: return to lower frame", +"store with nil or NaN key", +"missing metamethod", +"looping index lookup", +"NYI: mixed sparse/dense table", +"symbol not in cache", +"NYI: unsupported C type conversion", +"NYI: unsupported C function type", +"guard would always fail", +"too many PHIs", +"persistent type instability", +"failed to allocate mcode memory", +"machine code too long", +"hit mcode limit (retrying)", +"too many spill slots", +"inconsistent register allocation", +"NYI: cannot assemble IR instruction %d", +"NYI: PHI shuffling too complex", +"NYI: register coalescing too complex", +}, + +ffnames = { +[0]="Lua", +"C", +"assert", +"type", +"next", +"pairs", +"ipairs_aux", +"ipairs", +"getmetatable", +"setmetatable", +"getfenv", +"setfenv", +"rawget", +"rawset", +"rawequal", +"unpack", +"select", +"tonumber", +"tostring", +"error", +"pcall", +"xpcall", +"loadfile", +"load", +"loadstring", +"dofile", +"gcinfo", +"collectgarbage", +"newproxy", +"print", +"coroutine.status", +"coroutine.running", +"coroutine.isyieldable", +"coroutine.create", +"coroutine.yield", +"coroutine.resume", +"coroutine.wrap_aux", +"coroutine.wrap", +"math.abs", +"math.floor", +"math.ceil", +"math.sqrt", +"math.log10", +"math.exp", +"math.sin", +"math.cos", +"math.tan", +"math.asin", +"math.acos", +"math.atan", +"math.sinh", +"math.cosh", +"math.tanh", +"math.frexp", +"math.modf", +"math.log", +"math.atan2", +"math.pow", +"math.fmod", +"math.ldexp", +"math.min", +"math.max", +"math.random", +"math.randomseed", +"bit.tobit", +"bit.bnot", +"bit.bswap", +"bit.lshift", +"bit.rshift", +"bit.arshift", +"bit.rol", +"bit.ror", +"bit.band", +"bit.bor", +"bit.bxor", +"bit.tohex", +"string.byte", +"string.char", 
+"string.sub", +"string.rep", +"string.reverse", +"string.lower", +"string.upper", +"string.dump", +"string.find", +"string.match", +"string.gmatch_aux", +"string.gmatch", +"string.gsub", +"string.format", +"table.maxn", +"table.insert", +"table.concat", +"table.sort", +"table.new", +"table.clear", +"io.method.close", +"io.method.read", +"io.method.write", +"io.method.flush", +"io.method.seek", +"io.method.setvbuf", +"io.method.lines", +"io.method.__gc", +"io.method.__tostring", +"io.open", +"io.popen", +"io.tmpfile", +"io.close", +"io.read", +"io.write", +"io.flush", +"io.input", +"io.output", +"io.lines", +"io.type", +"os.execute", +"os.remove", +"os.rename", +"os.tmpname", +"os.getenv", +"os.exit", +"os.clock", +"os.date", +"os.time", +"os.difftime", +"os.setlocale", +"debug.getregistry", +"debug.getmetatable", +"debug.setmetatable", +"debug.getfenv", +"debug.setfenv", +"debug.getinfo", +"debug.getlocal", +"debug.setlocal", +"debug.getupvalue", +"debug.setupvalue", +"debug.upvalueid", +"debug.upvaluejoin", +"debug.sethook", +"debug.gethook", +"debug.debug", +"debug.traceback", +"jit.on", +"jit.off", +"jit.flush", +"jit.status", +"jit.opt.start", +"ffi.meta.__index", +"ffi.meta.__newindex", +"ffi.meta.__eq", +"ffi.meta.__len", +"ffi.meta.__lt", +"ffi.meta.__le", +"ffi.meta.__concat", +"ffi.meta.__call", +"ffi.meta.__add", +"ffi.meta.__sub", +"ffi.meta.__mul", +"ffi.meta.__div", +"ffi.meta.__mod", +"ffi.meta.__pow", +"ffi.meta.__unm", +"ffi.meta.__tostring", +"ffi.meta.__pairs", +"ffi.meta.__ipairs", +"ffi.clib.__index", +"ffi.clib.__newindex", +"ffi.clib.__gc", +"ffi.callback.free", +"ffi.callback.set", +"ffi.cdef", +"ffi.new", +"ffi.cast", +"ffi.typeof", +"ffi.typeinfo", +"ffi.istype", +"ffi.sizeof", +"ffi.alignof", +"ffi.offsetof", +"ffi.errno", +"ffi.string", +"ffi.copy", +"ffi.fill", +"ffi.abi", +"ffi.metatype", +"ffi.gc", +"ffi.load", +}, + +} + diff --git a/src/jit/zone.lua b/src/jit/zone.lua deleted file mode 100644 index fa702c4e98..0000000000 --- 
a/src/jit/zone.lua +++ /dev/null @@ -1,45 +0,0 @@ ----------------------------------------------------------------------------- --- LuaJIT profiler zones. --- --- Copyright (C) 2005-2017 Mike Pall. All rights reserved. --- Released under the MIT license. See Copyright Notice in luajit.h ----------------------------------------------------------------------------- --- --- This module implements a simple hierarchical zone model. --- --- Example usage: --- --- local zone = require("jit.zone") --- zone("AI") --- ... --- zone("A*") --- ... --- print(zone:get()) --> "A*" --- ... --- zone() --- ... --- print(zone:get()) --> "AI" --- ... --- zone() --- ----------------------------------------------------------------------------- - -local remove = table.remove - -return setmetatable({ - flush = function(t) - for i=#t,1,-1 do t[i] = nil end - end, - get = function(t) - return t[#t] - end -}, { - __call = function(t, zone) - if zone then - t[#t+1] = zone - else - return (assert(remove(t), "empty zone stack")) - end - end -}) - diff --git a/src/lauxlib.h b/src/lauxlib.h index fed1491b89..a44f0272b3 100644 --- a/src/lauxlib.h +++ b/src/lauxlib.h @@ -15,9 +15,6 @@ #include "lua.h" -#define luaL_getn(L,i) ((int)lua_objlen(L, i)) -#define luaL_setn(L,i,j) ((void)0) /* no op! 
*/ - /* extra error code for `luaL_load' */ #define LUA_ERRFILE (LUA_ERRERR+1) @@ -58,6 +55,10 @@ LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...); LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def, const char *const lst[]); +/* pre-defined references */ +#define LUA_NOREF (-2) +#define LUA_REFNIL (-1) + LUALIB_API int (luaL_ref) (lua_State *L, int t); LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref); @@ -84,6 +85,11 @@ LUALIB_API int (luaL_loadbufferx) (lua_State *L, const char *buff, size_t sz, const char *name, const char *mode); LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, int level); +LUALIB_API void (luaL_setfuncs) (lua_State *L, const luaL_Reg *l, int nup); +LUALIB_API void (luaL_pushmodule) (lua_State *L, const char *modname, + int sizehint); +LUALIB_API void *(luaL_testudata) (lua_State *L, int ud, const char *tname); +LUALIB_API void (luaL_setmetatable) (lua_State *L, const char *tname); /* @@ -113,6 +119,11 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, #define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n))) +/* From Lua 5.2. */ +#define luaL_newlibtable(L, l) \ + lua_createtable(L, 0, sizeof(l)/sizeof((l)[0]) - 1) +#define luaL_newlib(L, l) (luaL_newlibtable(L, l), luaL_setfuncs(L, l, 0)) + /* ** {====================================================== ** Generic Buffer manipulation @@ -147,21 +158,4 @@ LUALIB_API void (luaL_pushresult) (luaL_Buffer *B); /* }====================================================== */ - -/* compatibility with ref system */ - -/* pre-defined references */ -#define LUA_NOREF (-2) -#define LUA_REFNIL (-1) - -#define lua_ref(L,lock) ((lock) ? 
luaL_ref(L, LUA_REGISTRYINDEX) : \ - (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0)) - -#define lua_unref(L,ref) luaL_unref(L, LUA_REGISTRYINDEX, (ref)) - -#define lua_getref(L,ref) lua_rawgeti(L, LUA_REGISTRYINDEX, (ref)) - - -#define luaL_reg luaL_Reg - #endif diff --git a/src/lib_aux.c b/src/lib_aux.c index 4e137949ec..857cc53171 100644 --- a/src/lib_aux.c +++ b/src/lib_aux.c @@ -22,9 +22,7 @@ #include "lj_trace.h" #include "lj_lib.h" -#if LJ_TARGET_POSIX #include -#endif /* -- I/O error handling -------------------------------------------------- */ @@ -49,7 +47,6 @@ LUALIB_API int luaL_fileresult(lua_State *L, int stat, const char *fname) LUALIB_API int luaL_execresult(lua_State *L, int stat) { if (stat != -1) { -#if LJ_TARGET_POSIX if (WIFSIGNALED(stat)) { stat = WTERMSIG(stat); setnilV(L->top++); @@ -63,13 +60,6 @@ LUALIB_API int luaL_execresult(lua_State *L, int stat) setnilV(L->top++); lua_pushliteral(L, "exit"); } -#else - if (stat == 0) - setboolV(L->top++, 1); - else - setnilV(L->top++); - lua_pushliteral(L, "exit"); -#endif setintV(L->top++, stat); return 3; } @@ -107,38 +97,36 @@ LUALIB_API const char *luaL_findtable(lua_State *L, int idx, static int libsize(const luaL_Reg *l) { int size = 0; - for (; l->name; l++) size++; + for (; l && l->name; l++) size++; return size; } +LUALIB_API void luaL_pushmodule(lua_State *L, const char *modname, int sizehint) +{ + luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); + lua_getfield(L, -1, modname); + if (!lua_istable(L, -1)) { + lua_pop(L, 1); + if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, sizehint) != NULL) + lj_err_callerv(L, LJ_ERR_BADMODN, modname); + lua_pushvalue(L, -1); + lua_setfield(L, -3, modname); /* _LOADED[modname] = new table. */ + } + lua_remove(L, -2); /* Remove _LOADED table. 
*/ +} + LUALIB_API void luaL_openlib(lua_State *L, const char *libname, const luaL_Reg *l, int nup) { lj_lib_checkfpu(L); if (libname) { - int size = libsize(l); - /* check whether lib already exists */ - luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); - lua_getfield(L, -1, libname); /* get _LOADED[libname] */ - if (!lua_istable(L, -1)) { /* not found? */ - lua_pop(L, 1); /* remove previous result */ - /* try global variable (and create one if it does not exist) */ - if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL) - lj_err_callerv(L, LJ_ERR_BADMODN, libname); - lua_pushvalue(L, -1); - lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */ - } - lua_remove(L, -2); /* remove _LOADED table */ - lua_insert(L, -(nup+1)); /* move library table to below upvalues */ - } - for (; l->name; l++) { - int i; - for (i = 0; i < nup; i++) /* copy upvalues to the top */ - lua_pushvalue(L, -nup); - lua_pushcclosure(L, l->func, nup); - lua_setfield(L, -(nup+2), l->name); + luaL_pushmodule(L, libname, libsize(l)); + lua_insert(L, -(nup + 1)); /* Move module table below upvalues. */ } - lua_pop(L, nup); /* remove upvalues */ + if (l) + luaL_setfuncs(L, l, nup); + else + lua_pop(L, nup); /* Remove upvalues. */ } LUALIB_API void luaL_register(lua_State *L, const char *libname, @@ -147,6 +135,19 @@ LUALIB_API void luaL_register(lua_State *L, const char *libname, luaL_openlib(L, libname, l, 0); } +LUALIB_API void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup) +{ + luaL_checkstack(L, nup, "too many upvalues"); + for (; l->name; l++) { + int i; + for (i = 0; i < nup; i++) /* Copy upvalues to the top. */ + lua_pushvalue(L, -nup); + lua_pushcclosure(L, l->func, nup); + lua_setfield(L, -(nup + 2), l->name); + } + lua_pop(L, nup); /* Remove upvalues. 
*/ +} + LUALIB_API const char *luaL_gsub(lua_State *L, const char *s, const char *p, const char *r) { @@ -300,12 +301,6 @@ static int panic(lua_State *L) return 0; } -#ifdef LUAJIT_USE_SYSMALLOC - -#if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND) -#error "Must use builtin allocator for 64 bit target" -#endif - static void *mem_alloc(void *ud, void *ptr, size_t osize, size_t nsize) { (void)ud; @@ -324,33 +319,3 @@ LUALIB_API lua_State *luaL_newstate(void) if (L) G(L)->panic = panic; return L; } - -#else - -#include "lj_alloc.h" - -LUALIB_API lua_State *luaL_newstate(void) -{ - lua_State *L; - void *ud = lj_alloc_create(); - if (ud == NULL) return NULL; -#if LJ_64 && !LJ_GC64 - L = lj_state_newstate(lj_alloc_f, ud); -#else - L = lua_newstate(lj_alloc_f, ud); -#endif - if (L) G(L)->panic = panic; - return L; -} - -#if LJ_64 && !LJ_GC64 -LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) -{ - UNUSED(f); UNUSED(ud); - fputs("Must use luaL_newstate() for 64 bit target\n", stderr); - return NULL; -} -#endif - -#endif - diff --git a/src/lib_base.c b/src/lib_base.c index 44014f9b67..c061d296e8 100644 --- a/src/lib_base.c +++ b/src/lib_base.c @@ -24,10 +24,8 @@ #include "lj_meta.h" #include "lj_state.h" #include "lj_frame.h" -#if LJ_HASFFI #include "lj_ctype.h" #include "lj_cconv.h" -#endif #include "lj_bc.h" #include "lj_ff.h" #include "lj_dispatch.h" @@ -81,7 +79,6 @@ LJLIB_ASM(next) return FFH_UNREACHABLE; } -#if LJ_52 || LJ_HASFFI static int ffh_pairs(lua_State *L, MMS mm) { TValue *o = lj_lib_checkany(L, 1); @@ -98,9 +95,6 @@ static int ffh_pairs(lua_State *L, MMS mm) return FFH_RES(3); } } -#else -#define ffh_pairs(L, mm) (lj_lib_checktab(L, 1), FFH_UNREACHABLE) -#endif LJLIB_PUSH(lastcl) LJLIB_ASM(pairs) LJLIB_REC(xpairs 0) @@ -265,25 +259,16 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) 
copyTV(L, L->base-1-LJ_FR2, o); return FFH_RES(1); } -#if LJ_HASFFI if (tviscdata(o)) { CTState *cts = ctype_cts(L); CType *ct = lj_ctype_rawref(cts, cdataV(o)->ctypeid); if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct); if (ctype_isnum(ct->info) || ctype_iscomplex(ct->info)) { - if (LJ_DUALNUM && ctype_isinteger_or_bool(ct->info) && - ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) { - int32_t i; - lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0); - setintV(L->base-1-LJ_FR2, i); - return FFH_RES(1); - } lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE), (uint8_t *)&(L->base-1-LJ_FR2)->n, o, 0); return FFH_RES(1); } } -#endif } else { const char *p = strdata(lj_lib_checkstr(L, 1)); char *ep; @@ -294,10 +279,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) if (p != ep) { while (lj_char_isspace((unsigned char)(*ep))) ep++; if (*ep == '\0') { - if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u)) - setintV(L->base-1-LJ_FR2, (int32_t)ul); - else - setnumV(L->base-1-LJ_FR2, (lua_Number)ul); + setnumV(L->base-1-LJ_FR2, (lua_Number)ul); return FFH_RES(1); } } @@ -346,7 +328,7 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.) static int load_aux(lua_State *L, int status, int envarg) { - if (status == 0) { + if (status == LUA_OK) { if (tvistab(L->base+envarg-1)) { GCfunc *fn = funcV(L->top-1); GCtab *t = tabV(L->base+envarg-1); @@ -419,7 +401,7 @@ LJLIB_CF(dofile) GCstr *fname = lj_lib_optstr(L, 1); setnilV(L->top); L->top = L->base+1; - if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0) + if (luaL_loadfile(L, fname ? 
strdata(fname) : NULL) != LUA_OK) lua_error(L); lua_call(L, 0, LUA_MULTRET); return (int)(L->top - L->base) - 1; @@ -537,7 +519,7 @@ LJLIB_CF(coroutine_status) co = threadV(L->base); if (co == L) s = "running"; else if (co->status == LUA_YIELD) s = "suspended"; - else if (co->status != 0) s = "dead"; + else if (co->status != LUA_OK) s = "dead"; else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal"; else if (co->top == co->base) s = "dead"; else s = "suspended"; @@ -583,7 +565,7 @@ LJLIB_ASM(coroutine_yield) static int ffh_resume(lua_State *L, lua_State *co, int wrap) { if (co->cframe != NULL || co->status > LUA_YIELD || - (co->status == 0 && co->top == co->base)) { + (co->status == LUA_OK && co->top == co->base)) { ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD; if (wrap) lj_err_caller(L, em); setboolV(L->base-1-LJ_FR2, 0); @@ -608,13 +590,11 @@ LJLIB_NOREG LJLIB_ASM(coroutine_wrap_aux) /* Inline declarations. */ LJ_ASMF void lj_ff_coroutine_wrap_aux(void); -#if !(LJ_TARGET_MIPS && defined(ljamalg_c)) -LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, +LJ_FUNCA_NORET void lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co); -#endif /* Error handler, called from assembler VM. 
*/ -void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co) +void lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co) { co->top--; copyTV(L, L->top, co->top); L->top++; if (tvisstr(L->top-1)) diff --git a/src/lib_bit.c b/src/lib_bit.c index c979a44839..36f98acc7d 100644 --- a/src/lib_bit.c +++ b/src/lib_bit.c @@ -15,12 +15,10 @@ #include "lj_buf.h" #include "lj_strscan.h" #include "lj_strfmt.h" -#if LJ_HASFFI #include "lj_ctype.h" #include "lj_cdata.h" #include "lj_cconv.h" #include "lj_carith.h" -#endif #include "lj_ff.h" #include "lj_lib.h" @@ -28,7 +26,6 @@ #define LJLIB_MODULE_bit -#if LJ_HASFFI static int bit_result64(lua_State *L, CTypeID id, uint64_t x) { GCcdata *cd = lj_cdata_new_(L, id, 8); @@ -36,61 +33,30 @@ static int bit_result64(lua_State *L, CTypeID id, uint64_t x) setcdataV(L, L->base-1-LJ_FR2, cd); return FFH_RES(1); } -#else -static int32_t bit_checkbit(lua_State *L, int narg) -{ - TValue *o = L->base + narg-1; - if (!(o < L->top && lj_strscan_numberobj(o))) - lj_err_argt(L, narg, LUA_TNUMBER); - if (LJ_LIKELY(tvisint(o))) { - return intV(o); - } else { - int32_t i = lj_num2bit(numV(o)); - if (LJ_DUALNUM) setintV(o, i); - return i; - } -} -#endif LJLIB_ASM(bit_tobit) LJLIB_REC(bit_tobit) { -#if LJ_HASFFI CTypeID id = 0; setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id)); return FFH_RES(1); -#else - lj_lib_checknumber(L, 1); - return FFH_RETRY; -#endif } LJLIB_ASM(bit_bnot) LJLIB_REC(bit_unary IR_BNOT) { -#if LJ_HASFFI CTypeID id = 0; uint64_t x = lj_carith_check64(L, 1, &id); return id ? bit_result64(L, id, ~x) : FFH_RETRY; -#else - lj_lib_checknumber(L, 1); - return FFH_RETRY; -#endif } LJLIB_ASM(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP) { -#if LJ_HASFFI CTypeID id = 0; uint64_t x = lj_carith_check64(L, 1, &id); return id ? 
bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY; -#else - lj_lib_checknumber(L, 1); - return FFH_RETRY; -#endif } LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) { -#if LJ_HASFFI CTypeID id = 0, id2 = 0; uint64_t x = lj_carith_check64(L, 1, &id); int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2); @@ -100,11 +66,6 @@ LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) } if (id2) setintV(L->base+1, sh); return FFH_RETRY; -#else - lj_lib_checknumber(L, 1); - bit_checkbit(L, 2); - return FFH_RETRY; -#endif } LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) @@ -113,7 +74,6 @@ LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR) LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) { -#if LJ_HASFFI CTypeID id = 0; TValue *o = L->base, *top = L->top; int i = 0; @@ -131,11 +91,6 @@ LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) return bit_result64(L, id, y); } return FFH_RETRY; -#else - int i = 0; - do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top); - return FFH_RETRY; -#endif } LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) @@ -144,24 +99,15 @@ LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) LJLIB_CF(bit_tohex) LJLIB_REC(.) { -#if LJ_HASFFI CTypeID id = 0, id2 = 0; uint64_t b = lj_carith_check64(L, 1, &id); int32_t n = L->base+1>=L->top ? (id ? 16 : 8) : (int32_t)lj_carith_check64(L, 2, &id2); -#else - uint32_t b = (uint32_t)bit_checkbit(L, 1); - int32_t n = L->base+1>=L->top ? 
8 : bit_checkbit(L, 2); -#endif SBuf *sb = lj_buf_tmp_(L); SFormat sf = (STRFMT_UINT|STRFMT_T_HEX); if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; } sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC); -#if LJ_HASFFI if (n < 16) b &= ((uint64_t)1 << 4*n)-1; -#else - if (n < 8) b &= (1u << 4*n)-1; -#endif sb = lj_strfmt_putfxint(sb, sf, b); setstrV(L, L->top-1, lj_buf_str(L, sb)); lj_gc_check(L); diff --git a/src/lib_ffi.c b/src/lib_ffi.c index 136e98e896..eb2fa2f269 100644 --- a/src/lib_ffi.c +++ b/src/lib_ffi.c @@ -14,7 +14,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_gc.h" #include "lj_err.h" @@ -434,7 +433,7 @@ static int ffi_callback_set(lua_State *L, GCfunc *fn) GCcdata *cd = ffi_checkcdata(L, 1); CTState *cts = ctype_cts(L); CType *ct = ctype_raw(cts, cd->ctypeid); - if (ctype_isptr(ct->info) && (LJ_32 || ct->size == 8)) { + if (ctype_isptr(ct->info) && ct->size == 8) { MSize slot = lj_ccallback_ptr2slot(cts, *(void **)cdataptr(cd)); if (slot < cts->cb.sizeid && cts->cb.cbid[slot] != 0) { GCtab *t = cts->miscmap; @@ -728,29 +727,13 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.) 
GCstr *s = lj_lib_checkstr(L, 1); int b = 0; switch (s->hash) { -#if LJ_64 case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */ -#else - case H_(662d3c79,d0e22477): b = 1; break; /* 32bit */ -#endif #if LJ_ARCH_HASFPU case H_(e33ee463,e33ee463): b = 1; break; /* fpu */ #endif -#if LJ_ABI_SOFTFP - case H_(61211a23,c2e8c81c): b = 1; break; /* softfp */ -#else case H_(539417a8,8ce0812f): b = 1; break; /* hardfp */ -#endif -#if LJ_ABI_EABI - case H_(2182df8f,f2ed1152): b = 1; break; /* eabi */ -#endif -#if LJ_ABI_WIN - case H_(4ab624a8,4ab624a8): b = 1; break; /* win */ -#endif case H_(3af93066,1f001464): b = 1; break; /* le/be */ -#if LJ_GC64 case H_(9e89d2c9,13c83c92): b = 1; break; /* gc64 */ -#endif default: break; } @@ -866,4 +849,3 @@ LUALIB_API int luaopen_ffi(lua_State *L) return 1; } -#endif diff --git a/src/lib_init.c b/src/lib_init.c index 2ed370e967..8fbb351c12 100644 --- a/src/lib_init.c +++ b/src/lib_init.c @@ -30,9 +30,7 @@ static const luaL_Reg lj_lib_load[] = { }; static const luaL_Reg lj_lib_preload[] = { -#if LJ_HASFFI { LUA_FFILIBNAME, luaopen_ffi }, -#endif { NULL, NULL } }; diff --git a/src/lib_io.c b/src/lib_io.c index 9763ed466f..d73af63f88 100644 --- a/src/lib_io.c +++ b/src/lib_io.c @@ -97,14 +97,7 @@ static int io_file_close(lua_State *L, IOFileUD *iof) ok = (fclose(iof->fp) == 0); } else if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_PIPE) { int stat = -1; -#if LJ_TARGET_POSIX stat = pclose(iof->fp); -#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE - stat = _pclose(iof->fp); -#else - lua_assert(0); - return 0; -#endif #if LJ_52 iof->fp = NULL; return luaL_execresult(L, stat); @@ -127,13 +120,6 @@ static int io_file_readnum(lua_State *L, FILE *fp) { lua_Number d; if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { - if (LJ_DUALNUM) { - int32_t i = lj_num2int(d); - if (d == (lua_Number)i && !tvismzero((cTValue *)&d)) { - setintV(L->top++, i); - return 1; - } - } setnumV(L->top++, d); return 1; } else { @@ -318,33 +304,15 @@ LJLIB_CF(io_method_seek) 
else if (opt == 2) opt = SEEK_END; o = L->base+2; if (o < L->top) { - if (tvisint(o)) - ofs = (int64_t)intV(o); - else if (tvisnum(o)) + if (tvisnum(o)) ofs = (int64_t)numV(o); else if (!tvisnil(o)) lj_err_argt(L, 3, LUA_TNUMBER); } -#if LJ_TARGET_POSIX res = fseeko(fp, ofs, opt); -#elif _MSC_VER >= 1400 - res = _fseeki64(fp, ofs, opt); -#elif defined(__MINGW32__) - res = fseeko64(fp, ofs, opt); -#else - res = fseek(fp, (long)ofs, opt); -#endif if (res) return luaL_fileresult(L, 0, NULL); -#if LJ_TARGET_POSIX ofs = ftello(fp); -#elif _MSC_VER >= 1400 - ofs = _ftelli64(fp); -#elif defined(__MINGW32__) - ofs = ftello64(fp); -#else - ofs = (int64_t)ftell(fp); -#endif setint64V(L->top-1, ofs); return 1; } @@ -406,32 +374,20 @@ LJLIB_CF(io_open) LJLIB_CF(io_popen) { -#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE) const char *fname = strdata(lj_lib_checkstr(L, 1)); GCstr *s = lj_lib_optstr(L, 2); const char *mode = s ? strdata(s) : "r"; IOFileUD *iof = io_file_new(L); iof->type = IOFILE_TYPE_PIPE; -#if LJ_TARGET_POSIX fflush(NULL); iof->fp = popen(fname, mode); -#else - iof->fp = _popen(fname, mode); -#endif return iof->fp != NULL ? 1 : luaL_fileresult(L, 0, fname); -#else - return luaL_error(L, LUA_QL("popen") " not supported"); -#endif } LJLIB_CF(io_tmpfile) { IOFileUD *iof = io_file_new(L); -#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA - iof->fp = NULL; errno = ENOSYS; -#else iof->fp = tmpfile(); -#endif return iof->fp != NULL ? 
1 : luaL_fileresult(L, 0, NULL); } diff --git a/src/lib_jit.c b/src/lib_jit.c index 22ca0a1a24..0e88f1523c 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -18,21 +18,17 @@ #include "lj_tab.h" #include "lj_state.h" #include "lj_bc.h" -#if LJ_HASFFI #include "lj_ctype.h" -#endif -#if LJ_HASJIT #include "lj_ir.h" #include "lj_jit.h" #include "lj_ircall.h" #include "lj_iropt.h" #include "lj_target.h" -#endif #include "lj_trace.h" #include "lj_dispatch.h" #include "lj_vm.h" -#include "lj_vmevent.h" #include "lj_lib.h" +#include "lj_auditlog.h" #include "luajit.h" @@ -77,17 +73,28 @@ LJLIB_CF(jit_off) LJLIB_CF(jit_flush) { -#if LJ_HASJIT if (L->base < L->top && tvisnumber(L->base)) { int traceno = lj_lib_checkint(L, 1); luaJIT_setmode(L, traceno, LUAJIT_MODE_FLUSH|LUAJIT_MODE_TRACE); return 0; } -#endif return setjitmode(L, LUAJIT_MODE_FLUSH); } -#if LJ_HASJIT +LJLIB_CF(jit_auditlog) +{ + if (L->base < L->top && tvisstr(L->base)) { + /* XXX Support auditlog file size argument. */ + if (lj_auditlog_open(strdata(lj_lib_checkstr(L, 1)), 0)) { + return 0; + } else { + lj_err_caller(L, LJ_ERR_AUDITLOG); + } + } else { + lj_err_argtype(L, 1, "string filename"); + } +} + /* Push a string for every flag bit that is set. */ static void flagbits_to_strings(lua_State *L, uint32_t flags, uint32_t base, const char *str) @@ -96,48 +103,20 @@ static void flagbits_to_strings(lua_State *L, uint32_t flags, uint32_t base, if (flags & base) setstrV(L, L->top++, lj_str_new(L, str+1, *(uint8_t *)str)); } -#endif LJLIB_CF(jit_status) { -#if LJ_HASJIT jit_State *J = L2J(L); L->top = L->base; setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); return (int)(L->top - L->base); -#else - setboolV(L->top++, 0); - return 1; -#endif } -LJLIB_CF(jit_attach) +/* Calling this forces a trace stitch. 
*/ +LJLIB_CF(jit_tracebarrier) { -#ifdef LUAJIT_DISABLE_VMEVENT - luaL_error(L, "vmevent API disabled"); -#else - GCfunc *fn = lj_lib_checkfunc(L, 1); - GCstr *s = lj_lib_optstr(L, 2); - luaL_findtable(L, LUA_REGISTRYINDEX, LJ_VMEVENTS_REGKEY, LJ_VMEVENTS_HSIZE); - if (s) { /* Attach to given event. */ - const uint8_t *p = (const uint8_t *)strdata(s); - uint32_t h = s->len; - while (*p) h = h ^ (lj_rol(h, 6) + *p++); - lua_pushvalue(L, 1); - lua_rawseti(L, -2, VMEVENT_HASHIDX(h)); - G(L)->vmevmask = VMEVENT_NOCACHE; /* Invalidate cache. */ - } else { /* Detach if no event given. */ - setnilV(L->top++); - while (lua_next(L, -2)) { - L->top--; - if (tvisfunc(L->top) && funcV(L->top) == fn) { - setnilV(lj_tab_set(L, tabV(L->top-2), L->top-1)); - } - } - } -#endif return 0; } @@ -148,296 +127,8 @@ LJLIB_PUSH(top-2) LJLIB_SET(version) #include "lj_libdef.h" -/* -- jit.util.* functions ------------------------------------------------ */ - -#define LJLIB_MODULE_jit_util - -/* -- Reflection API for Lua functions ------------------------------------ */ - -/* Return prototype of first argument (Lua function or prototype object) */ -static GCproto *check_Lproto(lua_State *L, int nolua) -{ - TValue *o = L->base; - if (L->top > o) { - if (tvisproto(o)) { - return protoV(o); - } else if (tvisfunc(o)) { - if (isluafunc(funcV(o))) - return funcproto(funcV(o)); - else if (nolua) - return NULL; - } - } - lj_err_argt(L, 1, LUA_TFUNCTION); - return NULL; /* unreachable */ -} - -static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val) -{ - setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val); -} - -/* local info = jit.util.funcinfo(func [,pc]) */ -LJLIB_CF(jit_util_funcinfo) -{ - GCproto *pt = check_Lproto(L, 1); - if (pt) { - BCPos pc = (BCPos)lj_lib_optint(L, 2, 0); - GCtab *t; - lua_createtable(L, 0, 16); /* Increment hash size if fields are added. 
*/ - t = tabV(L->top-1); - setintfield(L, t, "linedefined", pt->firstline); - setintfield(L, t, "lastlinedefined", pt->firstline + pt->numline); - setintfield(L, t, "stackslots", pt->framesize); - setintfield(L, t, "params", pt->numparams); - setintfield(L, t, "bytecodes", (int32_t)pt->sizebc); - setintfield(L, t, "gcconsts", (int32_t)pt->sizekgc); - setintfield(L, t, "nconsts", (int32_t)pt->sizekn); - setintfield(L, t, "upvalues", (int32_t)pt->sizeuv); - if (pc < pt->sizebc) - setintfield(L, t, "currentline", lj_debug_line(pt, pc)); - lua_pushboolean(L, (pt->flags & PROTO_VARARG)); - lua_setfield(L, -2, "isvararg"); - lua_pushboolean(L, (pt->flags & PROTO_CHILD)); - lua_setfield(L, -2, "children"); - setstrV(L, L->top++, proto_chunkname(pt)); - lua_setfield(L, -2, "source"); - lj_debug_pushloc(L, pt, pc); - lua_setfield(L, -2, "loc"); - setprotoV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "proto")), pt); - } else { - GCfunc *fn = funcV(L->base); - GCtab *t; - lua_createtable(L, 0, 4); /* Increment hash size if fields are added. 
*/ - t = tabV(L->top-1); - if (!iscfunc(fn)) - setintfield(L, t, "ffid", fn->c.ffid); - setintptrV(lj_tab_setstr(L, t, lj_str_newlit(L, "addr")), - (intptr_t)(void *)fn->c.f); - setintfield(L, t, "upvalues", fn->c.nupvalues); - } - return 1; -} - -/* local ins, m = jit.util.funcbc(func, pc) */ -LJLIB_CF(jit_util_funcbc) -{ - GCproto *pt = check_Lproto(L, 0); - BCPos pc = (BCPos)lj_lib_checkint(L, 2); - if (pc < pt->sizebc) { - BCIns ins = proto_bc(pt)[pc]; - BCOp op = bc_op(ins); - lua_assert(op < BC__MAX); - setintV(L->top, ins); - setintV(L->top+1, lj_bc_mode[op]); - L->top += 2; - return 2; - } - return 0; -} - -/* local k = jit.util.funck(func, idx) */ -LJLIB_CF(jit_util_funck) -{ - GCproto *pt = check_Lproto(L, 0); - ptrdiff_t idx = (ptrdiff_t)lj_lib_checkint(L, 2); - if (idx >= 0) { - if (idx < (ptrdiff_t)pt->sizekn) { - copyTV(L, L->top-1, proto_knumtv(pt, idx)); - return 1; - } - } else { - if (~idx < (ptrdiff_t)pt->sizekgc) { - GCobj *gc = proto_kgc(pt, idx); - setgcV(L, L->top-1, gc, ~gc->gch.gct); - return 1; - } - } - return 0; -} - -/* local name = jit.util.funcuvname(func, idx) */ -LJLIB_CF(jit_util_funcuvname) -{ - GCproto *pt = check_Lproto(L, 0); - uint32_t idx = (uint32_t)lj_lib_checkint(L, 2); - if (idx < pt->sizeuv) { - setstrV(L, L->top-1, lj_str_newz(L, lj_debug_uvname(pt, idx))); - return 1; - } - return 0; -} - -/* -- Reflection API for traces ------------------------------------------- */ - -#if LJ_HASJIT - -/* Check trace argument. Must not throw for non-existent trace numbers. */ -static GCtrace *jit_checktrace(lua_State *L) -{ - TraceNo tr = (TraceNo)lj_lib_checkint(L, 1); - jit_State *J = L2J(L); - if (tr > 0 && tr < J->sizetrace) - return traceref(J, tr); - return NULL; -} - -/* Names of link types. 
ORDER LJ_TRLINK */ -static const char *const jit_trlinkname[] = { - "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion", - "interpreter", "return", "stitch" -}; - -/* local info = jit.util.traceinfo(tr) */ -LJLIB_CF(jit_util_traceinfo) -{ - GCtrace *T = jit_checktrace(L); - if (T) { - GCtab *t; - lua_createtable(L, 0, 8); /* Increment hash size if fields are added. */ - t = tabV(L->top-1); - setintfield(L, t, "nins", (int32_t)T->nins - REF_BIAS - 1); - setintfield(L, t, "nk", REF_BIAS - (int32_t)T->nk); - setintfield(L, t, "link", T->link); - setintfield(L, t, "nexit", T->nsnap); - setstrV(L, L->top++, lj_str_newz(L, jit_trlinkname[T->linktype])); - lua_setfield(L, -2, "linktype"); - /* There are many more fields. Add them only when needed. */ - return 1; - } - return 0; -} - -/* local m, ot, op1, op2, prev = jit.util.traceir(tr, idx) */ -LJLIB_CF(jit_util_traceir) -{ - GCtrace *T = jit_checktrace(L); - IRRef ref = (IRRef)lj_lib_checkint(L, 2) + REF_BIAS; - if (T && ref >= REF_BIAS && ref < T->nins) { - IRIns *ir = &T->ir[ref]; - int32_t m = lj_ir_mode[ir->o]; - setintV(L->top-2, m); - setintV(L->top-1, ir->ot); - setintV(L->top++, (int32_t)ir->op1 - (irm_op1(m)==IRMref ? REF_BIAS : 0)); - setintV(L->top++, (int32_t)ir->op2 - (irm_op2(m)==IRMref ? REF_BIAS : 0)); - setintV(L->top++, ir->prev); - return 5; - } - return 0; -} - -/* local k, t [, slot] = jit.util.tracek(tr, idx) */ -LJLIB_CF(jit_util_tracek) -{ - GCtrace *T = jit_checktrace(L); - IRRef ref = (IRRef)lj_lib_checkint(L, 2) + REF_BIAS; - if (T && ref >= T->nk && ref < REF_BIAS) { - IRIns *ir = &T->ir[ref]; - int32_t slot = -1; - if (ir->o == IR_KSLOT) { - slot = ir->op2; - ir = &T->ir[ir->op1]; - } -#if LJ_HASFFI - if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) { - ptrdiff_t oldtop = savestack(L, L->top); - luaopen_ffi(L); /* Load FFI library on-demand. 
*/ - L->top = restorestack(L, oldtop); - } -#endif - lj_ir_kvalue(L, L->top-2, ir); - setintV(L->top-1, (int32_t)irt_type(ir->t)); - if (slot == -1) - return 2; - setintV(L->top++, slot); - return 3; - } - return 0; -} - -/* local snap = jit.util.tracesnap(tr, sn) */ -LJLIB_CF(jit_util_tracesnap) -{ - GCtrace *T = jit_checktrace(L); - SnapNo sn = (SnapNo)lj_lib_checkint(L, 2); - if (T && sn < T->nsnap) { - SnapShot *snap = &T->snap[sn]; - SnapEntry *map = &T->snapmap[snap->mapofs]; - MSize n, nent = snap->nent; - GCtab *t; - lua_createtable(L, nent+2, 0); - t = tabV(L->top-1); - setintV(lj_tab_setint(L, t, 0), (int32_t)snap->ref - REF_BIAS); - setintV(lj_tab_setint(L, t, 1), (int32_t)snap->nslots); - for (n = 0; n < nent; n++) - setintV(lj_tab_setint(L, t, (int32_t)(n+2)), (int32_t)map[n]); - setintV(lj_tab_setint(L, t, (int32_t)(nent+2)), (int32_t)SNAP(255, 0, 0)); - return 1; - } - return 0; -} - -/* local mcode, addr, loop = jit.util.tracemc(tr) */ -LJLIB_CF(jit_util_tracemc) -{ - GCtrace *T = jit_checktrace(L); - if (T && T->mcode != NULL) { - setstrV(L, L->top-1, lj_str_new(L, (const char *)T->mcode, T->szmcode)); - setintptrV(L->top++, (intptr_t)(void *)T->mcode); - setintV(L->top++, T->mcloop); - return 3; - } - return 0; -} - -/* local addr = jit.util.traceexitstub([tr,] exitno) */ -LJLIB_CF(jit_util_traceexitstub) -{ -#ifdef EXITSTUBS_PER_GROUP - ExitNo exitno = (ExitNo)lj_lib_checkint(L, 1); - jit_State *J = L2J(L); - if (exitno < EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) { - setintptrV(L->top-1, (intptr_t)(void *)exitstub_addr(J, exitno)); - return 1; - } -#else - if (L->top > L->base+1) { /* Don't throw for one-argument variant. */ - GCtrace *T = jit_checktrace(L); - ExitNo exitno = (ExitNo)lj_lib_checkint(L, 2); - ExitNo maxexit = T->root ? 
T->nsnap+1 : T->nsnap; - if (T && T->mcode != NULL && exitno < maxexit) { - setintptrV(L->top-1, (intptr_t)(void *)exitstub_trace_addr(T, exitno)); - return 1; - } - } -#endif - return 0; -} - -/* local addr = jit.util.ircalladdr(idx) */ -LJLIB_CF(jit_util_ircalladdr) -{ - uint32_t idx = (uint32_t)lj_lib_checkint(L, 1); - if (idx < IRCALL__MAX) { - setintptrV(L->top-1, (intptr_t)(void *)lj_ir_callinfo[idx].func); - return 1; - } - return 0; -} - -#endif - -#include "lj_libdef.h" - -static int luaopen_jit_util(lua_State *L) -{ - LJ_LIB_REG(L, NULL, jit_util); - return 1; -} - /* -- jit.opt module ------------------------------------------------------ */ -#if LJ_HASJIT #define LJLIB_MODULE_jit_opt @@ -530,109 +221,55 @@ LJLIB_CF(jit_opt_start) #include "lj_libdef.h" -#endif - -/* -- jit.profile module -------------------------------------------------- */ +/* -- jit.vmprofile module ----------------------------------------------- */ -#if LJ_HASPROFILE +#define LJLIB_MODULE_jit_vmprofile -#define LJLIB_MODULE_jit_profile - -/* Not loaded by default, use: local profile = require("jit.profile") */ - -static const char KEY_PROFILE_THREAD = 't'; -static const char KEY_PROFILE_FUNC = 'f'; - -static void jit_profile_callback(lua_State *L2, lua_State *L, int samples, - int vmstate) +LJLIB_CF(jit_vmprofile_open) { - TValue key; - cTValue *tv; - setlightudV(&key, (void *)&KEY_PROFILE_FUNC); - tv = lj_tab_get(L, tabV(registry(L)), &key); - if (tvisfunc(tv)) { - char vmst = (char)vmstate; - int status; - setfuncV(L2, L2->top++, funcV(tv)); - setthreadV(L2, L2->top++, L); - setintV(L2->top++, samples); - setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1)); - status = lua_pcall(L2, 3, 0, 0); /* callback(thread, samples, vmstate) */ - if (status) { - if (G(L2)->panic) G(L2)->panic(L2); - exit(EXIT_FAILURE); - } - lj_trace_abort(G(L2)); + int nargs = (int)(L->top - L->base); + int nostart = nargs >= 3 ? boolV(L->base+2) : 0; + int noselect = nargs >= 2 ? 
boolV(L->base+1) : 0; + const char *filename = nargs >= 1 ? strdata(lj_lib_checkstr(L, 1)) : NULL; + if (filename) { + return luaJIT_vmprofile_open(L, filename, noselect, nostart); + } else { + lj_err_argtype(L, 1, "filename"); } } -/* profile.start(mode, cb) */ -LJLIB_CF(jit_profile_start) +LJLIB_CF(jit_vmprofile_close) { - GCtab *registry = tabV(registry(L)); - GCstr *mode = lj_lib_optstr(L, 1); - GCfunc *func = lj_lib_checkfunc(L, 2); - lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */ - TValue key; - /* Anchor thread and function in registry. */ - setlightudV(&key, (void *)&KEY_PROFILE_THREAD); - setthreadV(L, lj_tab_set(L, registry, &key), L2); - setlightudV(&key, (void *)&KEY_PROFILE_FUNC); - setfuncV(L, lj_tab_set(L, registry, &key), func); - lj_gc_anybarriert(L, registry); - luaJIT_profile_start(L, mode ? strdata(mode) : "", - (luaJIT_profile_callback)jit_profile_callback, L2); - return 0; + if (L->base < L->top && tvislightud(L->base)) { + return luaJIT_vmprofile_close(L, lightudV(L->base)); + } else { + lj_err_argtype(L, 1, "vmprofile"); + } } -/* profile.stop() */ -LJLIB_CF(jit_profile_stop) +LJLIB_CF(jit_vmprofile_select) { - GCtab *registry; - TValue key; - luaJIT_profile_stop(L); - registry = tabV(registry(L)); - setlightudV(&key, (void *)&KEY_PROFILE_THREAD); - setnilV(lj_tab_set(L, registry, &key)); - setlightudV(&key, (void *)&KEY_PROFILE_FUNC); - setnilV(lj_tab_set(L, registry, &key)); - lj_gc_anybarriert(L, registry); - return 0; + if (L->base < L->top && tvislightud(L->base)) { + return luaJIT_vmprofile_select(L, lightudV(L->base)); + } else { + lj_err_argtype(L, 1, "vmprofile"); + } } -/* dump = profile.dumpstack([thread,] fmt, depth) */ -LJLIB_CF(jit_profile_dumpstack) +LJLIB_CF(jit_vmprofile_start) { - lua_State *L2 = L; - int arg = 0; - size_t len; - int depth; - GCstr *fmt; - const char *p; - if (L->top > L->base && tvisthread(L->base)) { - L2 = threadV(L->base); - arg = 1; - } - fmt = lj_lib_checkstr(L, arg+1); - 
depth = lj_lib_checkint(L, arg+2); - p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len); - lua_pushlstring(L, p, len); - return 1; + return luaJIT_vmprofile_start(L); } -#include "lj_libdef.h" - -static int luaopen_jit_profile(lua_State *L) +LJLIB_CF(jit_vmprofile_stop) { - LJ_LIB_REG(L, NULL, jit_profile); - return 1; + return luaJIT_vmprofile_stop(L); } -#endif +#include "lj_libdef.h" /* -- JIT compiler initialization ----------------------------------------- */ -#if LJ_HASJIT /* Default values for JIT parameters. */ static const int32_t jit_param_default[JIT_P__MAX+1] = { #define JIT_PARAMINIT(len, name, value) (value), @@ -640,25 +277,15 @@ JIT_PARAMDEF(JIT_PARAMINIT) #undef JIT_PARAMINIT 0 }; -#endif - -#if LJ_TARGET_ARM && LJ_TARGET_LINUX -#include -#endif /* Arch-dependent CPU detection. */ static uint32_t jit_cpudetect(lua_State *L) { uint32_t flags = 0; -#if LJ_TARGET_X86ORX64 uint32_t vendor[4]; uint32_t features[4]; if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { -#if !LJ_HASJIT -#define JIT_F_SSE2 2 -#endif flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; -#if LJ_HASJIT flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; if (vendor[2] == 0x6c65746e) { /* Intel. */ @@ -674,67 +301,8 @@ static uint32_t jit_cpudetect(lua_State *L) lj_vm_cpuid(7, xfeatures); flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; } -#endif } /* Check for required instruction set support on x86 (unnecessary on x64). */ -#if LJ_TARGET_X86 - if (!(flags & JIT_F_SSE2)) - luaL_error(L, "CPU with SSE2 required"); -#endif -#elif LJ_TARGET_ARM -#if LJ_HASJIT - int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ -#if LJ_TARGET_LINUX - if (ver < 70) { /* Runtime ARM CPU detection. */ - struct utsname ut; - uname(&ut); - if (strncmp(ut.machine, "armv", 4) == 0) { - if (ut.machine[4] >= '7') - ver = 70; - else if (ut.machine[4] == '6') - ver = 60; - } - } -#endif - flags |= ver >= 70 ? JIT_F_ARMV7 : - ver >= 61 ? 
JIT_F_ARMV6T2_ : - ver >= 60 ? JIT_F_ARMV6_ : 0; - flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; -#endif -#elif LJ_TARGET_ARM64 - /* No optional CPU features to detect (for now). */ -#elif LJ_TARGET_PPC -#if LJ_HASJIT -#if LJ_ARCH_SQRT - flags |= JIT_F_SQRT; -#endif -#if LJ_ARCH_ROUND - flags |= JIT_F_ROUND; -#endif -#endif -#elif LJ_TARGET_MIPS -#if LJ_HASJIT - /* Compile-time MIPS CPU detection. */ -#if LJ_ARCH_VERSION >= 20 - flags |= JIT_F_MIPSXXR2; -#endif - /* Runtime MIPS CPU detection. */ -#if defined(__GNUC__) - if (!(flags & JIT_F_MIPSXXR2)) { - int x; -#ifdef __mips16 - x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */ -#else - /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */ - __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2"); -#endif - if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ - } -#endif -#endif -#else -#error "Missing CPU detection for this architecture" -#endif UNUSED(L); return flags; } @@ -743,14 +311,10 @@ static uint32_t jit_cpudetect(lua_State *L) static void jit_init(lua_State *L) { uint32_t flags = jit_cpudetect(L); -#if LJ_HASJIT jit_State *J = L2J(L); J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; memcpy(J->param, jit_param_default, sizeof(J->param)); lj_dispatch_update(G(L)); -#else - UNUSED(flags); -#endif } LUALIB_API int luaopen_jit(lua_State *L) @@ -761,16 +325,8 @@ LUALIB_API int luaopen_jit(lua_State *L) lua_pushinteger(L, LUAJIT_VERSION_NUM); lua_pushliteral(L, LUAJIT_VERSION); LJ_LIB_REG(L, LUA_JITLIBNAME, jit); -#if LJ_HASPROFILE - lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile, - tabref(L->env)); -#endif -#ifndef LUAJIT_DISABLE_JITUTIL - lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env)); -#endif -#if LJ_HASJIT + LJ_LIB_REG(L, "jit.vmprofile", jit_vmprofile); LJ_LIB_REG(L, "jit.opt", jit_opt); -#endif L->top -= 2; return 1; } diff --git a/src/lib_math.c 
b/src/lib_math.c index 7bb03880bd..8000460326 100644 --- a/src/lib_math.c +++ b/src/lib_math.c @@ -78,11 +78,7 @@ LJLIB_ASM_(math_fmod) LJLIB_ASM(math_ldexp) LJLIB_REC(.) { lj_lib_checknum(L, 1); -#if LJ_DUALNUM && !LJ_TARGET_X86ORX64 - lj_lib_checkint(L, 2); -#else lj_lib_checknum(L, 2); -#endif return FFH_RETRY; } @@ -121,7 +117,7 @@ typedef union { uint64_t u64; double d; } U64double; r ^= z; rs->gen[i] = z; /* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */ -LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs) +LJ_NOINLINE uint64_t lj_math_random_step(RandomState *rs) { uint64_t z, r = 0; TW223_GEN(0, 63, 31, 18) @@ -161,42 +157,13 @@ LJLIB_CF(math_random) LJLIB_REC(.) u.u64 = lj_math_random_step(rs); d = u.d - 1.0; if (n > 0) { -#if LJ_DUALNUM - int isint = 1; - double r1; - lj_lib_checknumber(L, 1); - if (tvisint(L->base)) { - r1 = (lua_Number)intV(L->base); - } else { - isint = 0; - r1 = numV(L->base); - } -#else double r1 = lj_lib_checknum(L, 1); -#endif if (n == 1) { d = lj_vm_floor(d*r1) + 1.0; /* d is an int in range [1, r1] */ } else { -#if LJ_DUALNUM - double r2; - lj_lib_checknumber(L, 2); - if (tvisint(L->base+1)) { - r2 = (lua_Number)intV(L->base+1); - } else { - isint = 0; - r2 = numV(L->base+1); - } -#else double r2 = lj_lib_checknum(L, 2); -#endif d = lj_vm_floor(d*(r2-r1+1.0)) + r1; /* d is an int in range [r1, r2] */ } -#if LJ_DUALNUM - if (isint) { - setintV(L->top-1, lj_num2int(d)); - return 1; - } -#endif } /* else: d is a double in range [0, 1] */ setnumV(L->top++, d); return 1; @@ -221,10 +188,6 @@ LUALIB_API int luaopen_math(lua_State *L) rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); rs->valid = 0; /* Use lazy initialization to save some time on startup. 
*/ LJ_LIB_REG(L, LUA_MATHLIBNAME, math); -#if defined(LUA_COMPAT_MOD) && !LJ_52 - lua_getfield(L, -1, "fmod"); - lua_setfield(L, -2, "mod"); -#endif return 1; } diff --git a/src/lib_os.c b/src/lib_os.c index 9e78d49ac3..43c57bff77 100644 --- a/src/lib_os.c +++ b/src/lib_os.c @@ -23,15 +23,9 @@ #include "lj_str.h" #include "lj_lib.h" -#if LJ_TARGET_POSIX #include -#else -#include -#endif -#if !LJ_TARGET_PSVITA #include -#endif /* ------------------------------------------------------------------------ */ @@ -76,11 +70,6 @@ LJLIB_CF(os_rename) LJLIB_CF(os_tmpname) { -#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA - lj_err_caller(L, LJ_ERR_OSUNIQF); - return 0; -#else -#if LJ_TARGET_POSIX char buf[15+1]; int fp; strcpy(buf, "/tmp/lua_XXXXXX"); @@ -89,23 +78,13 @@ LJLIB_CF(os_tmpname) close(fp); else lj_err_caller(L, LJ_ERR_OSUNIQF); -#else - char buf[L_tmpnam]; - if (tmpnam(buf) == NULL) - lj_err_caller(L, LJ_ERR_OSUNIQF); -#endif lua_pushstring(L, buf); return 1; -#endif } LJLIB_CF(os_getenv) { -#if LJ_TARGET_CONSOLE - lua_pushnil(L); -#else lua_pushstring(L, getenv(luaL_checkstring(L, 1))); /* if NULL push nil */ -#endif return 1; } @@ -173,22 +152,12 @@ LJLIB_CF(os_date) const char *s = luaL_optstring(L, 1, "%c"); time_t t = luaL_opt(L, (time_t)luaL_checknumber, 2, time(NULL)); struct tm *stm; -#if LJ_TARGET_POSIX struct tm rtm; -#endif if (*s == '!') { /* UTC? */ s++; /* Skip '!' */ -#if LJ_TARGET_POSIX stm = gmtime_r(&t, &rtm); -#else - stm = gmtime(&t); -#endif } else { -#if LJ_TARGET_POSIX stm = localtime_r(&t, &rtm); -#else - stm = localtime(&t); -#endif } if (stm == NULL) { /* Invalid date? */ setnilV(L->top++); @@ -262,9 +231,6 @@ LJLIB_CF(os_difftime) LJLIB_CF(os_setlocale) { -#if LJ_TARGET_PSVITA - lua_pushliteral(L, "C"); -#else GCstr *s = lj_lib_optstr(L, 1); const char *str = s ? 
strdata(s) : NULL; int opt = lj_lib_checkopt(L, 2, 6, @@ -276,7 +242,6 @@ LJLIB_CF(os_setlocale) else if (opt == 4) opt = LC_MONETARY; else if (opt == 6) opt = LC_ALL; lua_pushstring(L, setlocale(opt, str)); -#endif return 1; } diff --git a/src/lib_package.c b/src/lib_package.c index b7655c6b27..f4293e3433 100644 --- a/src/lib_package.c +++ b/src/lib_package.c @@ -32,7 +32,6 @@ #define SYMPREFIX_CF "luaopen_%s" #define SYMPREFIX_BC "luaJIT_BC_%s" -#if LJ_TARGET_DLOPEN #include @@ -65,120 +64,6 @@ static const char *ll_bcsym(void *lib, const char *sym) return (const char *)dlsym(lib, sym); } -#elif LJ_TARGET_WINDOWS - -#define WIN32_LEAN_AND_MEAN -#include - -#ifndef GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS -#define GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS 4 -#define GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT 2 -BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); -#endif - -#undef setprogdir - -static void setprogdir(lua_State *L) -{ - char buff[MAX_PATH + 1]; - char *lb; - DWORD nsize = sizeof(buff); - DWORD n = GetModuleFileNameA(NULL, buff, nsize); - if (n == 0 || n == nsize || (lb = strrchr(buff, '\\')) == NULL) { - luaL_error(L, "unable to get ModuleFileName"); - } else { - *lb = '\0'; - luaL_gsub(L, lua_tostring(L, -1), LUA_EXECDIR, buff); - lua_remove(L, -2); /* remove original string */ - } -} - -static void pusherror(lua_State *L) -{ - DWORD error = GetLastError(); -#if LJ_TARGET_XBOXONE - wchar_t wbuffer[128]; - char buffer[128*2]; - if (FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, - NULL, error, 0, wbuffer, sizeof(wbuffer)/sizeof(wchar_t), NULL) && - WideCharToMultiByte(CP_ACP, 0, wbuffer, 128, buffer, 128*2, NULL, NULL)) -#else - char buffer[128]; - if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, - NULL, error, 0, buffer, sizeof(buffer), NULL)) -#endif - lua_pushstring(L, buffer); - else - lua_pushfstring(L, "system error %d\n", error); -} - -static void ll_unloadlib(void *lib) -{ - 
FreeLibrary((HINSTANCE)lib); -} - -static void *ll_load(lua_State *L, const char *path, int gl) -{ - HINSTANCE lib = LoadLibraryExA(path, NULL, 0); - if (lib == NULL) pusherror(L); - UNUSED(gl); - return lib; -} - -static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) -{ - lua_CFunction f = (lua_CFunction)GetProcAddress((HINSTANCE)lib, sym); - if (f == NULL) pusherror(L); - return f; -} - -static const char *ll_bcsym(void *lib, const char *sym) -{ - if (lib) { - return (const char *)GetProcAddress((HINSTANCE)lib, sym); - } else { - HINSTANCE h = GetModuleHandleA(NULL); - const char *p = (const char *)GetProcAddress(h, sym); - if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - (const char *)ll_bcsym, &h)) - p = (const char *)GetProcAddress(h, sym); - return p; - } -} - -#else - -#undef PACKAGE_LIB_FAIL -#define PACKAGE_LIB_FAIL "absent" - -#define DLMSG "dynamic libraries not enabled; no support for target OS" - -static void ll_unloadlib(void *lib) -{ - UNUSED(lib); -} - -static void *ll_load(lua_State *L, const char *path, int gl) -{ - UNUSED(path); UNUSED(gl); - lua_pushliteral(L, DLMSG); - return NULL; -} - -static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) -{ - UNUSED(lib); UNUSED(sym); - lua_pushliteral(L, DLMSG); - return NULL; -} - -static const char *ll_bcsym(void *lib, const char *sym) -{ - UNUSED(lib); UNUSED(sym); - return NULL; -} - -#endif /* ------------------------------------------------------------------------ */ @@ -193,8 +78,7 @@ static void **ll_register(lua_State *L, const char *path) lua_pop(L, 1); plib = (void **)lua_newuserdata(L, sizeof(void *)); *plib = NULL; - luaL_getmetatable(L, "_LOADLIB"); - lua_setmetatable(L, -2); + luaL_setmetatable(L, "_LOADLIB"); lua_pushfstring(L, "LOADLIB: %s", path); lua_pushvalue(L, -2); lua_settable(L, LUA_REGISTRYINDEX); @@ -489,29 +373,19 @@ static void modinit(lua_State *L, const char *modname) 
static int lj_cf_package_module(lua_State *L) { const char *modname = luaL_checkstring(L, 1); - int loaded = lua_gettop(L) + 1; /* index of _LOADED table */ - lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); - lua_getfield(L, loaded, modname); /* get _LOADED[modname] */ - if (!lua_istable(L, -1)) { /* not found? */ - lua_pop(L, 1); /* remove previous result */ - /* try global variable (and create one if it does not exist) */ - if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL) - lj_err_callerv(L, LJ_ERR_BADMODN, modname); - lua_pushvalue(L, -1); - lua_setfield(L, loaded, modname); /* _LOADED[modname] = new table */ - } - /* check whether table already has a _NAME field */ + int lastarg = (int)(L->top - L->base); + luaL_pushmodule(L, modname, 1); lua_getfield(L, -1, "_NAME"); - if (!lua_isnil(L, -1)) { /* is table an initialized module? */ + if (!lua_isnil(L, -1)) { /* Module already initialized? */ lua_pop(L, 1); - } else { /* no; initialize it */ + } else { lua_pop(L, 1); modinit(L, modname); } lua_pushvalue(L, -1); setfenv(L); - dooptions(L, loaded - 1); - return 0; + dooptions(L, lastarg); + return LJ_52; } static int lj_cf_package_seeall(lua_State *L) @@ -534,12 +408,7 @@ static int lj_cf_package_seeall(lua_State *L) static void setpath(lua_State *L, const char *fieldname, const char *envname, const char *def, int noenv) { -#if LJ_TARGET_CONSOLE - const char *path = NULL; - UNUSED(envname); -#else const char *path = getenv(envname); -#endif if (path == NULL || noenv) { lua_pushstring(L, def); } else { @@ -582,8 +451,7 @@ LUALIB_API int luaopen_package(lua_State *L) lj_lib_pushcf(L, lj_cf_package_unloadlib, 1); lua_setfield(L, -2, "__gc"); luaL_register(L, LUA_LOADLIBNAME, package_lib); - lua_pushvalue(L, -1); - lua_replace(L, LUA_ENVIRONINDEX); + lua_copy(L, -1, LUA_ENVIRONINDEX); lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0); for (i = 0; package_loaders[i] != NULL; i++) { lj_lib_pushcf(L, package_loaders[i], 1); diff --git 
a/src/lib_string.c b/src/lib_string.c index c7f37bc752..417bdf1072 100644 --- a/src/lib_string.c +++ b/src/lib_string.c @@ -681,21 +681,10 @@ LJLIB_CF(string_format) LJLIB_REC(.) luaL_argerror(L, arg, lj_obj_typename[0]); switch (STRFMT_TYPE(sf)) { case STRFMT_INT: - if (tvisint(L->base+arg-1)) { - int32_t k = intV(L->base+arg-1); - if (sf == STRFMT_INT) - lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */ - else - lj_strfmt_putfxint(sb, sf, k); - } else { - lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg)); - } + lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg)); break; case STRFMT_UINT: - if (tvisint(L->base+arg-1)) - lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1)); - else - lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg)); + lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg)); break; case STRFMT_NUM: lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg)); @@ -737,10 +726,6 @@ LUALIB_API int luaopen_string(lua_State *L) GCtab *mt; global_State *g; LJ_LIB_REG(L, LUA_STRLIBNAME, string); -#if defined(LUA_COMPAT_GFIND) && !LJ_52 - lua_getfield(L, -1, "gmatch"); - lua_setfield(L, -2, "gfind"); -#endif mt = lj_tab_new(L, 0, 1); /* NOBARRIER: basemt is a GC root. */ g = G(L); diff --git a/src/lj_alloc.c b/src/lj_alloc.c deleted file mode 100644 index 95d15d046a..0000000000 --- a/src/lj_alloc.c +++ /dev/null @@ -1,1489 +0,0 @@ -/* -** Bundled memory allocator. -** -** Beware: this is a HEAVILY CUSTOMIZED version of dlmalloc. -** The original bears the following remark: -** -** This is a version (aka dlmalloc) of malloc/free/realloc written by -** Doug Lea and released to the public domain, as explained at -** http://creativecommons.org/licenses/publicdomain. -** -** * Version pre-2.8.4 Wed Mar 29 19:46:29 2006 (dl at gee) -** -** No additional copyright is claimed over the customizations. -** Please do NOT bother the original author about this version here! 
-** -** If you want to use dlmalloc in another project, you should get -** the original from: ftp://gee.cs.oswego.edu/pub/misc/ -** For thread-safe derivatives, take a look at: -** - ptmalloc: http://www.malloc.de/ -** - nedmalloc: http://www.nedprod.com/programs/portable/nedmalloc/ -*/ - -#define lj_alloc_c -#define LUA_CORE - -/* To get the mremap prototype. Must be defined before any system includes. */ -#if defined(__linux__) && !defined(_GNU_SOURCE) -#define _GNU_SOURCE -#endif - -#include "lj_def.h" -#include "lj_arch.h" -#include "lj_alloc.h" - -#ifndef LUAJIT_USE_SYSMALLOC - -#define MAX_SIZE_T (~(size_t)0) -#define MALLOC_ALIGNMENT ((size_t)8U) - -#define DEFAULT_GRANULARITY ((size_t)128U * (size_t)1024U) -#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) -#define DEFAULT_MMAP_THRESHOLD ((size_t)128U * (size_t)1024U) -#define MAX_RELEASE_CHECK_RATE 255 - -/* ------------------- size_t and alignment properties -------------------- */ - -/* The byte and bit size of a size_t */ -#define SIZE_T_SIZE (sizeof(size_t)) -#define SIZE_T_BITSIZE (sizeof(size_t) << 3) - -/* Some constants coerced to size_t */ -/* Annoying but necessary to avoid errors on some platforms */ -#define SIZE_T_ZERO ((size_t)0) -#define SIZE_T_ONE ((size_t)1) -#define SIZE_T_TWO ((size_t)2) -#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1) -#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2) -#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES) - -/* The bit mask value corresponding to MALLOC_ALIGNMENT */ -#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE) - -/* the number of bytes to offset an address to align it */ -#define align_offset(A)\ - ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 
0 :\ - ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK)) - -/* -------------------------- MMAP support ------------------------------- */ - -#define MFAIL ((void *)(MAX_SIZE_T)) -#define CMFAIL ((char *)(MFAIL)) /* defined for convenience */ - -#define IS_DIRECT_BIT (SIZE_T_ONE) - - -/* Determine system-specific block allocation method. */ -#if LJ_TARGET_WINDOWS - -#define WIN32_LEAN_AND_MEAN -#include - -#define LJ_ALLOC_VIRTUALALLOC 1 - -#if LJ_64 && !LJ_GC64 -#define LJ_ALLOC_NTAVM 1 -#endif - -#else - -#include -/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */ -#include - -#define LJ_ALLOC_MMAP 1 - -#if LJ_64 - -#define LJ_ALLOC_MMAP_PROBE 1 - -#if LJ_GC64 -#define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */ -#elif LJ_TARGET_X64 && LJ_HASJIT -/* Due to limitations in the x64 compiler backend. */ -#define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */ -#else -#define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */ -#endif - -#endif - -#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT) -#define LJ_ALLOC_MMAP32 1 -#endif - -#if LJ_TARGET_LINUX -#define LJ_ALLOC_MREMAP 1 -#endif - -#endif - - -#if LJ_ALLOC_VIRTUALALLOC - -#if LJ_ALLOC_NTAVM -/* Undocumented, but hey, that's what we all love so much about Windows. */ -typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, - size_t *size, ULONG alloctype, ULONG prot); -static PNTAVM ntavm; - -/* Number of top bits of the lower 32 bits of an address that must be zero. -** Apparently 0 gives us full 64 bit addresses and 1 gives us the lower 2GB. -*/ -#define NTAVM_ZEROBITS 1 - -static void init_mmap(void) -{ - ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), - "NtAllocateVirtualMemory"); -} -#define INIT_MMAP() init_mmap() - -/* Win64 32 bit MMAP via NtAllocateVirtualMemory. 
*/ -static void *CALL_MMAP(size_t size) -{ - DWORD olderr = GetLastError(); - void *ptr = NULL; - long st = ntavm(INVALID_HANDLE_VALUE, &ptr, NTAVM_ZEROBITS, &size, - MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); - SetLastError(olderr); - return st == 0 ? ptr : MFAIL; -} - -/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ -static void *DIRECT_MMAP(size_t size) -{ - DWORD olderr = GetLastError(); - void *ptr = NULL; - long st = ntavm(INVALID_HANDLE_VALUE, &ptr, NTAVM_ZEROBITS, &size, - MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, PAGE_READWRITE); - SetLastError(olderr); - return st == 0 ? ptr : MFAIL; -} - -#else - -/* Win32 MMAP via VirtualAlloc */ -static void *CALL_MMAP(size_t size) -{ - DWORD olderr = GetLastError(); - void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); - SetLastError(olderr); - return ptr ? ptr : MFAIL; -} - -/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ -static void *DIRECT_MMAP(size_t size) -{ - DWORD olderr = GetLastError(); - void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, - PAGE_READWRITE); - SetLastError(olderr); - return ptr ? 
ptr : MFAIL; -} - -#endif - -/* This function supports releasing coalesed segments */ -static int CALL_MUNMAP(void *ptr, size_t size) -{ - DWORD olderr = GetLastError(); - MEMORY_BASIC_INFORMATION minfo; - char *cptr = (char *)ptr; - while (size) { - if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) - return -1; - if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr || - minfo.State != MEM_COMMIT || minfo.RegionSize > size) - return -1; - if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) - return -1; - cptr += minfo.RegionSize; - size -= minfo.RegionSize; - } - SetLastError(olderr); - return 0; -} - -#elif LJ_ALLOC_MMAP - -#define MMAP_PROT (PROT_READ|PROT_WRITE) -#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) -#define MAP_ANONYMOUS MAP_ANON -#endif -#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) - -#if LJ_ALLOC_MMAP_PROBE - -#ifdef MAP_TRYFIXED -#define MMAP_FLAGS_PROBE (MMAP_FLAGS|MAP_TRYFIXED) -#else -#define MMAP_FLAGS_PROBE MMAP_FLAGS -#endif - -#define LJ_ALLOC_MMAP_PROBE_MAX 30 -#define LJ_ALLOC_MMAP_PROBE_LINEAR 5 - -#define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000) - -/* No point in a giant ifdef mess. Just try to open /dev/urandom. -** It doesn't really matter if this fails, since we get some ASLR bits from -** every unsuitable allocation, too. And we prefer linear allocation, anyway. -*/ -#include -#include - -static uintptr_t mmap_probe_seed(void) -{ - uintptr_t val; - int fd = open("/dev/urandom", O_RDONLY); - if (fd != -1) { - int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val)); - (void)close(fd); - if (ok) return val; - } - return 1; /* Punt. */ -} - -static void *mmap_probe(size_t size) -{ - /* Hint for next allocation. Doesn't need to be thread-safe. 
*/ - static uintptr_t hint_addr = 0; - static uintptr_t hint_prng = 0; - int olderr = errno; - int retry; - for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) { - void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0); - uintptr_t addr = (uintptr_t)p; - if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER) { - /* We got a suitable address. Bump the hint address. */ - hint_addr = addr + size; - errno = olderr; - return p; - } - if (p != MFAIL) { - munmap(p, size); - } else if (errno == ENOMEM) { - return MFAIL; - } - if (hint_addr) { - /* First, try linear probing. */ - if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) { - hint_addr += 0x1000000; - if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0) - hint_addr = 0; - continue; - } else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) { - /* Next, try a no-hint probe to get back an ASLR address. */ - hint_addr = 0; - continue; - } - } - /* Finally, try pseudo-random probing. */ - if (LJ_UNLIKELY(hint_prng == 0)) { - hint_prng = mmap_probe_seed(); - } - /* The unsuitable address we got has some ASLR PRNG bits. */ - hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1)); - do { /* The PRNG itself is very weak, but see above. */ - hint_prng = hint_prng * 1103515245 + 12345; - hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE; - hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1); - } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER); - } - errno = olderr; - return MFAIL; -} - -#endif - -#if LJ_ALLOC_MMAP32 - -#if defined(__sun__) -#define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000) -#else -#define LJ_ALLOC_MMAP32_START ((uintptr_t)0) -#endif - -static void *mmap_map32(size_t size) -{ -#if LJ_ALLOC_MMAP_PROBE - static int fallback = 0; - if (fallback) - return mmap_probe(size); -#endif - { - int olderr = errno; - void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); - errno = olderr; - /* This only allows 1GB on Linux. So fallback to probing to get 2GB. 
*/ -#if LJ_ALLOC_MMAP_PROBE - if (ptr == MFAIL) { - fallback = 1; - return mmap_probe(size); - } -#endif - return ptr; - } -} - -#endif - -#if LJ_ALLOC_MMAP32 -#define CALL_MMAP(size) mmap_map32(size) -#elif LJ_ALLOC_MMAP_PROBE -#define CALL_MMAP(size) mmap_probe(size) -#else -static void *CALL_MMAP(size_t size) -{ - int olderr = errno; - void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); - errno = olderr; - return ptr; -} -#endif - -#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 - -#include - -static void init_mmap(void) -{ - struct rlimit rlim; - rlim.rlim_cur = rlim.rlim_max = 0x10000; - setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */ -} -#define INIT_MMAP() init_mmap() - -#endif - -static int CALL_MUNMAP(void *ptr, size_t size) -{ - int olderr = errno; - int ret = munmap(ptr, size); - errno = olderr; - return ret; -} - -#if LJ_ALLOC_MREMAP -/* Need to define _GNU_SOURCE to get the mremap prototype. */ -static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags) -{ - int olderr = errno; - ptr = mremap(ptr, osz, nsz, flags); - errno = olderr; - return ptr; -} - -#define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) -#define CALL_MREMAP_NOMOVE 0 -#define CALL_MREMAP_MAYMOVE 1 -#if LJ_64 && !LJ_GC64 -#define CALL_MREMAP_MV CALL_MREMAP_NOMOVE -#else -#define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE -#endif -#endif - -#endif - - -#ifndef INIT_MMAP -#define INIT_MMAP() ((void)0) -#endif - -#ifndef DIRECT_MMAP -#define DIRECT_MMAP(s) CALL_MMAP(s) -#endif - -#ifndef CALL_MREMAP -#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) -#endif - -/* ----------------------- Chunk representations ------------------------ */ - -struct malloc_chunk { - size_t prev_foot; /* Size of previous chunk (if free). */ - size_t head; /* Size and inuse bits. */ - struct malloc_chunk *fd; /* double links -- used only if free. 
*/ - struct malloc_chunk *bk; -}; - -typedef struct malloc_chunk mchunk; -typedef struct malloc_chunk *mchunkptr; -typedef struct malloc_chunk *sbinptr; /* The type of bins of chunks */ -typedef size_t bindex_t; /* Described below */ -typedef unsigned int binmap_t; /* Described below */ -typedef unsigned int flag_t; /* The type of various bit flag sets */ - -/* ------------------- Chunks sizes and alignments ----------------------- */ - -#define MCHUNK_SIZE (sizeof(mchunk)) - -#define CHUNK_OVERHEAD (SIZE_T_SIZE) - -/* Direct chunks need a second word of overhead ... */ -#define DIRECT_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) -/* ... and additional padding for fake next-chunk at foot */ -#define DIRECT_FOOT_PAD (FOUR_SIZE_T_SIZES) - -/* The smallest size we can malloc is an aligned minimal chunk */ -#define MIN_CHUNK_SIZE\ - ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) - -/* conversion from malloc headers to user pointers, and back */ -#define chunk2mem(p) ((void *)((char *)(p) + TWO_SIZE_T_SIZES)) -#define mem2chunk(mem) ((mchunkptr)((char *)(mem) - TWO_SIZE_T_SIZES)) -/* chunk associated with aligned address A */ -#define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A))) - -/* Bounds on request (not chunk) sizes. */ -#define MAX_REQUEST ((~MIN_CHUNK_SIZE+1) << 2) -#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE) - -/* pad request bytes into a usable size */ -#define pad_request(req) \ - (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) - -/* pad request, checking for minimum (but not maximum) */ -#define request2size(req) \ - (((req) < MIN_REQUEST)? 
MIN_CHUNK_SIZE : pad_request(req)) - -/* ------------------ Operations on head and foot fields ----------------- */ - -#define PINUSE_BIT (SIZE_T_ONE) -#define CINUSE_BIT (SIZE_T_TWO) -#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT) - -/* Head value for fenceposts */ -#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE) - -/* extraction of fields from head words */ -#define cinuse(p) ((p)->head & CINUSE_BIT) -#define pinuse(p) ((p)->head & PINUSE_BIT) -#define chunksize(p) ((p)->head & ~(INUSE_BITS)) - -#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT) -#define clear_cinuse(p) ((p)->head &= ~CINUSE_BIT) - -/* Treat space at ptr +/- offset as a chunk */ -#define chunk_plus_offset(p, s) ((mchunkptr)(((char *)(p)) + (s))) -#define chunk_minus_offset(p, s) ((mchunkptr)(((char *)(p)) - (s))) - -/* Ptr to next or previous physical malloc_chunk. */ -#define next_chunk(p) ((mchunkptr)(((char *)(p)) + ((p)->head & ~INUSE_BITS))) -#define prev_chunk(p) ((mchunkptr)(((char *)(p)) - ((p)->prev_foot) )) - -/* extract next chunk's pinuse bit */ -#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT) - -/* Get/set size at footer */ -#define get_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot) -#define set_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot = (s)) - -/* Set size, pinuse bit, and foot */ -#define set_size_and_pinuse_of_free_chunk(p, s)\ - ((p)->head = (s|PINUSE_BIT), set_foot(p, s)) - -/* Set size, pinuse bit, foot, and clear next pinuse */ -#define set_free_with_pinuse(p, s, n)\ - (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s)) - -#define is_direct(p)\ - (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_DIRECT_BIT)) - -/* Get the internal overhead associated with chunk p */ -#define overhead_for(p)\ - (is_direct(p)? 
DIRECT_CHUNK_OVERHEAD : CHUNK_OVERHEAD) - -/* ---------------------- Overlaid data structures ----------------------- */ - -struct malloc_tree_chunk { - /* The first four fields must be compatible with malloc_chunk */ - size_t prev_foot; - size_t head; - struct malloc_tree_chunk *fd; - struct malloc_tree_chunk *bk; - - struct malloc_tree_chunk *child[2]; - struct malloc_tree_chunk *parent; - bindex_t index; -}; - -typedef struct malloc_tree_chunk tchunk; -typedef struct malloc_tree_chunk *tchunkptr; -typedef struct malloc_tree_chunk *tbinptr; /* The type of bins of trees */ - -/* A little helper macro for trees */ -#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1]) - -/* ----------------------------- Segments -------------------------------- */ - -struct malloc_segment { - char *base; /* base address */ - size_t size; /* allocated size */ - struct malloc_segment *next; /* ptr to next segment */ -}; - -typedef struct malloc_segment msegment; -typedef struct malloc_segment *msegmentptr; - -/* ---------------------------- malloc_state ----------------------------- */ - -/* Bin types, widths and sizes */ -#define NSMALLBINS (32U) -#define NTREEBINS (32U) -#define SMALLBIN_SHIFT (3U) -#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT) -#define TREEBIN_SHIFT (8U) -#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT) -#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE) -#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD) - -struct malloc_state { - binmap_t smallmap; - binmap_t treemap; - size_t dvsize; - size_t topsize; - mchunkptr dv; - mchunkptr top; - size_t trim_check; - size_t release_checks; - mchunkptr smallbins[(NSMALLBINS+1)*2]; - tbinptr treebins[NTREEBINS]; - msegment seg; -}; - -typedef struct malloc_state *mstate; - -#define is_initialized(M) ((M)->top != 0) - -/* -------------------------- system alloc setup ------------------------- */ - -/* page-align a size */ -#define page_align(S)\ - (((S) + 
(LJ_PAGESIZE - SIZE_T_ONE)) & ~(LJ_PAGESIZE - SIZE_T_ONE)) - -/* granularity-align a size */ -#define granularity_align(S)\ - (((S) + (DEFAULT_GRANULARITY - SIZE_T_ONE))\ - & ~(DEFAULT_GRANULARITY - SIZE_T_ONE)) - -#if LJ_TARGET_WINDOWS -#define mmap_align(S) granularity_align(S) -#else -#define mmap_align(S) page_align(S) -#endif - -/* True if segment S holds address A */ -#define segment_holds(S, A)\ - ((char *)(A) >= S->base && (char *)(A) < S->base + S->size) - -/* Return segment holding given address */ -static msegmentptr segment_holding(mstate m, char *addr) -{ - msegmentptr sp = &m->seg; - for (;;) { - if (addr >= sp->base && addr < sp->base + sp->size) - return sp; - if ((sp = sp->next) == 0) - return 0; - } -} - -/* Return true if segment contains a segment link */ -static int has_segment_link(mstate m, msegmentptr ss) -{ - msegmentptr sp = &m->seg; - for (;;) { - if ((char *)sp >= ss->base && (char *)sp < ss->base + ss->size) - return 1; - if ((sp = sp->next) == 0) - return 0; - } -} - -/* - TOP_FOOT_SIZE is padding at the end of a segment, including space - that may be needed to place segment records and fenceposts when new - noncontiguous segments are added. -*/ -#define TOP_FOOT_SIZE\ - (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) - -/* ---------------------------- Indexing Bins ---------------------------- */ - -#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS) -#define small_index(s) ((s) >> SMALLBIN_SHIFT) -#define small_index2size(i) ((i) << SMALLBIN_SHIFT) -#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE)) - -/* addressing by index. 
See above about smallbin repositioning */ -#define smallbin_at(M, i) ((sbinptr)((char *)&((M)->smallbins[(i)<<1]))) -#define treebin_at(M,i) (&((M)->treebins[i])) - -/* assign tree index for size S to variable I */ -#define compute_tree_index(S, I)\ -{\ - unsigned int X = (unsigned int)(S >> TREEBIN_SHIFT);\ - if (X == 0) {\ - I = 0;\ - } else if (X > 0xFFFF) {\ - I = NTREEBINS-1;\ - } else {\ - unsigned int K = lj_fls(X);\ - I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ - }\ -} - -/* Bit representing maximum resolved size in a treebin at i */ -#define bit_for_tree_index(i) \ - (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2) - -/* Shift placing maximum resolved bit in a treebin at i as sign bit */ -#define leftshift_for_tree_index(i) \ - ((i == NTREEBINS-1)? 0 : \ - ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2))) - -/* The size of the smallest chunk held in bin with index i */ -#define minsize_for_tree_index(i) \ - ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \ - (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1))) - -/* ------------------------ Operations on bin maps ----------------------- */ - -/* bit corresponding to given index */ -#define idx2bit(i) ((binmap_t)(1) << (i)) - -/* Mark/Clear bits with given index */ -#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i)) -#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i)) -#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i)) - -#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i)) -#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i)) -#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i)) - -/* mask with all bits to left of least bit of x on */ -#define left_bits(x) ((x<<1) | (~(x<<1)+1)) - -/* Set cinuse bit and pinuse bit of next chunk */ -#define set_inuse(M,p,s)\ - ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ - ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT) - -/* Set cinuse and 
pinuse of this chunk and pinuse of next chunk */ -#define set_inuse_and_pinuse(M,p,s)\ - ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ - ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT) - -/* Set size, cinuse and pinuse bit of this chunk */ -#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ - ((p)->head = (s|PINUSE_BIT|CINUSE_BIT)) - -/* ----------------------- Operations on smallbins ----------------------- */ - -/* Link a free chunk into a smallbin */ -#define insert_small_chunk(M, P, S) {\ - bindex_t I = small_index(S);\ - mchunkptr B = smallbin_at(M, I);\ - mchunkptr F = B;\ - if (!smallmap_is_marked(M, I))\ - mark_smallmap(M, I);\ - else\ - F = B->fd;\ - B->fd = P;\ - F->bk = P;\ - P->fd = F;\ - P->bk = B;\ -} - -/* Unlink a chunk from a smallbin */ -#define unlink_small_chunk(M, P, S) {\ - mchunkptr F = P->fd;\ - mchunkptr B = P->bk;\ - bindex_t I = small_index(S);\ - if (F == B) {\ - clear_smallmap(M, I);\ - } else {\ - F->bk = B;\ - B->fd = F;\ - }\ -} - -/* Unlink the first chunk from a smallbin */ -#define unlink_first_small_chunk(M, B, P, I) {\ - mchunkptr F = P->fd;\ - if (B == F) {\ - clear_smallmap(M, I);\ - } else {\ - B->fd = F;\ - F->bk = B;\ - }\ -} - -/* Replace dv node, binning the old one */ -/* Used only when dvsize known to be small */ -#define replace_dv(M, P, S) {\ - size_t DVS = M->dvsize;\ - if (DVS != 0) {\ - mchunkptr DV = M->dv;\ - insert_small_chunk(M, DV, DVS);\ - }\ - M->dvsize = S;\ - M->dv = P;\ -} - -/* ------------------------- Operations on trees ------------------------- */ - -/* Insert chunk into tree */ -#define insert_large_chunk(M, X, S) {\ - tbinptr *H;\ - bindex_t I;\ - compute_tree_index(S, I);\ - H = treebin_at(M, I);\ - X->index = I;\ - X->child[0] = X->child[1] = 0;\ - if (!treemap_is_marked(M, I)) {\ - mark_treemap(M, I);\ - *H = X;\ - X->parent = (tchunkptr)H;\ - X->fd = X->bk = X;\ - } else {\ - tchunkptr T = *H;\ - size_t K = S << leftshift_for_tree_index(I);\ - for (;;) {\ - if (chunksize(T) != S) {\ - 
tchunkptr *C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\ - K <<= 1;\ - if (*C != 0) {\ - T = *C;\ - } else {\ - *C = X;\ - X->parent = T;\ - X->fd = X->bk = X;\ - break;\ - }\ - } else {\ - tchunkptr F = T->fd;\ - T->fd = F->bk = X;\ - X->fd = F;\ - X->bk = T;\ - X->parent = 0;\ - break;\ - }\ - }\ - }\ -} - -#define unlink_large_chunk(M, X) {\ - tchunkptr XP = X->parent;\ - tchunkptr R;\ - if (X->bk != X) {\ - tchunkptr F = X->fd;\ - R = X->bk;\ - F->bk = R;\ - R->fd = F;\ - } else {\ - tchunkptr *RP;\ - if (((R = *(RP = &(X->child[1]))) != 0) ||\ - ((R = *(RP = &(X->child[0]))) != 0)) {\ - tchunkptr *CP;\ - while ((*(CP = &(R->child[1])) != 0) ||\ - (*(CP = &(R->child[0])) != 0)) {\ - R = *(RP = CP);\ - }\ - *RP = 0;\ - }\ - }\ - if (XP != 0) {\ - tbinptr *H = treebin_at(M, X->index);\ - if (X == *H) {\ - if ((*H = R) == 0) \ - clear_treemap(M, X->index);\ - } else {\ - if (XP->child[0] == X) \ - XP->child[0] = R;\ - else \ - XP->child[1] = R;\ - }\ - if (R != 0) {\ - tchunkptr C0, C1;\ - R->parent = XP;\ - if ((C0 = X->child[0]) != 0) {\ - R->child[0] = C0;\ - C0->parent = R;\ - }\ - if ((C1 = X->child[1]) != 0) {\ - R->child[1] = C1;\ - C1->parent = R;\ - }\ - }\ - }\ -} - -/* Relays to large vs small bin operations */ - -#define insert_chunk(M, P, S)\ - if (is_small(S)) { insert_small_chunk(M, P, S)\ - } else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); } - -#define unlink_chunk(M, P, S)\ - if (is_small(S)) { unlink_small_chunk(M, P, S)\ - } else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); } - -/* ----------------------- Direct-mmapping chunks ----------------------- */ - -static void *direct_alloc(size_t nb) -{ - size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); - if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */ - char *mm = (char *)(DIRECT_MMAP(mmsize)); - if (mm != CMFAIL) { - size_t offset = align_offset(chunk2mem(mm)); - size_t psize = mmsize - offset - DIRECT_FOOT_PAD; - 
mchunkptr p = (mchunkptr)(mm + offset); - p->prev_foot = offset | IS_DIRECT_BIT; - p->head = psize|CINUSE_BIT; - chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; - chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; - return chunk2mem(p); - } - } - return NULL; -} - -static mchunkptr direct_resize(mchunkptr oldp, size_t nb) -{ - size_t oldsize = chunksize(oldp); - if (is_small(nb)) /* Can't shrink direct regions below small size */ - return NULL; - /* Keep old chunk if big enough but not too big */ - if (oldsize >= nb + SIZE_T_SIZE && - (oldsize - nb) <= (DEFAULT_GRANULARITY >> 1)) { - return oldp; - } else { - size_t offset = oldp->prev_foot & ~IS_DIRECT_BIT; - size_t oldmmsize = oldsize + offset + DIRECT_FOOT_PAD; - size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); - char *cp = (char *)CALL_MREMAP((char *)oldp - offset, - oldmmsize, newmmsize, CALL_MREMAP_MV); - if (cp != CMFAIL) { - mchunkptr newp = (mchunkptr)(cp + offset); - size_t psize = newmmsize - offset - DIRECT_FOOT_PAD; - newp->head = psize|CINUSE_BIT; - chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; - chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; - return newp; - } - } - return NULL; -} - -/* -------------------------- mspace management -------------------------- */ - -/* Initialize top chunk and its size */ -static void init_top(mstate m, mchunkptr p, size_t psize) -{ - /* Ensure alignment */ - size_t offset = align_offset(chunk2mem(p)); - p = (mchunkptr)((char *)p + offset); - psize -= offset; - - m->top = p; - m->topsize = psize; - p->head = psize | PINUSE_BIT; - /* set size of fake trailing chunk holding overhead space only once */ - chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE; - m->trim_check = DEFAULT_TRIM_THRESHOLD; /* reset on each update */ -} - -/* Initialize bins for a new mstate that is otherwise zeroed out */ -static void init_bins(mstate m) -{ - /* Establish circular links for smallbins */ - bindex_t i; - for (i = 0; i < NSMALLBINS; i++) { - 
sbinptr bin = smallbin_at(m,i); - bin->fd = bin->bk = bin; - } -} - -/* Allocate chunk and prepend remainder with chunk in successor base. */ -static void *prepend_alloc(mstate m, char *newbase, char *oldbase, size_t nb) -{ - mchunkptr p = align_as_chunk(newbase); - mchunkptr oldfirst = align_as_chunk(oldbase); - size_t psize = (size_t)((char *)oldfirst - (char *)p); - mchunkptr q = chunk_plus_offset(p, nb); - size_t qsize = psize - nb; - set_size_and_pinuse_of_inuse_chunk(m, p, nb); - - /* consolidate remainder with first chunk of old base */ - if (oldfirst == m->top) { - size_t tsize = m->topsize += qsize; - m->top = q; - q->head = tsize | PINUSE_BIT; - } else if (oldfirst == m->dv) { - size_t dsize = m->dvsize += qsize; - m->dv = q; - set_size_and_pinuse_of_free_chunk(q, dsize); - } else { - if (!cinuse(oldfirst)) { - size_t nsize = chunksize(oldfirst); - unlink_chunk(m, oldfirst, nsize); - oldfirst = chunk_plus_offset(oldfirst, nsize); - qsize += nsize; - } - set_free_with_pinuse(q, qsize, oldfirst); - insert_chunk(m, q, qsize); - } - - return chunk2mem(p); -} - -/* Add a segment to hold a new noncontiguous region */ -static void add_segment(mstate m, char *tbase, size_t tsize) -{ - /* Determine locations and sizes of segment, fenceposts, old top */ - char *old_top = (char *)m->top; - msegmentptr oldsp = segment_holding(m, old_top); - char *old_end = oldsp->base + oldsp->size; - size_t ssize = pad_request(sizeof(struct malloc_segment)); - char *rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK); - size_t offset = align_offset(chunk2mem(rawsp)); - char *asp = rawsp + offset; - char *csp = (asp < (old_top + MIN_CHUNK_SIZE))? 
old_top : asp; - mchunkptr sp = (mchunkptr)csp; - msegmentptr ss = (msegmentptr)(chunk2mem(sp)); - mchunkptr tnext = chunk_plus_offset(sp, ssize); - mchunkptr p = tnext; - - /* reset top to new space */ - init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); - - /* Set up segment record */ - set_size_and_pinuse_of_inuse_chunk(m, sp, ssize); - *ss = m->seg; /* Push current record */ - m->seg.base = tbase; - m->seg.size = tsize; - m->seg.next = ss; - - /* Insert trailing fenceposts */ - for (;;) { - mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE); - p->head = FENCEPOST_HEAD; - if ((char *)(&(nextp->head)) < old_end) - p = nextp; - else - break; - } - - /* Insert the rest of old top into a bin as an ordinary free chunk */ - if (csp != old_top) { - mchunkptr q = (mchunkptr)old_top; - size_t psize = (size_t)(csp - old_top); - mchunkptr tn = chunk_plus_offset(q, psize); - set_free_with_pinuse(q, psize, tn); - insert_chunk(m, q, psize); - } -} - -/* -------------------------- System allocation -------------------------- */ - -static void *alloc_sys(mstate m, size_t nb) -{ - char *tbase = CMFAIL; - size_t tsize = 0; - - /* Directly map large chunks */ - if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) { - void *mem = direct_alloc(nb); - if (mem != 0) - return mem; - } - - { - size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE; - size_t rsize = granularity_align(req); - if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */ - char *mp = (char *)(CALL_MMAP(rsize)); - if (mp != CMFAIL) { - tbase = mp; - tsize = rsize; - } - } - } - - if (tbase != CMFAIL) { - msegmentptr sp = &m->seg; - /* Try to merge with an existing segment */ - while (sp != 0 && tbase != sp->base + sp->size) - sp = sp->next; - if (sp != 0 && segment_holds(sp, m->top)) { /* append */ - sp->size += tsize; - init_top(m, m->top, m->topsize + tsize); - } else { - sp = &m->seg; - while (sp != 0 && sp->base != tbase + tsize) - sp = sp->next; - if (sp != 0) { - char *oldbase = sp->base; - sp->base = tbase; - 
sp->size += tsize; - return prepend_alloc(m, tbase, oldbase, nb); - } else { - add_segment(m, tbase, tsize); - } - } - - if (nb < m->topsize) { /* Allocate from new or extended top space */ - size_t rsize = m->topsize -= nb; - mchunkptr p = m->top; - mchunkptr r = m->top = chunk_plus_offset(p, nb); - r->head = rsize | PINUSE_BIT; - set_size_and_pinuse_of_inuse_chunk(m, p, nb); - return chunk2mem(p); - } - } - - return NULL; -} - -/* ----------------------- system deallocation -------------------------- */ - -/* Unmap and unlink any mmapped segments that don't contain used chunks */ -static size_t release_unused_segments(mstate m) -{ - size_t released = 0; - size_t nsegs = 0; - msegmentptr pred = &m->seg; - msegmentptr sp = pred->next; - while (sp != 0) { - char *base = sp->base; - size_t size = sp->size; - msegmentptr next = sp->next; - nsegs++; - { - mchunkptr p = align_as_chunk(base); - size_t psize = chunksize(p); - /* Can unmap if first chunk holds entire segment and not pinned */ - if (!cinuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) { - tchunkptr tp = (tchunkptr)p; - if (p == m->dv) { - m->dv = 0; - m->dvsize = 0; - } else { - unlink_large_chunk(m, tp); - } - if (CALL_MUNMAP(base, size) == 0) { - released += size; - /* unlink obsoleted record */ - sp = pred; - sp->next = next; - } else { /* back out if cannot unmap */ - insert_large_chunk(m, tp, psize); - } - } - } - pred = sp; - sp = next; - } - /* Reset check counter */ - m->release_checks = nsegs > MAX_RELEASE_CHECK_RATE ? 
- nsegs : MAX_RELEASE_CHECK_RATE; - return released; -} - -static int alloc_trim(mstate m, size_t pad) -{ - size_t released = 0; - if (pad < MAX_REQUEST && is_initialized(m)) { - pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */ - - if (m->topsize > pad) { - /* Shrink top space in granularity-size units, keeping at least one */ - size_t unit = DEFAULT_GRANULARITY; - size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit - - SIZE_T_ONE) * unit; - msegmentptr sp = segment_holding(m, (char *)m->top); - - if (sp->size >= extra && - !has_segment_link(m, sp)) { /* can't shrink if pinned */ - size_t newsize = sp->size - extra; - /* Prefer mremap, fall back to munmap */ - if ((CALL_MREMAP(sp->base, sp->size, newsize, CALL_MREMAP_NOMOVE) != MFAIL) || - (CALL_MUNMAP(sp->base + newsize, extra) == 0)) { - released = extra; - } - } - - if (released != 0) { - sp->size -= released; - init_top(m, m->top, m->topsize - released); - } - } - - /* Unmap any unused mmapped segments */ - released += release_unused_segments(m); - - /* On failure, disable autotrim to avoid repeated failed future calls */ - if (released == 0 && m->topsize > m->trim_check) - m->trim_check = MAX_SIZE_T; - } - - return (released != 0)? 
1 : 0; -} - -/* ---------------------------- malloc support --------------------------- */ - -/* allocate a large request from the best fitting chunk in a treebin */ -static void *tmalloc_large(mstate m, size_t nb) -{ - tchunkptr v = 0; - size_t rsize = ~nb+1; /* Unsigned negation */ - tchunkptr t; - bindex_t idx; - compute_tree_index(nb, idx); - - if ((t = *treebin_at(m, idx)) != 0) { - /* Traverse tree for this bin looking for node with size == nb */ - size_t sizebits = nb << leftshift_for_tree_index(idx); - tchunkptr rst = 0; /* The deepest untaken right subtree */ - for (;;) { - tchunkptr rt; - size_t trem = chunksize(t) - nb; - if (trem < rsize) { - v = t; - if ((rsize = trem) == 0) - break; - } - rt = t->child[1]; - t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; - if (rt != 0 && rt != t) - rst = rt; - if (t == 0) { - t = rst; /* set t to least subtree holding sizes > nb */ - break; - } - sizebits <<= 1; - } - } - - if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */ - binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap; - if (leftbits != 0) - t = *treebin_at(m, lj_ffs(leftbits)); - } - - while (t != 0) { /* find smallest of tree or subtree */ - size_t trem = chunksize(t) - nb; - if (trem < rsize) { - rsize = trem; - v = t; - } - t = leftmost_child(t); - } - - /* If dv is a better fit, return NULL so malloc will use it */ - if (v != 0 && rsize < (size_t)(m->dvsize - nb)) { - mchunkptr r = chunk_plus_offset(v, nb); - unlink_large_chunk(m, v); - if (rsize < MIN_CHUNK_SIZE) { - set_inuse_and_pinuse(m, v, (rsize + nb)); - } else { - set_size_and_pinuse_of_inuse_chunk(m, v, nb); - set_size_and_pinuse_of_free_chunk(r, rsize); - insert_chunk(m, r, rsize); - } - return chunk2mem(v); - } - return NULL; -} - -/* allocate a small request from the best fitting chunk in a treebin */ -static void *tmalloc_small(mstate m, size_t nb) -{ - tchunkptr t, v; - mchunkptr r; - size_t rsize; - bindex_t i = lj_ffs(m->treemap); - - v = t = 
*treebin_at(m, i); - rsize = chunksize(t) - nb; - - while ((t = leftmost_child(t)) != 0) { - size_t trem = chunksize(t) - nb; - if (trem < rsize) { - rsize = trem; - v = t; - } - } - - r = chunk_plus_offset(v, nb); - unlink_large_chunk(m, v); - if (rsize < MIN_CHUNK_SIZE) { - set_inuse_and_pinuse(m, v, (rsize + nb)); - } else { - set_size_and_pinuse_of_inuse_chunk(m, v, nb); - set_size_and_pinuse_of_free_chunk(r, rsize); - replace_dv(m, r, rsize); - } - return chunk2mem(v); -} - -/* ----------------------------------------------------------------------- */ - -void *lj_alloc_create(void) -{ - size_t tsize = DEFAULT_GRANULARITY; - char *tbase; - INIT_MMAP(); - tbase = (char *)(CALL_MMAP(tsize)); - if (tbase != CMFAIL) { - size_t msize = pad_request(sizeof(struct malloc_state)); - mchunkptr mn; - mchunkptr msp = align_as_chunk(tbase); - mstate m = (mstate)(chunk2mem(msp)); - memset(m, 0, msize); - msp->head = (msize|PINUSE_BIT|CINUSE_BIT); - m->seg.base = tbase; - m->seg.size = tsize; - m->release_checks = MAX_RELEASE_CHECK_RATE; - init_bins(m); - mn = next_chunk(mem2chunk(m)); - init_top(m, mn, (size_t)((tbase + tsize) - (char *)mn) - TOP_FOOT_SIZE); - return m; - } - return NULL; -} - -void lj_alloc_destroy(void *msp) -{ - mstate ms = (mstate)msp; - msegmentptr sp = &ms->seg; - while (sp != 0) { - char *base = sp->base; - size_t size = sp->size; - sp = sp->next; - CALL_MUNMAP(base, size); - } -} - -static LJ_NOINLINE void *lj_alloc_malloc(void *msp, size_t nsize) -{ - mstate ms = (mstate)msp; - void *mem; - size_t nb; - if (nsize <= MAX_SMALL_REQUEST) { - bindex_t idx; - binmap_t smallbits; - nb = (nsize < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(nsize); - idx = small_index(nb); - smallbits = ms->smallmap >> idx; - - if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. 
*/ - mchunkptr b, p; - idx += ~smallbits & 1; /* Uses next bin if idx empty */ - b = smallbin_at(ms, idx); - p = b->fd; - unlink_first_small_chunk(ms, b, p, idx); - set_inuse_and_pinuse(ms, p, small_index2size(idx)); - mem = chunk2mem(p); - return mem; - } else if (nb > ms->dvsize) { - if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ - mchunkptr b, p, r; - size_t rsize; - binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); - bindex_t i = lj_ffs(leftbits); - b = smallbin_at(ms, i); - p = b->fd; - unlink_first_small_chunk(ms, b, p, i); - rsize = small_index2size(i) - nb; - /* Fit here cannot be remainderless if 4byte sizes */ - if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) { - set_inuse_and_pinuse(ms, p, small_index2size(i)); - } else { - set_size_and_pinuse_of_inuse_chunk(ms, p, nb); - r = chunk_plus_offset(p, nb); - set_size_and_pinuse_of_free_chunk(r, rsize); - replace_dv(ms, r, rsize); - } - mem = chunk2mem(p); - return mem; - } else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) { - return mem; - } - } - } else if (nsize >= MAX_REQUEST) { - nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ - } else { - nb = pad_request(nsize); - if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) { - return mem; - } - } - - if (nb <= ms->dvsize) { - size_t rsize = ms->dvsize - nb; - mchunkptr p = ms->dv; - if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ - mchunkptr r = ms->dv = chunk_plus_offset(p, nb); - ms->dvsize = rsize; - set_size_and_pinuse_of_free_chunk(r, rsize); - set_size_and_pinuse_of_inuse_chunk(ms, p, nb); - } else { /* exhaust dv */ - size_t dvs = ms->dvsize; - ms->dvsize = 0; - ms->dv = 0; - set_inuse_and_pinuse(ms, p, dvs); - } - mem = chunk2mem(p); - return mem; - } else if (nb < ms->topsize) { /* Split top */ - size_t rsize = ms->topsize -= nb; - mchunkptr p = ms->top; - mchunkptr r = ms->top = chunk_plus_offset(p, nb); - r->head = rsize | PINUSE_BIT; - set_size_and_pinuse_of_inuse_chunk(ms, p, nb); - mem = chunk2mem(p); - return mem; - } - return alloc_sys(ms, nb); -} - -static LJ_NOINLINE void *lj_alloc_free(void *msp, void *ptr) -{ - if (ptr != 0) { - mchunkptr p = mem2chunk(ptr); - mstate fm = (mstate)msp; - size_t psize = chunksize(p); - mchunkptr next = chunk_plus_offset(p, psize); - if (!pinuse(p)) { - size_t prevsize = p->prev_foot; - if ((prevsize & IS_DIRECT_BIT) != 0) { - prevsize &= ~IS_DIRECT_BIT; - psize += prevsize + DIRECT_FOOT_PAD; - CALL_MUNMAP((char *)p - prevsize, psize); - return NULL; - } else { - mchunkptr prev = chunk_minus_offset(p, prevsize); - psize += prevsize; - p = prev; - /* consolidate backward */ - if (p != fm->dv) { - unlink_chunk(fm, p, prevsize); - } else if ((next->head & INUSE_BITS) == INUSE_BITS) { - fm->dvsize = psize; - set_free_with_pinuse(p, psize, next); - return NULL; - } - } - } - if (!cinuse(next)) { /* consolidate forward */ - if (next == fm->top) { - size_t tsize = fm->topsize += psize; - fm->top = p; - p->head = tsize | PINUSE_BIT; - if (p == fm->dv) { - fm->dv = 0; - fm->dvsize = 0; - } - if (tsize > fm->trim_check) - alloc_trim(fm, 0); - return NULL; - } else 
if (next == fm->dv) { - size_t dsize = fm->dvsize += psize; - fm->dv = p; - set_size_and_pinuse_of_free_chunk(p, dsize); - return NULL; - } else { - size_t nsize = chunksize(next); - psize += nsize; - unlink_chunk(fm, next, nsize); - set_size_and_pinuse_of_free_chunk(p, psize); - if (p == fm->dv) { - fm->dvsize = psize; - return NULL; - } - } - } else { - set_free_with_pinuse(p, psize, next); - } - - if (is_small(psize)) { - insert_small_chunk(fm, p, psize); - } else { - tchunkptr tp = (tchunkptr)p; - insert_large_chunk(fm, tp, psize); - if (--fm->release_checks == 0) - release_unused_segments(fm); - } - } - return NULL; -} - -static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize) -{ - if (nsize >= MAX_REQUEST) { - return NULL; - } else { - mstate m = (mstate)msp; - mchunkptr oldp = mem2chunk(ptr); - size_t oldsize = chunksize(oldp); - mchunkptr next = chunk_plus_offset(oldp, oldsize); - mchunkptr newp = 0; - size_t nb = request2size(nsize); - - /* Try to either shrink or extend into top. Else malloc-copy-free */ - if (is_direct(oldp)) { - newp = direct_resize(oldp, nb); /* this may return NULL. */ - } else if (oldsize >= nb) { /* already big enough */ - size_t rsize = oldsize - nb; - newp = oldp; - if (rsize >= MIN_CHUNK_SIZE) { - mchunkptr rem = chunk_plus_offset(newp, nb); - set_inuse(m, newp, nb); - set_inuse(m, rem, rsize); - lj_alloc_free(m, chunk2mem(rem)); - } - } else if (next == m->top && oldsize + m->topsize > nb) { - /* Expand into top */ - size_t newsize = oldsize + m->topsize; - size_t newtopsize = newsize - nb; - mchunkptr newtop = chunk_plus_offset(oldp, nb); - set_inuse(m, oldp, nb); - newtop->head = newtopsize |PINUSE_BIT; - m->top = newtop; - m->topsize = newtopsize; - newp = oldp; - } - - if (newp != 0) { - return chunk2mem(newp); - } else { - void *newmem = lj_alloc_malloc(m, nsize); - if (newmem != 0) { - size_t oc = oldsize - overhead_for(oldp); - memcpy(newmem, ptr, oc < nsize ? 
oc : nsize); - lj_alloc_free(m, ptr); - } - return newmem; - } - } -} - -void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize) -{ - (void)osize; - if (nsize == 0) { - return lj_alloc_free(msp, ptr); - } else if (ptr == NULL) { - return lj_alloc_malloc(msp, nsize); - } else { - return lj_alloc_realloc(msp, ptr, nsize); - } -} - -#endif diff --git a/src/lj_alloc.h b/src/lj_alloc.h deleted file mode 100644 index f87a7cf342..0000000000 --- a/src/lj_alloc.h +++ /dev/null @@ -1,17 +0,0 @@ -/* -** Bundled memory allocator. -** Donated to the public domain. -*/ - -#ifndef _LJ_ALLOC_H -#define _LJ_ALLOC_H - -#include "lj_def.h" - -#ifndef LUAJIT_USE_SYSMALLOC -LJ_FUNC void *lj_alloc_create(void); -LJ_FUNC void lj_alloc_destroy(void *msp); -LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize); -#endif - -#endif diff --git a/src/lj_api.c b/src/lj_api.c index d1be3abf57..0412c1a568 100644 --- a/src/lj_api.c +++ b/src/lj_api.c @@ -112,6 +112,13 @@ LUA_API void lua_xmove(lua_State *from, lua_State *to, int n) from->top = f; } +LUA_API const lua_Number *lua_version(lua_State *L) +{ + static const lua_Number version = LUA_VERSION_NUM; + UNUSED(L); + return &version; +} + /* -- Stack manipulation -------------------------------------------------- */ LUA_API int lua_gettop(lua_State *L) @@ -152,30 +159,40 @@ LUA_API void lua_insert(lua_State *L, int idx) copyTV(L, p, L->top); } -LUA_API void lua_replace(lua_State *L, int idx) +static void copy_slot(lua_State *L, TValue *f, int idx) { - api_checknelems(L, 1); if (idx == LUA_GLOBALSINDEX) { - api_check(L, tvistab(L->top-1)); + api_check(L, tvistab(f)); /* NOBARRIER: A thread (i.e. L) is never black. 
*/ - setgcref(L->env, obj2gco(tabV(L->top-1))); + setgcref(L->env, obj2gco(tabV(f))); } else if (idx == LUA_ENVIRONINDEX) { GCfunc *fn = curr_func(L); if (fn->c.gct != ~LJ_TFUNC) lj_err_msg(L, LJ_ERR_NOENV); - api_check(L, tvistab(L->top-1)); - setgcref(fn->c.env, obj2gco(tabV(L->top-1))); - lj_gc_barrier(L, fn, L->top-1); + api_check(L, tvistab(f)); + setgcref(fn->c.env, obj2gco(tabV(f))); + lj_gc_barrier(L, fn, f); } else { TValue *o = index2adr(L, idx); api_checkvalidindex(L, o); - copyTV(L, o, L->top-1); + copyTV(L, o, f); if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ - lj_gc_barrier(L, curr_func(L), L->top-1); + lj_gc_barrier(L, curr_func(L), f); } +} + +LUA_API void lua_replace(lua_State *L, int idx) +{ + api_checknelems(L, 1); + copy_slot(L, L->top - 1, idx); L->top--; } +LUA_API void lua_copy(lua_State *L, int fromidx, int toidx) +{ + copy_slot(L, index2adr(L, fromidx), toidx); +} + LUA_API void lua_pushvalue(lua_State *L, int idx) { copyTV(L, L->top, index2adr(L, idx)); @@ -189,19 +206,11 @@ LUA_API int lua_type(lua_State *L, int idx) cTValue *o = index2adr(L, idx); if (tvisnumber(o)) { return LUA_TNUMBER; -#if LJ_64 && !LJ_GC64 - } else if (tvislightud(o)) { - return LUA_TLIGHTUSERDATA; -#endif } else if (o == niltv(L)) { return LUA_TNONE; } else { /* Magic internal/external tag conversion. ORDER LJ_T */ uint32_t t = ~itype(o); -#if LJ_64 int tt = (int)((U64x(75a06,98042110) >> 4*t) & 15u); -#else - int tt = (int)(((t < 8 ? 
0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u); -#endif lua_assert(tt != LUA_TNIL || tvisnil(o)); return tt; } @@ -261,18 +270,12 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2) { cTValue *o1 = index2adr(L, idx1); cTValue *o2 = index2adr(L, idx2); - if (tvisint(o1) && tvisint(o2)) { - return intV(o1) == intV(o2); - } else if (tvisnumber(o1) && tvisnumber(o2)) { + if (tvisnumber(o1) && tvisnumber(o2)) { return numberVnum(o1) == numberVnum(o2); } else if (itype(o1) != itype(o2)) { return 0; } else if (tvispri(o1)) { return o1 != niltv(L) && o2 != niltv(L); -#if LJ_64 && !LJ_GC64 - } else if (tvislightud(o1)) { - return o1->u64 == o2->u64; -#endif } else if (gcrefeq(o1->gcr, o2->gcr)) { return 1; } else if (!tvistabud(o1)) { @@ -296,8 +299,6 @@ LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2) cTValue *o2 = index2adr(L, idx2); if (o1 == niltv(L) || o2 == niltv(L)) { return 0; - } else if (tvisint(o1) && tvisint(o2)) { - return intV(o1) < intV(o2); } else if (tvisnumber(o1) && tvisnumber(o2)) { return numberVnum(o1) < numberVnum(o2); } else { @@ -325,6 +326,22 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx) return 0; } +LUA_API lua_Number lua_tonumberx(lua_State *L, int idx, int *ok) +{ + cTValue *o = index2adr(L, idx); + TValue tmp; + if (LJ_LIKELY(tvisnumber(o))) { + if (ok) *ok = 1; + return numberVnum(o); + } else if (tvisstr(o) && lj_strscan_num(strV(o), &tmp)) { + if (ok) *ok = 1; + return numV(&tmp); + } else { + if (ok) *ok = 0; + return 0; + } +} + LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx) { cTValue *o = index2adr(L, idx); @@ -354,22 +371,32 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) cTValue *o = index2adr(L, idx); TValue tmp; lua_Number n; - if (LJ_LIKELY(tvisint(o))) { - return intV(o); - } else if (LJ_LIKELY(tvisnum(o))) { + if (LJ_LIKELY(tvisnum(o))) { n = numV(o); } else { if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) return 0; - if (tvisint(&tmp)) - return (lua_Integer)intV(&tmp); n 
= numV(&tmp); } -#if LJ_64 return (lua_Integer)n; -#else - return lj_num2int(n); -#endif +} + +LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) +{ + cTValue *o = index2adr(L, idx); + TValue tmp; + lua_Number n; + if (LJ_LIKELY(tvisnum(o))) { + n = numV(o); + } else { + if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) { + if (ok) *ok = 0; + return 0; + } + n = numV(&tmp); + } + if (ok) *ok = 1; + return (lua_Integer)n; } LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) @@ -377,22 +404,14 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) cTValue *o = index2adr(L, idx); TValue tmp; lua_Number n; - if (LJ_LIKELY(tvisint(o))) { - return intV(o); - } else if (LJ_LIKELY(tvisnum(o))) { + if (LJ_LIKELY(tvisnum(o))) { n = numV(o); } else { if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) lj_err_argt(L, idx, LUA_TNUMBER); - if (tvisint(&tmp)) - return (lua_Integer)intV(&tmp); n = numV(&tmp); } -#if LJ_64 return (lua_Integer)n; -#else - return lj_num2int(n); -#endif } LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) @@ -400,24 +419,16 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) cTValue *o = index2adr(L, idx); TValue tmp; lua_Number n; - if (LJ_LIKELY(tvisint(o))) { - return intV(o); - } else if (LJ_LIKELY(tvisnum(o))) { + if (LJ_LIKELY(tvisnum(o))) { n = numV(o); } else if (tvisnil(o)) { return def; } else { if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) lj_err_argt(L, idx, LUA_TNUMBER); - if (tvisint(&tmp)) - return (lua_Integer)intV(&tmp); n = numV(&tmp); } -#if LJ_64 return (lua_Integer)n; -#else - return lj_num2int(n); -#endif } LUA_API int lua_toboolean(lua_State *L, int idx) @@ -858,7 +869,7 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2) lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1])); } -LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) +LUALIB_API void 
*luaL_testudata(lua_State *L, int idx, const char *tname) { cTValue *o = index2adr(L, idx); if (tvisudata(o)) { @@ -867,8 +878,14 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable)) return uddata(ud); } - lj_err_argtype(L, idx, tname); - return NULL; /* unreachable */ + return NULL; /* value is not a userdata with a metatable */ +} + +LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) +{ + void *p = luaL_testudata(L, idx, tname); + if (!p) lj_err_argtype(L, idx, tname); + return p; } /* -- Object setters ------------------------------------------------------ */ @@ -977,6 +994,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) return 1; } +LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname) +{ + lua_getfield(L, LUA_REGISTRYINDEX, tname); + lua_setmetatable(L, -2); +} + LUA_API int lua_setfenv(lua_State *L, int idx) { cTValue *o = index2adr(L, idx); @@ -1017,7 +1040,6 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n) /* -- Calls --------------------------------------------------------------- */ -#if LJ_FR2 static TValue *api_call_base(lua_State *L, int nargs) { TValue *o = L->top, *base = o - nargs; @@ -1026,13 +1048,10 @@ static TValue *api_call_base(lua_State *L, int nargs) setnilV(o); return o+1; } -#else -#define api_call_base(L, nargs) (L->top - (nargs)) -#endif LUA_API void lua_call(lua_State *L, int nargs, int nresults) { - api_check(L, L->status == 0 || L->status == LUA_ERRERR); + api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); api_checknelems(L, nargs+1); lj_vm_call(L, api_call_base(L, nargs), nresults+1); } @@ -1043,7 +1062,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) uint8_t oldh = hook_save(g); ptrdiff_t ef; int status; - api_check(L, L->status == 0 || L->status == LUA_ERRERR); + api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); api_checknelems(L, 
nargs+1); if (errfunc == 0) { ef = 0; @@ -1075,7 +1094,7 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) global_State *g = G(L); uint8_t oldh = hook_save(g); int status; - api_check(L, L->status == 0 || L->status == LUA_ERRERR); + api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); status = lj_vm_cpcall(L, func, ud, cpcall); if (status) hook_restore(g, oldh); return status; @@ -1096,6 +1115,11 @@ LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) /* -- Coroutine yield and resume ------------------------------------------ */ +LUA_API int lua_isyieldable(lua_State *L) +{ + return cframe_canyield(L->cframe); +} + LUA_API int lua_yield(lua_State *L, int nresults) { void *cf = L->cframe; @@ -1123,13 +1147,7 @@ LUA_API int lua_yield(lua_State *L, int nresults) setframe_gc(top, obj2gco(L), LJ_TTHREAD); setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT); L->top = L->base = top+1; -#if LJ_TARGET_X64 lj_err_throw(L, LUA_YIELD); -#else - L->cframe = NULL; - L->status = LUA_YIELD; - lj_vm_unwind_c(cf, LUA_YIELD); -#endif } } lj_err_msg(L, LJ_ERR_CYIELD); @@ -1140,7 +1158,7 @@ LUA_API int lua_resume(lua_State *L, int nargs) { if (L->cframe == NULL && L->status <= LUA_YIELD) return lj_vm_resume(L, - L->status == 0 ? api_call_base(L, nargs) : L->top - nargs, + L->status == LUA_OK ? api_call_base(L, nargs) : L->top - nargs, 0, 0); L->top = L->base; setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP)); diff --git a/src/lj_arch.h b/src/lj_arch.h index 9bf6f481b4..c2d3855056 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -39,66 +39,11 @@ #define LUAJIT_OS_POSIX 5 /* Select native target if no target defined. 
*/ -#ifndef LUAJIT_TARGET - -#if defined(__i386) || defined(__i386__) || defined(_M_IX86) -#define LUAJIT_TARGET LUAJIT_ARCH_X86 -#elif defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -#define LUAJIT_TARGET LUAJIT_ARCH_X64 -#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) -#define LUAJIT_TARGET LUAJIT_ARCH_ARM -#elif defined(__aarch64__) -#define LUAJIT_TARGET LUAJIT_ARCH_ARM64 -#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) -#define LUAJIT_TARGET LUAJIT_ARCH_PPC -#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64) -#define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 -#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) -#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 -#else -#error "No support for this architecture (yet)" -#endif - -#endif /* Select native OS if no target OS defined. */ -#ifndef LUAJIT_OS - -#if defined(_WIN32) && !defined(_XBOX_VER) -#define LUAJIT_OS LUAJIT_OS_WINDOWS -#elif defined(__linux__) -#define LUAJIT_OS LUAJIT_OS_LINUX -#elif defined(__MACH__) && defined(__APPLE__) -#define LUAJIT_OS LUAJIT_OS_OSX -#elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ - defined(__NetBSD__) || defined(__OpenBSD__) || \ - defined(__DragonFly__)) && !defined(__ORBIS__) -#define LUAJIT_OS LUAJIT_OS_BSD -#elif (defined(__sun__) && defined(__svr4__)) -#define LUAJIT_OS LUAJIT_OS_POSIX -#elif defined(__CYGWIN__) -#define LJ_TARGET_CYGWIN 1 -#define LUAJIT_OS LUAJIT_OS_POSIX -#else -#define LUAJIT_OS LUAJIT_OS_OTHER -#endif - -#endif /* Set target OS properties. 
*/ -#if LUAJIT_OS == LUAJIT_OS_WINDOWS -#define LJ_OS_NAME "Windows" -#elif LUAJIT_OS == LUAJIT_OS_LINUX #define LJ_OS_NAME "Linux" -#elif LUAJIT_OS == LUAJIT_OS_OSX -#define LJ_OS_NAME "OSX" -#elif LUAJIT_OS == LUAJIT_OS_BSD -#define LJ_OS_NAME "BSD" -#elif LUAJIT_OS == LUAJIT_OS_POSIX -#define LJ_OS_NAME "POSIX" -#else -#define LJ_OS_NAME "Other" -#endif #define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS) #define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX) @@ -107,68 +52,17 @@ #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX -#ifdef __CELLOS_LV2__ -#define LJ_TARGET_PS3 1 -#define LJ_TARGET_CONSOLE 1 -#endif - -#ifdef __ORBIS__ -#define LJ_TARGET_PS4 1 -#define LJ_TARGET_CONSOLE 1 -#undef NULL -#define NULL ((void*)0) -#endif -#ifdef __psp2__ -#define LJ_TARGET_PSVITA 1 -#define LJ_TARGET_CONSOLE 1 -#endif -#if _XBOX_VER >= 200 -#define LJ_TARGET_XBOX360 1 -#define LJ_TARGET_CONSOLE 1 -#endif -#ifdef _DURANGO -#define LJ_TARGET_XBOXONE 1 -#define LJ_TARGET_CONSOLE 1 -#define LJ_TARGET_GC64 1 -#endif -#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ -#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ -#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ -#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */ /* Set target architecture properties. 
*/ -#if LUAJIT_TARGET == LUAJIT_ARCH_X86 - -#define LJ_ARCH_NAME "x86" -#define LJ_ARCH_BITS 32 -#define LJ_ARCH_ENDIAN LUAJIT_LE -#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN -#define LJ_ABI_WIN 1 -#else -#define LJ_ABI_WIN 0 -#endif -#define LJ_TARGET_X86 1 -#define LJ_TARGET_X86ORX64 1 -#define LJ_TARGET_EHRETREG 0 -#define LJ_TARGET_MASKSHIFT 1 -#define LJ_TARGET_MASKROT 1 -#define LJ_TARGET_UNALIGNED 1 -#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL - -#elif LUAJIT_TARGET == LUAJIT_ARCH_X64 #define LJ_ARCH_NAME "x64" #define LJ_ARCH_BITS 64 #define LJ_ARCH_ENDIAN LUAJIT_LE -#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN -#define LJ_ABI_WIN 1 -#else #define LJ_ABI_WIN 0 -#endif #define LJ_TARGET_X64 1 #define LJ_TARGET_X86ORX64 1 #define LJ_TARGET_EHRETREG 0 @@ -176,186 +70,8 @@ #define LJ_TARGET_MASKSHIFT 1 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNALIGNED 1 -#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL -#ifdef LUAJIT_ENABLE_GC64 -#define LJ_TARGET_GC64 1 -#endif - -#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM - -#define LJ_ARCH_NAME "arm" -#define LJ_ARCH_BITS 32 -#define LJ_ARCH_ENDIAN LUAJIT_LE -#if !defined(LJ_ARCH_HASFPU) && __SOFTFP__ -#define LJ_ARCH_HASFPU 0 -#endif -#if !defined(LJ_ABI_SOFTFP) && !__ARM_PCS_VFP -#define LJ_ABI_SOFTFP 1 -#endif -#define LJ_ABI_EABI 1 -#define LJ_TARGET_ARM 1 -#define LJ_TARGET_EHRETREG 0 -#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ -#define LJ_TARGET_MASKSHIFT 0 -#define LJ_TARGET_MASKROT 1 -#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. 
*/ -#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL - -#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__ -#define LJ_ARCH_VERSION 80 -#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ -#define LJ_ARCH_VERSION 70 -#elif __ARM_ARCH_6T2__ -#define LJ_ARCH_VERSION 61 -#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ -#define LJ_ARCH_VERSION 60 -#else -#define LJ_ARCH_VERSION 50 -#endif - -#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 - -#define LJ_ARCH_NAME "arm64" -#define LJ_ARCH_BITS 64 -#define LJ_ARCH_ENDIAN LUAJIT_LE -#define LJ_TARGET_ARM64 1 -#define LJ_TARGET_EHRETREG 0 -#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */ -#define LJ_TARGET_MASKSHIFT 1 -#define LJ_TARGET_MASKROT 1 -#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ -#define LJ_TARGET_GC64 1 -#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL - -#define LJ_ARCH_VERSION 80 - -#elif LUAJIT_TARGET == LUAJIT_ARCH_PPC - -#ifndef LJ_ARCH_ENDIAN -#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ -#define LJ_ARCH_ENDIAN LUAJIT_LE -#else -#define LJ_ARCH_ENDIAN LUAJIT_BE -#endif -#endif - -#if _LP64 -#define LJ_ARCH_BITS 64 -#if LJ_ARCH_ENDIAN == LUAJIT_LE -#define LJ_ARCH_NAME "ppc64le" -#else -#define LJ_ARCH_NAME "ppc64" -#endif -#else -#define LJ_ARCH_BITS 32 -#define LJ_ARCH_NAME "ppc" -#endif - -#define LJ_TARGET_PPC 1 -#define LJ_TARGET_EHRETREG 3 -#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ -#define LJ_TARGET_MASKSHIFT 0 -#define LJ_TARGET_MASKROT 1 -#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. 
*/ -#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE - -#if LJ_TARGET_CONSOLE -#define LJ_ARCH_PPC32ON64 1 -#define LJ_ARCH_NOFFI 1 -#elif LJ_ARCH_BITS == 64 -#define LJ_ARCH_PPC64 1 -#define LJ_TARGET_GC64 1 -#define LJ_ARCH_NOJIT 1 /* NYI */ -#endif - -#if _ARCH_PWR7 -#define LJ_ARCH_VERSION 70 -#elif _ARCH_PWR6 -#define LJ_ARCH_VERSION 60 -#elif _ARCH_PWR5X -#define LJ_ARCH_VERSION 51 -#elif _ARCH_PWR5 -#define LJ_ARCH_VERSION 50 -#elif _ARCH_PWR4 -#define LJ_ARCH_VERSION 40 -#else -#define LJ_ARCH_VERSION 0 -#endif -#if _ARCH_PPCSQ -#define LJ_ARCH_SQRT 1 -#endif -#if _ARCH_PWR5X -#define LJ_ARCH_ROUND 1 -#endif -#if __PPU__ -#define LJ_ARCH_CELL 1 -#endif -#if LJ_TARGET_XBOX360 -#define LJ_ARCH_XENON 1 -#endif - -#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64 - -#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) -#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 -#define LJ_ARCH_NAME "mipsel" -#else -#define LJ_ARCH_NAME "mips64el" -#endif -#define LJ_ARCH_ENDIAN LUAJIT_LE -#else -#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 -#define LJ_ARCH_NAME "mips" -#else -#define LJ_ARCH_NAME "mips64" -#endif -#define LJ_ARCH_ENDIAN LUAJIT_BE -#endif - -#if !defined(LJ_ARCH_HASFPU) -#ifdef __mips_soft_float -#define LJ_ARCH_HASFPU 0 -#else -#define LJ_ARCH_HASFPU 1 -#endif -#endif - -#if !defined(LJ_ABI_SOFTFP) -#ifdef __mips_soft_float -#define LJ_ABI_SOFTFP 1 -#else -#define LJ_ABI_SOFTFP 0 -#endif -#endif - -#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 -#define LJ_ARCH_BITS 32 -#define LJ_TARGET_MIPS32 1 -#else -#if LJ_ABI_SOFTFP || !LJ_ARCH_HASFPU -#define LJ_ARCH_NOJIT 1 /* NYI */ -#endif -#define LJ_ARCH_BITS 64 -#define LJ_TARGET_MIPS64 1 #define LJ_TARGET_GC64 1 -#endif -#define LJ_TARGET_MIPS 1 -#define LJ_TARGET_EHRETREG 4 -#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ -#define LJ_TARGET_MASKSHIFT 1 -#define LJ_TARGET_MASKROT 1 -#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. 
*/ -#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL - -#if _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2 -#define LJ_ARCH_VERSION 20 -#else -#define LJ_ARCH_VERSION 10 -#endif -#else -#error "No target architecture defined" -#endif #ifndef LJ_PAGESIZE #define LJ_PAGESIZE 4096 @@ -363,116 +79,20 @@ /* Check for minimum required compiler versions. */ #if defined(__GNUC__) -#if LJ_TARGET_X86 -#if (__GNUC__ < 3) || ((__GNUC__ == 3) && __GNUC_MINOR__ < 4) -#error "Need at least GCC 3.4 or newer" -#endif -#elif LJ_TARGET_X64 #if __GNUC__ < 4 #error "Need at least GCC 4.0 or newer" #endif -#elif LJ_TARGET_ARM -#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) -#error "Need at least GCC 4.2 or newer" -#endif -#elif LJ_TARGET_ARM64 -#if __clang__ -#if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5) -#error "Need at least Clang 3.5 or newer" -#endif -#else -#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8) -#error "Need at least GCC 4.8 or newer" -#endif -#endif -#elif !LJ_TARGET_PS3 -#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3) -#error "Need at least GCC 4.3 or newer" -#endif -#endif #endif /* Check target-specific constraints. 
*/ #ifndef _BUILDVM_H -#if LJ_TARGET_X64 #if __USING_SJLJ_EXCEPTIONS__ #error "Need a C compiler with native exception handling on x64" #endif -#elif LJ_TARGET_ARM -#if defined(__ARMEB__) -#error "No support for big-endian ARM" -#endif -#if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__ -#error "No support for Cortex-M CPUs" -#endif -#if !(__ARM_EABI__ || LJ_TARGET_IOS) -#error "Only ARM EABI or iOS 3.0+ ABI is supported" -#endif -#elif LJ_TARGET_ARM64 -#if defined(__AARCH64EB__) -#error "No support for big-endian ARM64" -#endif -#if defined(_ILP32) -#error "No support for ILP32 model on ARM64" -#endif -#elif LJ_TARGET_PPC -#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) -#error "No support for PowerPC CPUs without double-precision FPU" -#endif -#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE -#error "No support for little-endian PPC32" -#endif -#if LJ_ARCH_PPC64 -#error "No support for PowerPC 64 bit mode (yet)" -#endif -#ifdef __NO_FPRS__ -#error "No support for PPC/e500 anymore (use LuaJIT 2.0)" -#endif -#elif LJ_TARGET_MIPS32 -#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32)) -#error "Only o32 ABI supported for MIPS32" -#endif -#elif LJ_TARGET_MIPS64 -#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64)) -#error "Only n64 ABI supported for MIPS64" -#endif -#endif -#endif - -/* Enable or disable the dual-number mode for the VM. 
*/ -#if (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE && LUAJIT_NUMMODE == 2) || \ - (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL && LUAJIT_NUMMODE == 1) -#error "No support for this number mode on this architecture" -#endif -#if LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL || \ - (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL_SINGLE && LUAJIT_NUMMODE != 1) || \ - (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE_DUAL && LUAJIT_NUMMODE == 2) -#define LJ_DUALNUM 1 -#else -#define LJ_DUALNUM 0 -#endif - -#if LJ_TARGET_IOS || LJ_TARGET_CONSOLE -/* Runtime code generation is restricted on iOS. Complain to Apple, not me. */ -/* Ditto for the consoles. Complain to Sony or MS, not me. */ -#ifndef LUAJIT_ENABLE_JIT -#define LJ_OS_NOJIT 1 -#endif -#endif - -/* 64 bit GC references. */ -#if LJ_TARGET_GC64 -#define LJ_GC64 1 -#else -#define LJ_GC64 0 #endif /* 2-slot frame info. */ -#if LJ_GC64 #define LJ_FR2 1 -#else -#define LJ_FR2 0 -#endif /* Disable or enable the JIT compiler. */ #if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) @@ -481,80 +101,24 @@ #define LJ_HASJIT 1 #endif -/* Disable or enable the FFI extension. 
*/ -#if defined(LUAJIT_DISABLE_FFI) || defined(LJ_ARCH_NOFFI) -#define LJ_HASFFI 0 -#else -#define LJ_HASFFI 1 -#endif - -#if defined(LUAJIT_DISABLE_PROFILE) -#define LJ_HASPROFILE 0 -#elif LJ_TARGET_POSIX -#define LJ_HASPROFILE 1 -#define LJ_PROFILE_SIGPROF 1 -#elif LJ_TARGET_PS3 -#define LJ_HASPROFILE 1 -#define LJ_PROFILE_PTHREAD 1 -#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360 -#define LJ_HASPROFILE 1 -#define LJ_PROFILE_WTHREAD 1 -#else -#define LJ_HASPROFILE 0 -#endif - #ifndef LJ_ARCH_HASFPU #define LJ_ARCH_HASFPU 1 #endif -#ifndef LJ_ABI_SOFTFP #define LJ_ABI_SOFTFP 0 -#endif #define LJ_SOFTFP (!LJ_ARCH_HASFPU) #if LJ_ARCH_ENDIAN == LUAJIT_BE -#define LJ_LE 0 -#define LJ_BE 1 #define LJ_ENDIAN_SELECT(le, be) be #define LJ_ENDIAN_LOHI(lo, hi) hi lo #else -#define LJ_LE 1 -#define LJ_BE 0 #define LJ_ENDIAN_SELECT(le, be) le #define LJ_ENDIAN_LOHI(lo, hi) lo hi #endif -#if LJ_ARCH_BITS == 32 -#define LJ_32 1 -#define LJ_64 0 -#else -#define LJ_32 0 -#define LJ_64 1 -#endif - #ifndef LJ_TARGET_UNALIGNED #define LJ_TARGET_UNALIGNED 0 #endif -/* Various workarounds for embedded operating systems or weak C runtimes. */ -#if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS -#define LUAJIT_NO_LOG2 -#endif -#if defined(__symbian__) || LJ_TARGET_WINDOWS -#define LUAJIT_NO_EXP2 -#endif -#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) -#define LJ_NO_SYSTEM 1 -#endif - -#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__ -/* NYI: no support for compact unwind specification, yet. */ -#define LUAJIT_NO_UNWIND 1 -#endif - -#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 -#define LJ_NO_UNWIND 1 -#endif - /* Compatibility with Lua 5.1 vs. 5.2. 
*/ #ifdef LUAJIT_ENABLE_LUA52COMPAT #define LJ_52 1 diff --git a/src/lj_asm.c b/src/lj_asm.c index 7c09dd9f50..281acee57b 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -8,15 +8,12 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_gc.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_frame.h" -#if LJ_HASFFI #include "lj_ctype.h" -#endif #include "lj_ir.h" #include "lj_jit.h" #include "lj_ircall.h" @@ -49,9 +46,7 @@ typedef struct ASMState { IRIns *ir; /* Copy of pointer to IR instructions/constants. */ jit_State *J; /* JIT compiler state. */ -#if LJ_TARGET_X86ORX64 x86ModRM mrm; /* Fused x86 address operand. */ -#endif RegSet freeset; /* Set of free registers. */ RegSet modset; /* Set of registers modified inside the loop. */ @@ -167,19 +162,7 @@ IRFLDEF(FLOFS) /* -- Target-specific instruction emitter --------------------------------- */ -#if LJ_TARGET_X86ORX64 #include "lj_emit_x86.h" -#elif LJ_TARGET_ARM -#include "lj_emit_arm.h" -#elif LJ_TARGET_ARM64 -#include "lj_emit_arm64.h" -#elif LJ_TARGET_PPC -#include "lj_emit_ppc.h" -#elif LJ_TARGET_MIPS -#include "lj_emit_mips.h" -#else -#error "Missing instruction emitter for target CPU" -#endif /* Generic load/store of register from/to stack slot. */ #define emit_spload(as, ir, r, ofs) \ @@ -324,11 +307,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref) lua_assert(!rset_test(as->freeset, r)); ra_free(as, r); ra_modified(as, r); -#if LJ_64 emit_loadu64(as, r, ra_krefk(as, ref)); -#else - emit_loadi(as, r, ra_krefk(as, ref)); -#endif return r; } ir = IR(ref); @@ -338,27 +317,21 @@ static Reg ra_rematk(ASMState *as, IRRef ref) ra_modified(as, r); ir->r = RID_INIT; /* Do not keep any hint. */ RA_DBGX((as, "remat $i $r", ir, r)); -#if !LJ_SOFTFP if (ir->o == IR_KNUM) { emit_loadk64(as, r, ir); } else -#endif if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. 
*/ emit_getgl(as, r, jit_base); } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ emit_getgl(as, r, cur_L); -#if LJ_64 } else if (ir->o == IR_KINT64) { emit_loadu64(as, r, ir_kint64(ir)->u64); -#if LJ_GC64 } else if (ir->o == IR_KGC) { emit_loadu64(as, r, (uintptr_t)ir_kgc(ir)); } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { emit_loadu64(as, r, (uintptr_t)ir_kptr(ir)); -#endif -#endif } else { lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); @@ -482,7 +455,6 @@ static void ra_evictset(ASMState *as, RegSet drop) { RegSet work; as->modset |= drop; -#if !LJ_SOFTFP work = (drop & ~as->freeset) & RSET_FPR; while (work) { Reg r = rset_pickbot(work); @@ -490,7 +462,6 @@ static void ra_evictset(ASMState *as, RegSet drop) rset_clear(work, r); checkmclim(as); } -#endif work = (drop & ~as->freeset); while (work) { Reg r = rset_pickbot(work); @@ -504,7 +475,6 @@ static void ra_evictset(ASMState *as, RegSet drop) static void ra_evictk(ASMState *as) { RegSet work; -#if !LJ_SOFTFP work = ~as->freeset & RSET_FPR; while (work) { Reg r = rset_pickbot(work); @@ -515,7 +485,6 @@ static void ra_evictk(ASMState *as) } rset_clear(work, r); } -#endif work = ~as->freeset & RSET_GPR; while (work) { Reg r = rset_pickbot(work); @@ -539,7 +508,6 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow) IRRef ref; r = rset_pickbot(work); ref = regcost_ref(as->cost[r]); -#if LJ_64 if (ref < ASMREF_L) { if (ra_iskref(ref)) { if (k == ra_krefk(as, ref)) @@ -547,23 +515,14 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow) } else { IRIns *ir = IR(ref); if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) || -#if LJ_GC64 (ir->o == IR_KINT && k == ir->i) || (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) || ((ir->o == IR_KPTR || ir->o == IR_KKPTR) && k == (intptr_t)ir_kptr(ir)) -#else - (ir->o != IR_KINT64 && k == ir->i) -#endif ) 
return r; } } -#else - if (ref < ASMREF_L && - k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) - return r; -#endif rset_clear(work, r); } pick = as->freeset & allow; @@ -718,7 +677,6 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r) } } -#if LJ_TARGET_X86ORX64 /* Propagate dest register to left reference. Emit moves as needed. ** This is a required fixup step for all 2-operand machine instructions. */ @@ -734,16 +692,12 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) emit_loadk64(as, dest, ir); return; } -#if LJ_64 } else if (ir->o == IR_KINT64) { emit_loadk64(as, dest, ir); return; -#if LJ_GC64 } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) { emit_loadk64(as, dest, ir); return; -#endif -#endif } else if (ir->o != IR_KPRI) { lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); @@ -767,81 +721,7 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) } } } -#else -/* Similar to ra_left, except we override any hints. */ -static void ra_leftov(ASMState *as, Reg dest, IRRef lref) -{ - IRIns *ir = IR(lref); - Reg left = ir->r; - if (ra_noreg(left)) { - ra_sethint(ir->r, dest); /* Propagate register hint. */ - left = ra_allocref(as, lref, - (LJ_SOFTFP || dest < RID_MAX_GPR) ? RSET_GPR : RSET_FPR); - } - ra_noweak(as, left); - if (dest != left) { - /* Use register renaming if dest is the PHI reg. */ - if (irt_isphi(ir->t) && as->phireg[dest] == lref) { - ra_modified(as, left); - ra_rename(as, left, dest); - } else { - emit_movrr(as, ir, dest, left); - } - } -} -#endif -#if !LJ_64 -/* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */ -static void ra_destpair(ASMState *as, IRIns *ir) -{ - Reg destlo = ir->r, desthi = (ir+1)->r; - /* First spill unrelated refs blocking the destination registers. 
*/ - if (!rset_test(as->freeset, RID_RETLO) && - destlo != RID_RETLO && desthi != RID_RETLO) - ra_restore(as, regcost_ref(as->cost[RID_RETLO])); - if (!rset_test(as->freeset, RID_RETHI) && - destlo != RID_RETHI && desthi != RID_RETHI) - ra_restore(as, regcost_ref(as->cost[RID_RETHI])); - /* Next free the destination registers (if any). */ - if (ra_hasreg(destlo)) { - ra_free(as, destlo); - ra_modified(as, destlo); - } else { - destlo = RID_RETLO; - } - if (ra_hasreg(desthi)) { - ra_free(as, desthi); - ra_modified(as, desthi); - } else { - desthi = RID_RETHI; - } - /* Check for conflicts and shuffle the registers as needed. */ - if (destlo == RID_RETHI) { - if (desthi == RID_RETLO) { -#if LJ_TARGET_X86 - *--as->mcp = XI_XCHGa + RID_RETHI; -#else - emit_movrr(as, ir, RID_RETHI, RID_TMP); - emit_movrr(as, ir, RID_RETLO, RID_RETHI); - emit_movrr(as, ir, RID_TMP, RID_RETLO); -#endif - } else { - emit_movrr(as, ir, RID_RETHI, RID_RETLO); - if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); - } - } else if (desthi == RID_RETLO) { - emit_movrr(as, ir, RID_RETLO, RID_RETHI); - if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); - } else { - if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); - if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); - } - /* Restore spill slots (if any). */ - if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI); - if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO); -} -#endif /* -- Snapshot handling --------- ----------------------------------------- */ @@ -879,13 +759,9 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref) if (!irref_isk(ref) && (!(ra_used(ir) || ir->r == RID_SUNK))) { if (ir->r == RID_SINK) { ir->r = RID_SUNK; -#if LJ_HASFFI if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. */ asm_snap_alloc1(as, ir->op2); - if (LJ_32 && (ir+1)->o == IR_HIOP) - asm_snap_alloc1(as, (ir+1)->op2); } else -#endif { /* Allocate stored values for TNEW, TDUP and CNEW. 
*/ IRIns *irs; lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW); @@ -894,8 +770,6 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref) lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || irs->o == IR_FSTORE || irs->o == IR_XSTORE); asm_snap_alloc1(as, irs->op2); - if (LJ_32 && (irs+1)->o == IR_HIOP) - asm_snap_alloc1(as, (irs+1)->op2); } } } else { @@ -1017,7 +891,7 @@ static uint32_t ir_khash(IRIns *ir) } else { lua_assert(irt_isgcv(ir->t)); lo = u32ptr(ir_kgc(ir)); - hi = lo + HASH_BIAS; + hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15); } return hashrot(lo, hi); } @@ -1070,7 +944,7 @@ static void asm_gcstep(ASMState *as, IRIns *ir) IRIns *ira; for (ira = IR(as->stopins+1); ira < ir; ira++) if ((ira->o == IR_TNEW || ira->o == IR_TDUP || - (LJ_HASFFI && (ira->o == IR_CNEW || ira->o == IR_CNEWI))) && + (ira->o == IR_CNEW || ira->o == IR_CNEWI)) && ra_used(ira)) as->gcsteps++; if (as->gcsteps) @@ -1103,11 +977,7 @@ static void asm_bufhdr(ASMState *as, IRIns *ir) emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); } -#if LJ_TARGET_X86ORX64 ra_left(as, sb, ir->op1); -#else - ra_leftov(as, sb, ir->op1); -#endif } static void asm_bufput(ASMState *as, IRIns *ir) @@ -1115,7 +985,7 @@ static void asm_bufput(ASMState *as, IRIns *ir) const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr]; IRRef args[3]; IRIns *irs; - int kchar = -1; + int kchar = -129; args[0] = ir->op1; /* SBuf * */ args[1] = ir->op2; /* GCstr * */ irs = IR(ir->op2); @@ -1123,7 +993,7 @@ static void asm_bufput(ASMState *as, IRIns *ir) if (irs->o == IR_KGC) { GCstr *s = ir_kstr(irs); if (s->len == 1) { /* Optimize put of single-char string constant. */ - kchar = strdata(s)[0]; + kchar = (int8_t)strdata(s)[0]; /* Signed! 
*/ args[1] = ASMREF_TMP1; /* int, truncated to char */ ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; } @@ -1150,7 +1020,7 @@ static void asm_bufput(ASMState *as, IRIns *ir) asm_gencall(as, ci, args); if (args[1] == ASMREF_TMP1) { Reg tmp = ra_releasetmp(as, ASMREF_TMP1); - if (kchar == -1) + if (kchar == -129) asm_tvptr(as, tmp, irs->op1); else ra_allockreg(as, kchar, tmp); @@ -1191,34 +1061,6 @@ static void asm_tostr(ASMState *as, IRIns *ir) asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); } -#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86 -static void asm_conv64(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); - IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); - IRCallID id; - IRRef args[2]; - lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP); - args[LJ_BE] = (ir-1)->op1; - args[LJ_LE] = ir->op1; - if (st == IRT_NUM || st == IRT_FLOAT) { - id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); - ir--; - } else { - id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); - } - { -#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP - CCallInfo cim = lj_ir_callinfo[id], *ci = &cim; - cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! 
*/ -#else - const CCallInfo *ci = &lj_ir_callinfo[id]; -#endif - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); - } -} -#endif /* -- Memory references --------------------------------------------------- */ @@ -1239,11 +1081,7 @@ static void asm_newref(ASMState *as, IRIns *ir) static void asm_lref(ASMState *as, IRIns *ir) { Reg r = ra_dest(as, ir, RSET_GPR); -#if LJ_TARGET_X86ORX64 ra_left(as, r, ASMREF_L); -#else - ra_leftov(as, r, ASMREF_L); -#endif } /* -- Calls --------------------------------------------------------------- */ @@ -1273,16 +1111,11 @@ static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) nargs++; while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } } -#if LJ_HASFFI if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */ CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i; CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id); nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0); -#if LJ_TARGET_X86 - nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT); -#endif } -#endif return (nargs | (ir->t.irt << CCI_OTSHIFT)); } @@ -1305,7 +1138,6 @@ static void asm_call(ASMState *as, IRIns *ir) asm_gencall(as, ci, args); } -#if !LJ_SOFTFP static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; @@ -1329,7 +1161,6 @@ static int asm_fpjoin_pow(ASMState *as, IRIns *ir) } return 0; } -#endif /* -- PHI and loop handling ----------------------------------------------- */ @@ -1411,7 +1242,6 @@ static void asm_phi_shuffle(ASMState *as) } /* Restore/remat invariants whose registers are modified inside the loop. 
*/ -#if !LJ_SOFTFP work = as->modset & ~(as->freeset | as->phiset) & RSET_FPR; while (work) { Reg r = rset_pickbot(work); @@ -1419,7 +1249,6 @@ static void asm_phi_shuffle(ASMState *as) rset_clear(work, r); checkmclim(as); } -#endif work = as->modset & ~(as->freeset | as->phiset); while (work) { Reg r = rset_pickbot(work); @@ -1453,15 +1282,11 @@ static void asm_phi_copyspill(ASMState *as) if (ra_hasspill(ir->s) && ra_hasspill(IR(ir->op1)->s)) need |= irt_isfp(ir->t) ? 2 : 1; /* Unsynced spill slot? */ if ((need & 1)) { /* Copy integer spill slots. */ -#if !LJ_TARGET_X86ORX64 - Reg r = RID_TMP; -#else Reg r = RID_RET; if ((as->freeset & RSET_GPR)) r = rset_pickbot((as->freeset & RSET_GPR)); else emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP); -#endif for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) { if (ra_hasspill(ir->s)) { IRIns *irl = IR(ir->op1); @@ -1472,18 +1297,11 @@ static void asm_phi_copyspill(ASMState *as) } } } -#if LJ_TARGET_X86ORX64 if (!rset_test(as->freeset, r)) emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP); -#endif } -#if !LJ_SOFTFP if ((need & 2)) { /* Copy FP spill slots. */ -#if LJ_TARGET_X86 - Reg r = RID_XMM0; -#else Reg r = RID_FPRET; -#endif if ((as->freeset & RSET_FPR)) r = rset_pickbot((as->freeset & RSET_FPR)); if (!rset_test(as->freeset, r)) @@ -1501,7 +1319,6 @@ static void asm_phi_copyspill(ASMState *as) if (!rset_test(as->freeset, r)) emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP); } -#endif } /* Emit renames for left PHIs which are only spilled outside the loop. 
*/ @@ -1587,19 +1404,7 @@ static void asm_loop(ASMState *as) /* -- Target-specific assembler ------------------------------------------- */ -#if LJ_TARGET_X86ORX64 #include "lj_asm_x86.h" -#elif LJ_TARGET_ARM -#include "lj_asm_arm.h" -#elif LJ_TARGET_ARM64 -#include "lj_asm_arm64.h" -#elif LJ_TARGET_PPC -#include "lj_asm_ppc.h" -#elif LJ_TARGET_MIPS -#include "lj_asm_mips.h" -#else -#error "Missing assembler for target CPU" -#endif /* -- Instruction dispatch ------------------------------------------------ */ @@ -1615,7 +1420,6 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_PHI: asm_phi(as, ir); break; case IR_HIOP: asm_hiop(as, ir); break; case IR_GCSTEP: asm_gcstep(as, ir); break; - case IR_PROF: asm_prof(as, ir); break; /* Guarded assertions. */ case IR_LT: case IR_GE: case IR_LE: case IR_GT: @@ -1652,12 +1456,6 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_MUL: asm_mul(as, ir); break; case IR_MOD: asm_mod(as, ir); break; case IR_NEG: asm_neg(as, ir); break; -#if LJ_SOFTFP - case IR_DIV: case IR_POW: case IR_ABS: - case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: - lua_assert(0); /* Unused for LJ_SOFTFP. */ - break; -#else case IR_DIV: asm_div(as, ir); break; case IR_POW: asm_pow(as, ir); break; case IR_ABS: asm_abs(as, ir); break; @@ -1665,7 +1463,6 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_LDEXP: asm_ldexp(as, ir); break; case IR_FPMATH: asm_fpmath(as, ir); break; case IR_TOBIT: asm_tobit(as, ir); break; -#endif case IR_MIN: asm_min(as, ir); break; case IR_MAX: asm_max(as, ir); break; @@ -1844,11 +1641,6 @@ static void asm_head_side(ASMState *as) emit_setvmstate(as, (int32_t)as->T->traceno); emit_spsub(as, spdelta); -#if !LJ_TARGET_X86ORX64 - /* Restore BASE register from parent spill slot. */ - if (ra_hasspill(irp->s)) - emit_spload(as, IR(REF_BASE), IR(REF_BASE)->r, sps_scale(irp->s)); -#endif /* Restore target registers from parent spill slots. 
*/ if (pass3) { @@ -1956,12 +1748,7 @@ static void asm_tail_link(ASMState *as) if (bc_isret(bc_op(*retpc))) pc = retpc; } -#if LJ_GC64 emit_loadu64(as, RID_LPC, u64ptr(pc)); -#else - ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); - ra_allockreg(as, i32ptr(pc), RID_LPC); -#endif mres = (int32_t)(snap->nslots - baseslot - LJ_FR2); switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: @@ -1978,7 +1765,7 @@ static void asm_tail_link(ASMState *as) emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); if (as->J->ktrace) { /* Patch ktrace slot with the final GCtrace pointer. */ - setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal)); + setgcref(IR(as->J->ktrace)[1].gcr, obj2gco(as->J->curfinal)); IR(as->J->ktrace)->o = IR_KGC; } @@ -2000,9 +1787,6 @@ static void asm_setup_regsp(ASMState *as) IRRef nins = T->nins; IRIns *ir, *lastir; int inloop; -#if LJ_TARGET_ARM - uint32_t rload = 0xa6402a64; -#endif ra_setup(as); @@ -2010,12 +1794,8 @@ static void asm_setup_regsp(ASMState *as) for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) { ir->prev = REGSP_INIT; if (irt_is64(ir->t) && ir->o != IR_KNULL) { -#if LJ_GC64 + /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */ -#else - /* Make life easier for backends by putting address of constant in i. 
*/ - ir->i = (int32_t)(intptr_t)(ir+1); -#endif ir++; } } @@ -2064,17 +1844,6 @@ static void asm_setup_regsp(ASMState *as) case IR_LOOP: inloop = 1; break; -#if LJ_TARGET_ARM - case IR_SLOAD: - if (!((ir->op2 & IRSLOAD_TYPECHECK) || (ir+1)->o == IR_HIOP)) - break; - /* fallthrough */ - case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - if (!LJ_SOFTFP && irt_isnum(ir->t)) break; - ir->prev = (uint16_t)REGSP_HINT((rload & 15)); - rload = lj_ror(rload, 4); - continue; -#endif case IR_CALLXS: { CCallInfo ci; ci.flags = asm_callx_flags(as, ir); @@ -2091,55 +1860,11 @@ static void asm_setup_regsp(ASMState *as) (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; continue; } -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) - case IR_HIOP: - switch ((ir-1)->o) { -#if LJ_SOFTFP && LJ_TARGET_ARM - case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - if (ra_hashint((ir-1)->r)) { - ir->prev = (ir-1)->prev + 1; - continue; - } - break; -#endif -#if !LJ_SOFTFP && LJ_NEED_FP64 - case IR_CONV: - if (irt_isfp((ir-1)->t)) { - ir->prev = REGSP_HINT(RID_FPRET); - continue; - } - /* fallthrough */ -#endif - case IR_CALLN: case IR_CALLXS: -#if LJ_SOFTFP - case IR_MIN: case IR_MAX: -#endif - (ir-1)->prev = REGSP_HINT(RID_RETLO); - ir->prev = REGSP_HINT(RID_RETHI); - continue; - default: - break; - } - break; -#endif -#if LJ_SOFTFP - case IR_MIN: case IR_MAX: - if ((ir+1)->o != IR_HIOP) break; - /* fallthrough */ -#endif /* C calls evict all scratch regs and return results in RID_RET. */ case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT: if (REGARG_NUMGPR < 3 && as->evenspill < 3) as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ -#if LJ_TARGET_X86 && LJ_HASFFI - if (0) { - case IR_CNEW: - if (ir->op2 != REF_NIL && as->evenspill < 4) - as->evenspill = 4; /* lj_cdata_newv needs 4 args. 
*/ - } -#else case IR_CNEW: -#endif case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR: case IR_BUFSTR: ir->prev = REGSP_HINT(RID_RET); @@ -2150,26 +1875,13 @@ static void asm_setup_regsp(ASMState *as) if (inloop) as->modset = RSET_SCRATCH; break; -#if !LJ_SOFTFP case IR_ATAN2: -#if LJ_TARGET_X86 - if (as->evenspill < 4) /* Leave room to call atan2(). */ - as->evenspill = 4; -#endif -#if !LJ_TARGET_X86ORX64 - case IR_LDEXP: -#endif -#endif case IR_POW: if (!LJ_SOFTFP && irt_isnum(ir->t)) { if (inloop) as->modset |= RSET_SCRATCH; -#if LJ_TARGET_X86 - break; -#else ir->prev = REGSP_HINT(RID_FPRET); continue; -#endif } /* fallthrough for integer POW */ case IR_DIV: case IR_MOD: @@ -2181,7 +1893,6 @@ static void asm_setup_regsp(ASMState *as) } break; case IR_FPMATH: -#if LJ_TARGET_X86ORX64 if (ir->op2 <= IRFPM_TRUNC) { if (!(as->flags & JIT_F_SSE4_1)) { ir->prev = REGSP_HINT(RID_XMM0); @@ -2190,20 +1901,11 @@ static void asm_setup_regsp(ASMState *as) continue; } break; - } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) { - if (as->evenspill < 4) /* Leave room to call pow(). */ - as->evenspill = 4; } -#endif if (inloop) as->modset |= RSET_SCRATCH; -#if LJ_TARGET_X86 - break; -#else ir->prev = REGSP_HINT(RID_FPRET); continue; -#endif -#if LJ_TARGET_X86ORX64 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ case IR_BSHL: case IR_BSHR: case IR_BSAR: if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */ @@ -2215,13 +1917,10 @@ static void asm_setup_regsp(ASMState *as) rset_set(as->modset, RID_ECX); } break; -#endif /* Do not propagate hints across type conversions or loads. 
*/ case IR_TOBIT: case IR_XLOAD: -#if !LJ_TARGET_ARM case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: -#endif break; case IR_CONV: if (irt_isfp(ir->t) || (ir->op2 & IRCONV_SRCMASK) == IRT_NUM || @@ -2247,12 +1946,29 @@ static void asm_setup_regsp(ASMState *as) /* -- Assembler core ------------------------------------------------------ */ +/* Do we want the profiler to attribute VM time to this trace? + * + * Not if the root of this trace is a Lua function. We assume that the + * root cause of running the interpreter is a loop that failed to + * compile somewhere and that entry/exit through function traces is + * only noise that should be filtered out. + * + * This helps the profiler to point out the code that needs to be + * changed to reduce time spent in the interpreter. + */ +static int asm_should_profile_exit(jit_State *J, GCtrace *T) +{ + GCtrace *root = traceref(J, T->root ? T->root : T->traceno); + BCOp op = bc_op(root->startins); + return op != BC_FUNCF && op != BC_FUNCV; +} + /* Assemble a trace. */ void lj_asm_trace(jit_State *J, GCtrace *T) { ASMState as_; ASMState *as = &as_; - MCode *origtop; + MCode *origtop, *firstins; /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */ { @@ -2272,7 +1988,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) /* Setup initial state. Copy some fields to reduce indirections. */ as->J = J; as->T = T; - J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */ + J->curfinal = lj_trace_alloc(J->L, T); as->flags = J->flags; as->loopref = J->loopref; as->realign = NULL; @@ -2338,7 +2054,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) /* Assemble a trace in linear backwards order. */ for (as->curins--; as->curins > as->stopins; as->curins--) { IRIns *ir = IR(as->curins); - lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. 
*/ + MCode *end = as->mcp; if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) continue; /* Dead-code elimination can be soooo easy. */ if (irt_isguard(ir->t)) @@ -2346,8 +2062,12 @@ void lj_asm_trace(jit_State *J, GCtrace *T) RA_DBG_REF(); checkmclim(as); asm_ir(as, ir); + lua_assert(as->curins-REF_BIAS < J->curfinal->nszirmcode); + J->curfinal->szirmcode[as->curins-REF_BIAS] = (uint16_t)(end - as->mcp); } + firstins = as->mcp; /* MCode assembled for IR instructions. */ + if (as->realign && J->curfinal->nins >= T->nins) continue; /* Retry in case only the MCode needs to be realigned. */ @@ -2372,6 +2092,8 @@ void lj_asm_trace(jit_State *J, GCtrace *T) memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins, (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */ T->nins = J->curfinal->nins; + /* Log size of trace head */ + J->curfinal->szirmcode[0] = (uint16_t)((intptr_t)firstins - (intptr_t)as->mcp); break; /* Done. */ } @@ -2379,6 +2101,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) lj_trace_free(J2G(J), J->curfinal); J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */ J->curfinal = lj_trace_alloc(J->L, T); + lua_assert(J->curfinal->nszirmcode); as->realign = NULL; } @@ -2391,11 +2114,11 @@ void lj_asm_trace(jit_State *J, GCtrace *T) T->mcode = as->mcp; T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0; if (!as->loopref) - asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ + /* Note: this may change as->mctop! 
*/ + asm_tail_fixup(as, T->link, asm_should_profile_exit(J, T)); T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); lj_mcode_sync(T->mcode, origtop); } #undef IR -#endif diff --git a/src/lj_asm.h b/src/lj_asm.h index 2819481b6d..ddaa6bf600 100644 --- a/src/lj_asm.h +++ b/src/lj_asm.h @@ -8,10 +8,8 @@ #include "lj_jit.h" -#if LJ_HASJIT LJ_FUNC void lj_asm_trace(jit_State *J, GCtrace *T); LJ_FUNC void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target); -#endif #endif diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h deleted file mode 100644 index 37bfa40f2f..0000000000 --- a/src/lj_asm_arm.h +++ /dev/null @@ -1,2210 +0,0 @@ -/* -** ARM IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -/* -- Register allocator extensions --------------------------------------- */ - -/* Allocate a register with a hint. */ -static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) -{ - Reg r = IR(ref)->r; - if (ra_noreg(r)) { - if (!ra_hashint(r) && !iscrossref(as, ref)) - ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ - r = ra_allocref(as, ref, allow); - } - ra_noweak(as, r); - return r; -} - -/* Allocate a scratch register pair. 
*/ -static Reg ra_scratchpair(ASMState *as, RegSet allow) -{ - RegSet pick1 = as->freeset & allow; - RegSet pick2 = pick1 & (pick1 >> 1) & RSET_GPREVEN; - Reg r; - if (pick2) { - r = rset_picktop(pick2); - } else { - RegSet pick = pick1 & (allow >> 1) & RSET_GPREVEN; - if (pick) { - r = rset_picktop(pick); - ra_restore(as, regcost_ref(as->cost[r+1])); - } else { - pick = pick1 & (allow << 1) & RSET_GPRODD; - if (pick) { - r = ra_restore(as, regcost_ref(as->cost[rset_picktop(pick)-1])); - } else { - r = ra_evict(as, allow & (allow >> 1) & RSET_GPREVEN); - ra_restore(as, regcost_ref(as->cost[r+1])); - } - } - } - lua_assert(rset_test(RSET_GPREVEN, r)); - ra_modified(as, r); - ra_modified(as, r+1); - RA_DBGX((as, "scratchpair $r $r", r, r+1)); - return r; -} - -#if !LJ_SOFTFP -/* Allocate two source registers for three-operand instructions. */ -static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) -{ - IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - Reg left = irl->r, right = irr->r; - if (ra_hasreg(left)) { - ra_noweak(as, left); - if (ra_noreg(right)) - right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); - else - ra_noweak(as, right); - } else if (ra_hasreg(right)) { - ra_noweak(as, right); - left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); - } else if (ra_hashint(right)) { - right = ra_allocref(as, ir->op2, allow); - left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); - } else { - left = ra_allocref(as, ir->op1, allow); - right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); - } - return left | (right << 8); -} -#endif - -/* -- Guard handling ------------------------------------------------------ */ - -/* Generate an exit stub group at the bottom of the reserved MCode memory. */ -static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) -{ - MCode *mxp = as->mcbot; - int i; - if (mxp + 4*4+4*EXITSTUBS_PER_GROUP >= as->mctop) - asm_mclimit(as); - /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. 
*/ - *mxp++ = ARMI_STR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_LR)|ARMF_N(RID_SP); - *mxp = ARMI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)-2)&0x00ffffffu); - mxp++; - *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */ - *mxp++ = group*EXITSTUBS_PER_GROUP; - for (i = 0; i < EXITSTUBS_PER_GROUP; i++) - *mxp++ = ARMI_B|((-6-i)&0x00ffffffu); - lj_mcode_sync(as->mcbot, mxp); - lj_mcode_commitbot(as->J, mxp); - as->mcbot = mxp; - as->mclim = as->mcbot + MCLIM_REDZONE; - return mxp - EXITSTUBS_PER_GROUP; -} - -/* Setup all needed exit stubs. */ -static void asm_exitstub_setup(ASMState *as, ExitNo nexits) -{ - ExitNo i; - if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) - lj_trace_err(as->J, LJ_TRERR_SNAPOV); - for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++) - if (as->J->exitstubgroup[i] == NULL) - as->J->exitstubgroup[i] = asm_exitstub_gen(as, i); -} - -/* Emit conditional branch to exit for guard. */ -static void asm_guardcc(ASMState *as, ARMCC cc) -{ - MCode *target = exitstub_addr(as->J, as->snapno); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; - *p = ARMI_BL | ((target-p-2) & 0x00ffffffu); - emit_branch(as, ARMF_CC(ARMI_B, cc^1), p+1); - return; - } - emit_branch(as, ARMF_CC(ARMI_BL, cc), target); -} - -/* -- Operand fusion ------------------------------------------------------ */ - -/* Limit linear search to this distance. Avoids O(n^2) behavior. */ -#define CONFLICT_SEARCH_LIM 31 - -/* Check if there's no conflicting instruction between curins and ref. */ -static int noconflict(ASMState *as, IRRef ref, IROp conflict) -{ - IRIns *ir = as->ir; - IRRef i = as->curins; - if (i > ref + CONFLICT_SEARCH_LIM) - return 0; /* Give up, ref is too far away. */ - while (--i > ref) - if (ir[i].o == conflict) - return 0; /* Conflict found. */ - return 1; /* Ok, no conflict. */ -} - -/* Fuse the array base of colocated arrays. 
*/ -static int32_t asm_fuseabase(ASMState *as, IRRef ref) -{ - IRIns *ir = IR(ref); - if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && - !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) - return (int32_t)sizeof(GCtab); - return 0; -} - -/* Fuse array/hash/upvalue reference into register+offset operand. */ -static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, - int lim) -{ - IRIns *ir = IR(ref); - if (ra_noreg(ir->r)) { - if (ir->o == IR_AREF) { - if (mayfuse(as, ref)) { - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (ofs > -lim && ofs < lim) { - *ofsp = ofs; - return ra_alloc1(as, refa, allow); - } - } - } - } else if (ir->o == IR_HREFK) { - if (mayfuse(as, ref)) { - int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); - if (ofs < lim) { - *ofsp = ofs; - return ra_alloc1(as, ir->op1, allow); - } - } - } else if (ir->o == IR_UREFC) { - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); - *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */ - return ra_allock(as, (ofs & ~255), allow); - } - } - } - *ofsp = 0; - return ra_alloc1(as, ref, allow); -} - -/* Fuse m operand into arithmetic/logic instructions. */ -static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow) -{ - IRIns *ir = IR(ref); - if (ra_hasreg(ir->r)) { - ra_noweak(as, ir->r); - return ARMF_M(ir->r); - } else if (irref_isk(ref)) { - uint32_t k = emit_isk12(ai, ir->i); - if (k) - return k; - } else if (mayfuse(as, ref)) { - if (ir->o >= IR_BSHL && ir->o <= IR_BROR) { - Reg m = ra_alloc1(as, ir->op1, allow); - ARMShift sh = ir->o == IR_BSHL ? ARMSH_LSL : - ir->o == IR_BSHR ? ARMSH_LSR : - ir->o == IR_BSAR ? 
ARMSH_ASR : ARMSH_ROR; - if (irref_isk(ir->op2)) { - return m | ARMF_SH(sh, (IR(ir->op2)->i & 31)); - } else { - Reg s = ra_alloc1(as, ir->op2, rset_exclude(allow, m)); - return m | ARMF_RSH(sh, s); - } - } else if (ir->o == IR_ADD && ir->op1 == ir->op2) { - Reg m = ra_alloc1(as, ir->op1, allow); - return m | ARMF_SH(ARMSH_LSL, 1); - } - } - return ra_allocref(as, ref, allow); -} - -/* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. */ -static IRRef asm_fuselsl2(ASMState *as, IRRef ref) -{ - IRIns *ir = IR(ref); - if (ra_noreg(ir->r) && mayfuse(as, ref) && ir->o == IR_BSHL && - irref_isk(ir->op2) && IR(ir->op2)->i == 2) - return ir->op1; - return 0; /* No fusion. */ -} - -/* Fuse XLOAD/XSTORE reference into load/store operand. */ -static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, - RegSet allow, int32_t ofs) -{ - IRIns *ir = IR(ref); - Reg base; - if (ra_noreg(ir->r) && canfuse(as, ir)) { - int32_t lim = (!LJ_SOFTFP && (ai & 0x08000000)) ? 1024 : - (ai & 0x04000000) ? 
4096 : 256; - if (ir->o == IR_ADD) { - int32_t ofs2; - if (irref_isk(ir->op2) && - (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim && - (!(!LJ_SOFTFP && (ai & 0x08000000)) || !(ofs2 & 3))) { - ofs = ofs2; - ref = ir->op1; - } else if (ofs == 0 && !(!LJ_SOFTFP && (ai & 0x08000000))) { - IRRef lref = ir->op1, rref = ir->op2; - Reg rn, rm; - if ((ai & 0x04000000)) { - IRRef sref = asm_fuselsl2(as, rref); - if (sref) { - rref = sref; - ai |= ARMF_SH(ARMSH_LSL, 2); - } else if ((sref = asm_fuselsl2(as, lref)) != 0) { - lref = rref; - rref = sref; - ai |= ARMF_SH(ARMSH_LSL, 2); - } - } - rn = ra_alloc1(as, lref, allow); - rm = ra_alloc1(as, rref, rset_exclude(allow, rn)); - if ((ai & 0x04000000)) ai |= ARMI_LS_R; - emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm); - return; - } - } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) { - lua_assert(ofs == 0); - ofs = (int32_t)sizeof(GCstr); - if (irref_isk(ir->op2)) { - ofs += IR(ir->op2)->i; - ref = ir->op1; - } else if (irref_isk(ir->op1)) { - ofs += IR(ir->op1)->i; - ref = ir->op2; - } else { - /* NYI: Fuse ADD with constant. */ - Reg rn = ra_alloc1(as, ir->op1, allow); - uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); - if ((ai & 0x04000000)) - emit_lso(as, ai, rd, rd, ofs); - else - emit_lsox(as, ai, rd, rd, ofs); - emit_dn(as, ARMI_ADD^m, rd, rn); - return; - } - if (ofs <= -lim || ofs >= lim) { - Reg rn = ra_alloc1(as, ref, allow); - Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); - if ((ai & 0x04000000)) ai |= ARMI_LS_R; - emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm); - return; - } - } - } - base = ra_alloc1(as, ref, allow); -#if !LJ_SOFTFP - if ((ai & 0x08000000)) - emit_vlso(as, ai, rd, base, ofs); - else -#endif - if ((ai & 0x04000000)) - emit_lso(as, ai, rd, base, ofs); - else - emit_lsox(as, ai, rd, base, ofs); -} - -#if !LJ_SOFTFP -/* Fuse to multiply-add/sub instruction. 
*/ -static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air) -{ - IRRef lref = ir->op1, rref = ir->op2; - IRIns *irm; - if (lref != rref && - ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && - ra_noreg(irm->r)) || - (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && - (rref = lref, ai = air, ra_noreg(irm->r))))) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); - Reg right, left = ra_alloc2(as, irm, - rset_exclude(rset_exclude(RSET_FPR, dest), add)); - right = (left >> 8); left &= 255; - emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15)); - if (dest != add) emit_dm(as, ARMI_VMOV_D, (dest & 15), (add & 15)); - return 1; - } - return 0; -} -#endif - -/* -- Calls --------------------------------------------------------------- */ - -/* Generate a call to a C function. */ -static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) -{ - uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = 0; -#if LJ_SOFTFP - Reg gpr = REGARG_FIRSTGPR; -#else - Reg gpr, fpr = REGARG_FIRSTFPR, fprodd = 0; -#endif - if ((void *)ci->func) - emit_call(as, (void *)ci->func); -#if !LJ_SOFTFP - for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) - as->cost[gpr] = REGCOST(~0u, ASMREF_L); - gpr = REGARG_FIRSTGPR; -#endif - for (n = 0; n < nargs; n++) { /* Setup args. */ - IRRef ref = args[n]; - IRIns *ir = IR(ref); -#if !LJ_SOFTFP - if (ref && irt_isfp(ir->t)) { - RegSet of = as->freeset; - Reg src; - if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) { - if (irt_isnum(ir->t)) { - if (fpr <= REGARG_LASTFPR) { - ra_leftov(as, fpr, ref); - fpr++; - continue; - } - } else if (fprodd) { /* Ick. */ - src = ra_alloc1(as, ref, RSET_FPR); - emit_dm(as, ARMI_VMOV_S, (fprodd & 15), (src & 15) | 0x00400000); - fprodd = 0; - continue; - } else if (fpr <= REGARG_LASTFPR) { - ra_leftov(as, fpr, ref); - fprodd = fpr++; - continue; - } - /* Workaround to protect argument GPRs from being used for remat. 
*/ - as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1); - src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */ - as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); - fprodd = 0; - goto stackfp; - } - /* Workaround to protect argument GPRs from being used for remat. */ - as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1); - src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */ - as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); - if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u; - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ - if (irt_isnum(ir->t)) { - lua_assert(rset_test(as->freeset, gpr+1)); /* Ditto. */ - emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15)); - gpr += 2; - } else { - emit_dn(as, ARMI_VMOV_R_S, gpr, (src & 15)); - gpr++; - } - } else { - stackfp: - if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; - emit_spstore(as, ir, src, ofs); - ofs += irt_isnum(ir->t) ? 8 : 4; - } - } else -#endif - { - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ - if (ref) ra_leftov(as, gpr, ref); - gpr++; - } else { - if (ref) { - Reg r = ra_alloc1(as, ref, RSET_GPR); - emit_spstore(as, ir, r, ofs); - } - ofs += 4; - } - } - } -} - -/* Setup result reg/sp for call. Evict scratch regs. */ -static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - RegSet drop = RSET_SCRATCH; - int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - if (hiop && ra_hasreg((ir+1)->r)) - rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ - ra_evictset(as, drop); /* Evictions must be performed first. 
*/ - if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); - if (!LJ_SOFTFP && irt_isfp(ir->t)) { - if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) { - Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); - if (irt_isnum(ir->t)) - emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, dest); - else - emit_dn(as, ARMI_VMOV_S_R, RID_RET, dest); - } else { - ra_destreg(as, ir, RID_FPRET); - } - } else if (hiop) { - ra_destpair(as, ir); - } else { - ra_destreg(as, ir, RID_RET); - } - } - UNUSED(ci); -} - -static void asm_callx(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX*2]; - CCallInfo ci; - IRRef func; - IRIns *irf; - ci.flags = asm_callx_flags(as, ir); - asm_collectargs(as, ir, &ci, args); - asm_setupresult(as, ir, &ci); - func = ir->op2; irf = IR(func); - if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } - if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(void *)(irf->i); - } else { /* Need a non-argument register for indirect calls. */ - Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_R4, RID_R12+1)); - emit_m(as, ARMI_BLXr, freg); - ci.func = (ASMFunction)(void *)0; - } - asm_gencall(as, &ci, args); -} - -/* -- Returns ------------------------------------------------------------- */ - -/* Return to lower frame. Guard that it goes to the right spot. */ -static void asm_retf(ASMState *as, IRIns *ir) -{ - Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); - void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); - as->topslot -= (BCReg)delta; - if ((int32_t)as->topslot < 0) as->topslot = 0; - irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ - /* Need to force a spill on REF_BASE now to update the stack slot. 
*/ - emit_lso(as, ARMI_STR, base, RID_SP, ra_spill(as, IR(REF_BASE))); - emit_setgl(as, base, jit_base); - emit_addptr(as, base, -8*delta); - asm_guardcc(as, CC_NE); - emit_nm(as, ARMI_CMP, RID_TMP, - ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); - emit_lso(as, ARMI_LDR, RID_TMP, base, -4); -} - -/* -- Type conversions ---------------------------------------------------- */ - -#if !LJ_SOFTFP -static void asm_tointg(ASMState *as, IRIns *ir, Reg left) -{ - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - Reg dest = ra_dest(as, ir, RSET_GPR); - asm_guardcc(as, CC_NE); - emit_d(as, ARMI_VMRS, 0); - emit_dm(as, ARMI_VCMP_D, (tmp & 15), (left & 15)); - emit_dm(as, ARMI_VCVT_F64_S32, (tmp & 15), (tmp & 15)); - emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); - emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (left & 15)); -} - -static void asm_tobit(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_FPR; - Reg left = ra_alloc1(as, ir->op1, allow); - Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); - Reg tmp = ra_scratch(as, rset_clear(allow, right)); - Reg dest = ra_dest(as, ir, RSET_GPR); - emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); - emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15)); -} -#endif - -static void asm_conv(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); -#if !LJ_SOFTFP - int stfp = (st == IRT_NUM || st == IRT_FLOAT); -#endif - IRRef lref = ir->op1; - /* 64 bit integer conversions are handled by SPLIT. */ - lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64)); -#if LJ_SOFTFP - /* FP conversions are handled by SPLIT. */ - lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); - /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ -#else - lua_assert(irt_type(ir->t) != st); - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ - emit_dm(as, st == IRT_NUM ? 
ARMI_VCVT_F32_F64 : ARMI_VCVT_F64_F32, - (dest & 15), (ra_alloc1(as, lref, RSET_FPR) & 15)); - } else { /* Integer to FP conversion. */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - ARMIns ai = irt_isfloat(ir->t) ? - (st == IRT_INT ? ARMI_VCVT_F32_S32 : ARMI_VCVT_F32_U32) : - (st == IRT_INT ? ARMI_VCVT_F64_S32 : ARMI_VCVT_F64_U32); - emit_dm(as, ai, (dest & 15), (dest & 15)); - emit_dn(as, ARMI_VMOV_S_R, left, (dest & 15)); - } - } else if (stfp) { /* FP to integer conversion. */ - if (irt_isguard(ir->t)) { - /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); - asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); - } else { - Reg left = ra_alloc1(as, lref, RSET_FPR); - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - Reg dest = ra_dest(as, ir, RSET_GPR); - ARMIns ai; - emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); - ai = irt_isint(ir->t) ? - (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) : - (st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32); - emit_dm(as, ai, (tmp & 15), (left & 15)); - } - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); - if ((as->flags & JIT_F_ARMV6)) { - ARMIns ai = st == IRT_I8 ? ARMI_SXTB : - st == IRT_U8 ? ARMI_UXTB : - st == IRT_I16 ? ARMI_SXTH : ARMI_UXTH; - emit_dm(as, ai, dest, left); - } else if (st == IRT_U8) { - emit_dn(as, ARMI_AND|ARMI_K12|255, dest, left); - } else { - uint32_t shift = st == IRT_I8 ? 24 : 16; - ARMShift sh = st == IRT_U16 ? ARMSH_LSR : ARMSH_ASR; - emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, RID_TMP); - emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_LSL, shift), RID_TMP, left); - } - } else { /* Handle 32/32 bit no-op (cast). */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. 
*/ - } - } -} - -static void asm_strto(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; - Reg rlo = 0, rhi = 0, tmp; - int destused = ra_used(ir); - int32_t ofs = 0; - ra_evictset(as, RSET_SCRATCH); -#if LJ_SOFTFP - if (destused) { - if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && - (ir->s & 1) == 0 && ir->s + 1 == (ir+1)->s) { - int i; - for (i = 0; i < 2; i++) { - Reg r = (ir+i)->r; - if (ra_hasreg(r)) { - ra_free(as, r); - ra_modified(as, r); - emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); - } - } - ofs = sps_scale(ir->s); - destused = 0; - } else { - rhi = ra_dest(as, ir+1, RSET_GPR); - rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); - } - } - asm_guardcc(as, CC_EQ); - if (destused) { - emit_lso(as, ARMI_LDR, rhi, RID_SP, 4); - emit_lso(as, ARMI_LDR, rlo, RID_SP, 0); - } -#else - UNUSED(rhi); - if (destused) { - if (ra_hasspill(ir->s)) { - ofs = sps_scale(ir->s); - destused = 0; - if (ra_hasreg(ir->r)) { - ra_free(as, ir->r); - ra_modified(as, ir->r); - emit_spload(as, ir, ir->r, ofs); - } - } else { - rlo = ra_dest(as, ir, RSET_FPR); - } - } - asm_guardcc(as, CC_EQ); - if (destused) - emit_vlso(as, ARMI_VLDR_D, rlo, RID_SP, 0); -#endif - emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET); /* Test return status. */ - args[0] = ir->op1; /* GCstr *str */ - args[1] = ASMREF_TMP1; /* TValue *n */ - asm_gencall(as, ci, args); - tmp = ra_releasetmp(as, ASMREF_TMP1); - if (ofs == 0) - emit_dm(as, ARMI_MOV, tmp, RID_SP); - else - emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); -} - -/* -- Memory references --------------------------------------------------- */ - -/* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) -{ - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) { - /* Use the number constant itself as a TValue. 
*/ - ra_allockreg(as, i32ptr(ir_knum(ir)), dest); - } else { -#if LJ_SOFTFP - lua_assert(0); -#else - /* Otherwise force a spill and use the spill slot. */ - emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); -#endif - } - } else { - /* Otherwise use [sp] and [sp+4] to hold the TValue. */ - RegSet allow = rset_exclude(RSET_GPR, dest); - Reg type; - emit_dm(as, ARMI_MOV, dest, RID_SP); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - emit_lso(as, ARMI_STR, src, RID_SP, 0); - } - if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) - type = ra_alloc1(as, ref+1, allow); - else - type = ra_allock(as, irt_toitype(ir->t), allow); - emit_lso(as, ARMI_STR, type, RID_SP, 4); - } -} - -static void asm_aref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx, base; - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - uint32_t k = emit_isk12(ARMI_ADD, ofs + 8*IR(ir->op2)->i); - if (k) { - base = ra_alloc1(as, refa, RSET_GPR); - emit_dn(as, ARMI_ADD^k, dest, base); - return; - } - } - base = ra_alloc1(as, ir->op1, RSET_GPR); - idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, base, idx); -} - -/* Inlined hash lookup. Specialized for key type and for const keys. 
-** The equivalent C code is: -** Node *n = hashkey(t, key); -** do { -** if (lj_obj_equal(&n->key, key)) return &n->val; -** } while ((n = nextnode(n))); -** return niltv(L); -*/ -static void asm_href(ASMState *as, IRIns *ir, IROp merge) -{ - RegSet allow = RSET_GPR; - int destused = ra_used(ir); - Reg dest = ra_dest(as, ir, allow); - Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = 0, keyhi = 0, keynumhi = RID_NONE, tmp = RID_TMP; - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); - IRType1 kt = irkey->t; - int32_t k = 0, khi = emit_isk12(ARMI_CMP, irt_toitype(kt)); - uint32_t khash; - MCLabel l_end, l_loop; - rset_clear(allow, tab); - if (!irref_isk(refkey) || irt_isstr(kt)) { -#if LJ_SOFTFP - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - if (irkey[1].o == IR_HIOP) { - if (ra_hasreg((irkey+1)->r)) { - keynumhi = (irkey+1)->r; - keyhi = RID_TMP; - ra_noweak(as, keynumhi); - } else { - keyhi = keynumhi = ra_allocref(as, refkey+1, allow); - } - rset_clear(allow, keynumhi); - khi = 0; - } -#else - if (irt_isnum(kt)) { - key = ra_scratch(as, allow); - rset_clear(allow, key); - keyhi = keynumhi = ra_scratch(as, allow); - rset_clear(allow, keyhi); - khi = 0; - } else { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } -#endif - } else if (irt_isnum(kt)) { - int32_t val = (int32_t)ir_knum(irkey)->u32.lo; - k = emit_isk12(ARMI_CMP, val); - if (!k) { - key = ra_allock(as, val, allow); - rset_clear(allow, key); - } - val = (int32_t)ir_knum(irkey)->u32.hi; - khi = emit_isk12(ARMI_CMP, val); - if (!khi) { - keyhi = ra_allock(as, val, allow); - rset_clear(allow, keyhi); - } - } else if (!irt_ispri(kt)) { - k = emit_isk12(ARMI_CMP, irkey->i); - if (!k) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } - } - if (!irt_ispri(kt)) - tmp = ra_scratchpair(as, allow); - - /* Key not found in chain: jump to exit (if merged) or load niltv. 
*/ - l_end = emit_label(as); - as->invmcp = NULL; - if (merge == IR_NE) - asm_guardcc(as, CC_AL); - else if (destused) - emit_loada(as, dest, niltvg(J2G(as->J))); - - /* Follow hash chain until the end. */ - l_loop = --as->mcp; - emit_n(as, ARMI_CMP|ARMI_K12|0, dest); - emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(Node, next)); - - /* Type and value comparison. */ - if (merge == IR_EQ) - asm_guardcc(as, CC_EQ); - else - emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); - if (!irt_ispri(kt)) { - emit_nm(as, ARMF_CC(ARMI_CMP, CC_EQ)^k, tmp, key); - emit_nm(as, ARMI_CMP^khi, tmp+1, keyhi); - emit_lsox(as, ARMI_LDRD, tmp, dest, (int32_t)offsetof(Node, key)); - } else { - emit_n(as, ARMI_CMP^khi, tmp); - emit_lso(as, ARMI_LDR, tmp, dest, (int32_t)offsetof(Node, key.it)); - } - *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu); - - /* Load main position relative to tab->node into dest. */ - khash = irref_isk(refkey) ? ir_khash(irkey) : 1; - if (khash == 0) { - emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); - } else { - emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp); - emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp); - if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. */ - emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP); - emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash)); - emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); - } else if (irref_isk(refkey)) { - emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash, - rset_exclude(rset_exclude(RSET_GPR, tab), dest)); - emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); - } else { /* Must match with hash*() in lj_tab.c. */ - if (ra_hasreg(keynumhi)) { /* Canonicalize +-0.0 to 0.0. 
*/ - if (keyhi == RID_TMP) - emit_dm(as, ARMF_CC(ARMI_MOV, CC_NE), keyhi, keynumhi); - emit_d(as, ARMF_CC(ARMI_MOV, CC_EQ)|ARMI_K12|0, keyhi); - } - emit_dnm(as, ARMI_AND, tmp, tmp, RID_TMP); - emit_dnm(as, ARMI_SUB|ARMF_SH(ARMSH_ROR, 32-HASH_ROT3), tmp, tmp, tmp+1); - emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_ROR, 32-((HASH_ROT2+HASH_ROT1)&31)), - tmp, tmp+1, tmp); - emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); - emit_dnm(as, ARMI_SUB|ARMF_SH(ARMSH_ROR, 32-HASH_ROT1), tmp+1, tmp+1, tmp); - if (ra_hasreg(keynumhi)) { - emit_dnm(as, ARMI_EOR, tmp+1, tmp, key); - emit_dnm(as, ARMI_ORR|ARMI_S, RID_TMP, tmp, key); /* Test for +-0.0. */ - emit_dnm(as, ARMI_ADD, tmp, keynumhi, keynumhi); -#if !LJ_SOFTFP - emit_dnm(as, ARMI_VMOV_RR_D, key, keynumhi, - (ra_alloc1(as, refkey, RSET_FPR) & 15)); -#endif - } else { - emit_dnm(as, ARMI_EOR, tmp+1, tmp, key); - emit_opk(as, ARMI_ADD, tmp, key, (int32_t)HASH_BIAS, - rset_exclude(rset_exclude(RSET_GPR, tab), key)); - } - } - } -} - -static void asm_hrefk(ASMState *as, IRIns *ir) -{ - IRIns *kslot = IR(ir->op2); - IRIns *irkey = IR(kslot->op1); - int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); - int32_t kofs = ofs + (int32_t)offsetof(Node, key); - Reg dest = (ra_used(ir) || ofs > 4095) ? 
ra_dest(as, ir, RSET_GPR) : RID_NONE; - Reg node = ra_alloc1(as, ir->op1, RSET_GPR); - Reg key = RID_NONE, type = RID_TMP, idx = node; - RegSet allow = rset_exclude(RSET_GPR, node); - lua_assert(ofs % sizeof(Node) == 0); - if (ofs > 4095) { - idx = dest; - rset_clear(allow, dest); - kofs = (int32_t)offsetof(Node, key); - } else if (ra_hasreg(dest)) { - emit_opk(as, ARMI_ADD, dest, node, ofs, allow); - } - asm_guardcc(as, CC_NE); - if (!irt_ispri(irkey->t)) { - RegSet even = (as->freeset & allow); - even = even & (even >> 1) & RSET_GPREVEN; - if (even) { - key = ra_scratch(as, even); - if (rset_test(as->freeset, key+1)) { - type = key+1; - ra_modified(as, type); - } - } else { - key = ra_scratch(as, allow); - } - rset_clear(allow, key); - } - rset_clear(allow, type); - if (irt_isnum(irkey->t)) { - emit_opk(as, ARMF_CC(ARMI_CMP, CC_EQ), 0, type, - (int32_t)ir_knum(irkey)->u32.hi, allow); - emit_opk(as, ARMI_CMP, 0, key, - (int32_t)ir_knum(irkey)->u32.lo, allow); - } else { - if (ra_hasreg(key)) - emit_opk(as, ARMF_CC(ARMI_CMP, CC_EQ), 0, key, irkey->i, allow); - emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype(irkey->t), type); - } - emit_lso(as, ARMI_LDR, type, idx, kofs+4); - if (ra_hasreg(key)) emit_lso(as, ARMI_LDR, key, idx, kofs); - if (ofs > 4095) - emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); -} - -static void asm_uref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_lsptr(as, ARMI_LDR, dest, v); - } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); - emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP); - emit_opk(as, ARMI_ADD, dest, uv, - (int32_t)offsetof(GCupval, tv), RSET_GPR); - emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); - } else { - emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v)); - 
} - emit_lso(as, ARMI_LDR, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); - } -} - -static void asm_fref(ASMState *as, IRIns *ir) -{ - UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); -} - -static void asm_strref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - IRRef ref = ir->op2, refk = ir->op1; - Reg r; - if (irref_isk(ref)) { - IRRef tmp = refk; refk = ref; ref = tmp; - } else if (!irref_isk(refk)) { - uint32_t k, m = ARMI_K12|sizeof(GCstr); - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - IRIns *irr = IR(ir->op2); - if (ra_hasreg(irr->r)) { - ra_noweak(as, irr->r); - right = irr->r; - } else if (mayfuse(as, irr->op2) && - irr->o == IR_ADD && irref_isk(irr->op2) && - (k = emit_isk12(ARMI_ADD, - (int32_t)sizeof(GCstr) + IR(irr->op2)->i))) { - m = k; - right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); - } else { - right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left)); - } - emit_dn(as, ARMI_ADD^m, dest, dest); - emit_dnm(as, ARMI_ADD, dest, left, right); - return; - } - r = ra_alloc1(as, ref, RSET_GPR); - emit_opk(as, ARMI_ADD, dest, r, - sizeof(GCstr) + IR(refk)->i, rset_exclude(RSET_GPR, r)); -} - -/* -- Loads and stores ---------------------------------------------------- */ - -static ARMIns asm_fxloadins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: return ARMI_LDRSB; - case IRT_U8: return ARMI_LDRB; - case IRT_I16: return ARMI_LDRSH; - case IRT_U16: return ARMI_LDRH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D; - case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; - default: return ARMI_LDR; - } -} - -static ARMIns asm_fxstoreins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return ARMI_STRB; - case IRT_I16: case IRT_U16: return ARMI_STRH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D; - case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; - default: return ARMI_STR; - } -} - -static void asm_fload(ASMState *as, 
IRIns *ir) -{ - if (ir->op1 == REF_NIL) { - lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */ - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); - ARMIns ai = asm_fxloadins(ir); - int32_t ofs; - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); - return; - } - } - ofs = field_ofs[ir->op2]; - if ((ai & 0x04000000)) - emit_lso(as, ai, dest, idx, ofs); - else - emit_lsox(as, ai, dest, idx, ofs); - } -} - -static void asm_fstore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, RSET_GPR); - IRIns *irf = IR(ir->op1); - Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); - int32_t ofs = field_ofs[irf->op2]; - ARMIns ai = asm_fxstoreins(ir); - if ((ai & 0x04000000)) - emit_lso(as, ai, src, idx, ofs); - else - emit_lsox(as, ai, src, idx, ofs); - } -} - -static void asm_xload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, - (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); -} - -static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, - (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src), ofs); - } -} - -#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) - -static void asm_ahuvload(ASMState *as, IRIns *ir) -{ - int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); - IRType t = hiop ? IRT_NUM : irt_type(ir->t); - Reg dest = RID_NONE, type = RID_NONE, idx; - RegSet allow = RSET_GPR; - int32_t ofs = 0; - if (hiop && ra_used(ir+1)) { - type = ra_dest(as, ir+1, allow); - rset_clear(allow, type); - } - if (ra_used(ir)) { - lua_assert((LJ_SOFTFP ? 
0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); - rset_clear(allow, dest); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow, - (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096); - if (!hiop || type == RID_NONE) { - rset_clear(allow, idx); - if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && - rset_test((as->freeset & allow), dest+1)) { - type = dest+1; - ra_modified(as, type); - } else { - type = RID_TMP; - } - } - asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE); - emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); - if (ra_hasreg(dest)) { -#if !LJ_SOFTFP - if (t == IRT_NUM) - emit_vlso(as, ARMI_VLDR_D, dest, idx, ofs); - else -#endif - emit_lso(as, ARMI_LDR, dest, idx, ofs); - } - emit_lso(as, ARMI_LDR, type, idx, ofs+4); -} - -static void asm_ahustore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - RegSet allow = RSET_GPR; - Reg idx, src = RID_NONE, type = RID_NONE; - int32_t ofs = 0; -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); - idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1024); - emit_vlso(as, ARMI_VSTR_D, src, idx, ofs); - } else -#endif - { - int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); - if (!irt_ispri(ir->t)) { - src = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, src); - } - if (hiop) - type = ra_alloc1(as, (ir+1)->op2, allow); - else - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), 4096); - if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs); - emit_lso(as, ARMI_STR, type, idx, ofs+4); - } - } -} - -static void asm_sload(ASMState *as, IRIns *ir) -{ - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); - int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); - IRType t = hiop ? 
IRT_NUM : irt_type(ir->t); - Reg dest = RID_NONE, type = RID_NONE, base; - RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); -#if LJ_SOFTFP - lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ - if (hiop && ra_used(ir+1)) { - type = ra_dest(as, ir+1, allow); - rset_clear(allow, type); - } -#else - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) { - dest = ra_scratch(as, RSET_FPR); - asm_tointg(as, ir, dest); - t = IRT_NUM; /* Continue with a regular number type check. */ - } else -#endif - if (ra_used(ir)) { - Reg tmp = RID_NONE; - if ((ir->op2 & IRSLOAD_CONVERT)) - tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR); - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); - rset_clear(allow, dest); - base = ra_alloc1(as, REF_BASE, allow); - if ((ir->op2 & IRSLOAD_CONVERT)) { - if (t == IRT_INT) { - emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); - emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (tmp & 15)); - t = IRT_NUM; /* Check for original type. */ - } else { - emit_dm(as, ARMI_VCVT_F64_S32, (dest & 15), (dest & 15)); - emit_dn(as, ARMI_VMOV_S_R, tmp, (dest & 15)); - t = IRT_INT; /* Check for original type. */ - } - dest = tmp; - } - goto dotypecheck; - } - base = ra_alloc1(as, REF_BASE, allow); -dotypecheck: - rset_clear(allow, base); - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - if (ra_noreg(type)) { - if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && - rset_test((as->freeset & allow), dest+1)) { - type = dest+1; - ra_modified(as, type); - } else { - type = RID_TMP; - } - } - asm_guardcc(as, t == IRT_NUM ? 
CC_HS : CC_NE); - emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); - } - if (ra_hasreg(dest)) { -#if !LJ_SOFTFP - if (t == IRT_NUM) { - if (ofs < 1024) { - emit_vlso(as, ARMI_VLDR_D, dest, base, ofs); - } else { - if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4); - emit_vlso(as, ARMI_VLDR_D, dest, RID_TMP, 0); - emit_opk(as, ARMI_ADD, RID_TMP, base, ofs, allow); - return; - } - } else -#endif - emit_lso(as, ARMI_LDR, dest, base, ofs); - } - if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4); -} - -/* -- Allocations --------------------------------------------------------- */ - -#if LJ_HASFFI -static void asm_cnew(ASMState *as, IRIns *ir) -{ - CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID id = (CTypeID)IR(ir->op1)->i; - CTSize sz; - CTInfo info = lj_ctype_info(cts, id, &sz); - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[4]; - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); - RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - - as->gcsteps++; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); - if (ra_used(ir)) - ra_destreg(as, ir, RID_RET); /* GCcdata * */ - - /* Initialize immutable cdata object. */ - if (ir->o == IR_CNEWI) { - int32_t ofs = sizeof(GCcdata); - lua_assert(sz == 4 || sz == 8); - if (sz == 8) { - ofs += 4; ir++; - lua_assert(ir->o == IR_HIOP); - } - for (;;) { - Reg r = ra_alloc1(as, ir->op2, allow); - emit_lso(as, ARMI_STR, r, RID_RET, ofs); - rset_clear(allow, r); - if (ofs == sizeof(GCcdata)) break; - ofs -= 4; ir--; - } - } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. 
*/ - ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* CTypeID id */ - args[2] = ir->op2; /* CTSize sz */ - args[3] = ASMREF_TMP1; /* CTSize align */ - asm_gencall(as, ci, args); - emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); - return; - } - - /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ - { - uint32_t k = emit_isk12(ARMI_MOV, id); - Reg r = k ? RID_R1 : ra_allock(as, id, allow); - emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); - emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); - emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); - if (k) emit_d(as, ARMI_MOV^k, RID_R1); - } - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ - asm_gencall(as, ci, args); - ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), - ra_releasetmp(as, ASMREF_TMP1)); -} -#else -#define asm_cnew(as, ir) ((void)0) -#endif - -/* -- Write barriers ------------------------------------------------------ */ - -static void asm_tbar(ASMState *as, IRIns *ir) -{ - Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); - Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); - Reg gr = ra_allock(as, i32ptr(J2G(as->J)), - rset_exclude(rset_exclude(RSET_GPR, tab), link)); - Reg mark = RID_TMP; - MCLabel l_end = emit_label(as); - emit_lso(as, ARMI_STR, link, tab, (int32_t)offsetof(GCtab, gclist)); - emit_lso(as, ARMI_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); - emit_lso(as, ARMI_STR, tab, gr, - (int32_t)offsetof(global_State, gc.grayagain)); - emit_dn(as, ARMI_BIC|ARMI_K12|LJ_GC_BLACK, mark, mark); - emit_lso(as, ARMI_LDR, link, gr, - (int32_t)offsetof(global_State, gc.grayagain)); - emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); - emit_n(as, ARMI_TST|ARMI_K12|LJ_GC_BLACK, mark); - emit_lso(as, ARMI_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); -} - -static void asm_obar(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = 
&lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; - IRRef args[2]; - MCLabel l_end; - Reg obj, val, tmp; - /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ir->op1; /* TValue *tv */ - asm_gencall(as, ci, args); - if ((l_end[-1] >> 28) == CC_AL) - l_end[-1] = ARMF_CC(l_end[-1], CC_NE); - else - emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); - ra_allockreg(as, i32ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1)); - obj = IR(ir->op1)->r; - tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); - emit_n(as, ARMF_CC(ARMI_TST, CC_NE)|ARMI_K12|LJ_GC_BLACK, tmp); - emit_n(as, ARMI_TST|ARMI_K12|LJ_GC_WHITES, RID_TMP); - val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); - emit_lso(as, ARMI_LDRB, tmp, obj, - (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); - emit_lso(as, ARMI_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); -} - -/* -- Arithmetic and logic operations ------------------------------------- */ - -#if !LJ_SOFTFP -static void asm_fparith(ASMState *as, IRIns *ir, ARMIns ai) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15)); -} - -static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); - emit_dm(as, ai, (dest & 15), (left & 15)); -} - -static void asm_callround(ASMState *as, IRIns *ir, int id) -{ - /* The modified regs must match with the *.dasc implementation. 
*/ - RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)| - RID2RSET(RID_R3)|RID2RSET(RID_R12); - RegSet of; - Reg dest, src; - ra_evictset(as, drop); - dest = ra_dest(as, ir, RSET_FPR); - emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); - emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : - id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : - (void *)lj_vm_trunc_sf); - /* Workaround to protect argument GPRs from being used for remat. */ - of = as->freeset; - as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1); - as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); - src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ - as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1)); - emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); -} - -static void asm_fpmath(ASMState *as, IRIns *ir) -{ - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; - if (ir->op2 <= IRFPM_TRUNC) - asm_callround(as, ir, ir->op2); - else if (ir->op2 == IRFPM_SQRT) - asm_fpunary(as, ir, ARMI_VSQRT_D); - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); -} -#endif - -static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) -{ - IRIns *ir; - if (irref_isk(rref)) - return 0; /* Don't swap constants to the left. */ - if (irref_isk(lref)) - return 1; /* But swap constants to the right. */ - ir = IR(rref); - if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || - (ir->o == IR_ADD && ir->op1 == ir->op2)) - return 0; /* Don't swap fusable operands to the left. */ - ir = IR(lref); - if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || - (ir->o == IR_ADD && ir->op1 == ir->op2)) - return 1; /* But swap fusable operands to the right. */ - return 0; /* Otherwise don't swap. 
*/ -} - -static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai) -{ - IRRef lref = ir->op1, rref = ir->op2; - Reg left, dest = ra_dest(as, ir, RSET_GPR); - uint32_t m; - if (asm_swapops(as, lref, rref)) { - IRRef tmp = lref; lref = rref; rref = tmp; - if ((ai & ~ARMI_S) == ARMI_SUB || (ai & ~ARMI_S) == ARMI_SBC) - ai ^= (ARMI_SUB^ARMI_RSB); - } - left = ra_hintalloc(as, lref, dest, RSET_GPR); - m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); - if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */ - asm_guardcc(as, CC_VS); - ai |= ARMI_S; - } - emit_dn(as, ai^m, dest, left); -} - -static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) -{ - if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ - as->flagmcp = NULL; - as->mcp++; - ai |= ARMI_S; - } - asm_intop(as, ir, ai); -} - -static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_dn(as, ai|ARMI_K12|0, dest, left); -} - -/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */ -static void asm_intmul(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - Reg tmp = RID_NONE; - /* ARMv5 restriction: dest != left and dest_hi != left. */ - if (dest == left && left != right) { left = right; right = dest; } - if (irt_isguard(ir->t)) { /* IR_MULOV */ - if (!(as->flags & JIT_F_ARMV6) && dest == left) - tmp = left = ra_scratch(as, rset_exclude(RSET_GPR, left)); - asm_guardcc(as, CC_NE); - emit_nm(as, ARMI_TEQ|ARMF_SH(ARMSH_ASR, 31), RID_TMP, dest); - emit_dnm(as, ARMI_SMULL|ARMF_S(right), dest, RID_TMP, left); - } else { - if (!(as->flags & JIT_F_ARMV6) && dest == left) tmp = left = RID_TMP; - emit_nm(as, ARMI_MUL|ARMF_S(right), dest, left); - } - /* Only need this for the dest == left == right case. 
*/ - if (ra_hasreg(tmp)) emit_dm(as, ARMI_MOV, tmp, right); -} - -static void asm_add(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, ARMI_VMLA_D, ARMI_VMLA_D)) - asm_fparith(as, ir, ARMI_VADD_D); - return; - } -#endif - asm_intop_s(as, ir, ARMI_ADD); -} - -static void asm_sub(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, ARMI_VNMLS_D, ARMI_VMLS_D)) - asm_fparith(as, ir, ARMI_VSUB_D); - return; - } -#endif - asm_intop_s(as, ir, ARMI_SUB); -} - -static void asm_mul(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, ARMI_VMUL_D); - return; - } -#endif - asm_intmul(as, ir); -} - -#define asm_addov(as, ir) asm_add(as, ir) -#define asm_subov(as, ir) asm_sub(as, ir) -#define asm_mulov(as, ir) asm_mul(as, ir) - -#if !LJ_SOFTFP -#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) -#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) -#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) -#endif - -#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) - -static void asm_neg(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, ARMI_VNEG_D); - return; - } -#endif - asm_intneg(as, ir, ARMI_RSB); -} - -static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) -{ - if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */ - uint32_t cc = (as->mcp[1] >> 28); - as->flagmcp = NULL; - if (cc <= CC_NE) { - as->mcp++; - ai |= ARMI_S; - } else if (cc == CC_GE) { - *++as->mcp ^= ((CC_GE^CC_PL) << 28); - ai |= ARMI_S; - } else if (cc == CC_LT) { - *++as->mcp ^= ((CC_LT^CC_MI) << 28); - ai |= ARMI_S; - } /* else: other conds don't work with bit ops. 
*/ - } - if (ir->op2 == 0) { - Reg dest = ra_dest(as, ir, RSET_GPR); - uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); - emit_d(as, ai^m, dest); - } else { - /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */ - asm_intop(as, ir, ai); - } -} - -#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN) - -static void asm_bswap(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - if ((as->flags & JIT_F_ARMV6)) { - emit_dm(as, ARMI_REV, dest, left); - } else { - Reg tmp2 = dest; - if (tmp2 == left) - tmp2 = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, dest), left)); - emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_LSR, 8), dest, tmp2, RID_TMP); - emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_ROR, 8), tmp2, left); - emit_dn(as, ARMI_BIC|ARMI_K12|256*8|255, RID_TMP, RID_TMP); - emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_ROR, 16), RID_TMP, left, left); - } -} - -#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND) -#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR) -#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR) - -static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) -{ - if (irref_isk(ir->op2)) { /* Constant shifts. */ - /* NYI: Turn SHL+SHR or BAND+SHR into uxtb, uxth or ubfx. */ - /* NYI: Turn SHL+ASR into sxtb, sxth or sbfx. 
*/ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - int32_t shift = (IR(ir->op2)->i & 31); - emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, left); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dm(as, ARMI_MOV|ARMF_RSH(sh, right), dest, left); - } -} - -#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL) -#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR) -#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR) -#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR) -#define asm_brol(as, ir) lua_assert(0) - -static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) -{ - uint32_t kcmp = 0, kmov = 0; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - Reg right = 0; - if (irref_isk(ir->op2)) { - kcmp = emit_isk12(ARMI_CMP, IR(ir->op2)->i); - if (kcmp) kmov = emit_isk12(ARMI_MOV, IR(ir->op2)->i); - } - if (!kmov) { - kcmp = 0; - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - } - if (kmov || dest != right) { - emit_dm(as, ARMF_CC(ARMI_MOV, cc)^kmov, dest, right); - cc ^= 1; /* Must use opposite conditions for paired moves. */ - } else { - cc ^= (CC_LT^CC_GT); /* Otherwise may swap CC_LT <-> CC_GT. */ - } - if (dest != left) emit_dm(as, ARMF_CC(ARMI_MOV, cc), dest, left); - emit_nm(as, ARMI_CMP^kcmp, left, right); -} - -#if LJ_SOFTFP -static void asm_sfpmin_max(ASMState *as, IRIns *ir, int cc) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; - RegSet drop = RSET_SCRATCH; - Reg r; - IRRef args[4]; - args[0] = ir->op1; args[1] = (ir+1)->op1; - args[2] = ir->op2; args[3] = (ir+1)->op2; - /* __aeabi_cdcmple preserves r0-r3. 
*/ - if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); - if (ra_hasreg((ir+1)->r)) rset_clear(drop, (ir+1)->r); - if (!rset_test(as->freeset, RID_R2) && - regcost_ref(as->cost[RID_R2]) == args[2]) rset_clear(drop, RID_R2); - if (!rset_test(as->freeset, RID_R3) && - regcost_ref(as->cost[RID_R3]) == args[3]) rset_clear(drop, RID_R3); - ra_evictset(as, drop); - ra_destpair(as, ir); - emit_dm(as, ARMF_CC(ARMI_MOV, cc), RID_RETHI, RID_R3); - emit_dm(as, ARMF_CC(ARMI_MOV, cc), RID_RETLO, RID_R2); - emit_call(as, (void *)ci->func); - for (r = RID_R0; r <= RID_R3; r++) - ra_leftov(as, r, args[r-RID_R0]); -} -#else -static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc) -{ - Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = ((left >> 8) & 15); left &= 15; - if (dest != left) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc^1), dest, left); - if (dest != right) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc), dest, right); - emit_d(as, ARMI_VMRS, 0); - emit_dm(as, ARMI_VCMP_D, left, right); -} -#endif - -static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) -{ -#if LJ_SOFTFP - UNUSED(fcc); -#else - if (irt_isnum(ir->t)) - asm_fpmin_max(as, ir, fcc); - else -#endif - asm_intmin_max(as, ir, cc); -} - -#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) -#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) - -/* -- Comparisons --------------------------------------------------------- */ - -/* Map of comparisons to flags. ORDER IR. */ -static const uint8_t asm_compmap[IR_ABC+1] = { - /* op FP swp int cc FP cc */ - /* LT */ CC_GE + (CC_HS << 4), - /* GE x */ CC_LT + (CC_HI << 4), - /* LE */ CC_GT + (CC_HI << 4), - /* GT x */ CC_LE + (CC_HS << 4), - /* ULT x */ CC_HS + (CC_LS << 4), - /* UGE */ CC_LO + (CC_LO << 4), - /* ULE x */ CC_HI + (CC_LO << 4), - /* UGT */ CC_LS + (CC_LS << 4), - /* EQ */ CC_NE + (CC_NE << 4), - /* NE */ CC_EQ + (CC_EQ << 4), - /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. 
*/ -}; - -#if LJ_SOFTFP -/* FP comparisons. */ -static void asm_sfpcomp(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; - RegSet drop = RSET_SCRATCH; - Reg r; - IRRef args[4]; - int swp = (((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1) << 1); - args[swp^0] = ir->op1; args[swp^1] = (ir+1)->op1; - args[swp^2] = ir->op2; args[swp^3] = (ir+1)->op2; - /* __aeabi_cdcmple preserves r0-r3. This helps to reduce spills. */ - for (r = RID_R0; r <= RID_R3; r++) - if (!rset_test(as->freeset, r) && - regcost_ref(as->cost[r]) == args[r-RID_R0]) rset_clear(drop, r); - ra_evictset(as, drop); - asm_guardcc(as, (asm_compmap[ir->o] >> 4)); - emit_call(as, (void *)ci->func); - for (r = RID_R0; r <= RID_R3; r++) - ra_leftov(as, r, args[r-RID_R0]); -} -#else -/* FP comparisons. */ -static void asm_fpcomp(ASMState *as, IRIns *ir) -{ - Reg left, right; - ARMIns ai; - int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); - if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { - left = (ra_alloc1(as, ir->op1, RSET_FPR) & 15); - right = 0; - ai = ARMI_VCMPZ_D; - } else { - left = ra_alloc2(as, ir, RSET_FPR); - if (swp) { - right = (left & 15); left = ((left >> 8) & 15); - } else { - right = ((left >> 8) & 15); left &= 15; - } - ai = ARMI_VCMP_D; - } - asm_guardcc(as, (asm_compmap[ir->o] >> 4)); - emit_d(as, ARMI_VMRS, 0); - emit_dm(as, ai, left, right); -} -#endif - -/* Integer comparisons. 
*/ -static void asm_intcomp(ASMState *as, IRIns *ir) -{ - ARMCC cc = (asm_compmap[ir->o] & 15); - IRRef lref = ir->op1, rref = ir->op2; - Reg left; - uint32_t m; - int cmpprev0 = 0; - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); - if (asm_swapops(as, lref, rref)) { - Reg tmp = lref; lref = rref; rref = tmp; - if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ - else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ - } - if (irref_isk(rref) && IR(rref)->i == 0) { - IRIns *irl = IR(lref); - cmpprev0 = (irl+1 == ir); - /* Combine comp(BAND(left, right), 0) into tst left, right. */ - if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { - IRRef blref = irl->op1, brref = irl->op2; - uint32_t m2 = 0; - Reg bleft; - if (asm_swapops(as, blref, brref)) { - Reg tmp = blref; blref = brref; brref = tmp; - } - if (irref_isk(brref)) { - m2 = emit_isk12(ARMI_AND, IR(brref)->i); - if ((m2 & (ARMI_AND^ARMI_BIC))) - goto notst; /* Not beneficial if we miss a constant operand. */ - } - if (cc == CC_GE) cc = CC_PL; - else if (cc == CC_LT) cc = CC_MI; - else if (cc > CC_NE) goto notst; /* Other conds don't work with tst. */ - bleft = ra_alloc1(as, blref, RSET_GPR); - if (!m2) m2 = asm_fuseopm(as, 0, brref, rset_exclude(RSET_GPR, bleft)); - asm_guardcc(as, cc); - emit_n(as, ARMI_TST^m2, bleft); - return; - } - } -notst: - left = ra_alloc1(as, lref, RSET_GPR); - m = asm_fuseopm(as, ARMI_CMP, rref, rset_exclude(RSET_GPR, left)); - asm_guardcc(as, cc); - emit_n(as, ARMI_CMP^m, left); - /* Signed comparison with zero and referencing previous ins? */ - if (cmpprev0 && (cc <= CC_NE || cc >= CC_GE)) - as->flagmcp = as->mcp; /* Allow elimination of the compare. */ -} - -static void asm_comp(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) - asm_fpcomp(as, ir); - else -#endif - asm_intcomp(as, ir); -} - -#define asm_equal(as, ir) asm_comp(as, ir) - -#if LJ_HASFFI -/* 64 bit integer comparisons. 
*/ -static void asm_int64comp(ASMState *as, IRIns *ir) -{ - int signedcomp = (ir->o <= IR_GT); - ARMCC cclo, cchi; - Reg leftlo, lefthi; - uint32_t mlo, mhi; - RegSet allow = RSET_GPR, oldfree; - - /* Always use unsigned comparison for loword. */ - cclo = asm_compmap[ir->o + (signedcomp ? 4 : 0)] & 15; - leftlo = ra_alloc1(as, ir->op1, allow); - oldfree = as->freeset; - mlo = asm_fuseopm(as, ARMI_CMP, ir->op2, rset_clear(allow, leftlo)); - allow &= ~(oldfree & ~as->freeset); /* Update for allocs of asm_fuseopm. */ - - /* Use signed or unsigned comparison for hiword. */ - cchi = asm_compmap[ir->o] & 15; - lefthi = ra_alloc1(as, (ir+1)->op1, allow); - mhi = asm_fuseopm(as, ARMI_CMP, (ir+1)->op2, rset_clear(allow, lefthi)); - - /* All register allocations must be performed _before_ this point. */ - if (signedcomp) { - MCLabel l_around = emit_label(as); - asm_guardcc(as, cclo); - emit_n(as, ARMI_CMP^mlo, leftlo); - emit_branch(as, ARMF_CC(ARMI_B, CC_NE), l_around); - if (cchi == CC_GE || cchi == CC_LE) cchi ^= 6; /* GE -> GT, LE -> LT */ - asm_guardcc(as, cchi); - } else { - asm_guardcc(as, cclo); - emit_n(as, ARMF_CC(ARMI_CMP, CC_EQ)^mlo, leftlo); - } - emit_n(as, ARMI_CMP^mhi, lefthi); -} -#endif - -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ - -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ -static void asm_hiop(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI || LJ_SOFTFP - /* HIOP is marked as a store because it needs its own DCE logic. */ - int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ - if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; - if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */ - as->curins--; /* Always skip the loword comparison. 
*/ -#if LJ_SOFTFP - if (!irt_isint(ir->t)) { - asm_sfpcomp(as, ir-1); - return; - } -#endif -#if LJ_HASFFI - asm_int64comp(as, ir-1); -#endif - return; -#if LJ_SOFTFP - } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { - as->curins--; /* Always skip the loword min/max. */ - if (uselo || usehi) - asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); - return; -#elif LJ_HASFFI - } else if ((ir-1)->o == IR_CONV) { - as->curins--; /* Always skip the CONV. */ - if (usehi || uselo) - asm_conv64(as, ir); - return; -#endif - } else if ((ir-1)->o == IR_XSTORE) { - if ((ir-1)->r != RID_SINK) - asm_xstore_(as, ir, 4); - return; - } - if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ - switch ((ir-1)->o) { -#if LJ_HASFFI - case IR_ADD: - as->curins--; - asm_intop(as, ir, ARMI_ADC); - asm_intop(as, ir-1, ARMI_ADD|ARMI_S); - break; - case IR_SUB: - as->curins--; - asm_intop(as, ir, ARMI_SBC); - asm_intop(as, ir-1, ARMI_SUB|ARMI_S); - break; - case IR_NEG: - as->curins--; - asm_intneg(as, ir, ARMI_RSC); - asm_intneg(as, ir-1, ARMI_RSB|ARMI_S); - break; -#endif -#if LJ_SOFTFP - case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - case IR_STRTO: - if (!uselo) - ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ - break; -#endif - case IR_CALLN: - case IR_CALLS: - case IR_CALLXS: - if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ - break; -#if LJ_SOFTFP - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: -#endif - case IR_CNEWI: - /* Nothing to do here. Handled by lo op itself. 
*/ - break; - default: lua_assert(0); break; - } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); -#endif -} - -/* -- Profiling ----------------------------------------------------------- */ - -static void asm_prof(ASMState *as, IRIns *ir) -{ - UNUSED(ir); - asm_guardcc(as, CC_NE); - emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP); - emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); -} - -/* -- Stack handling ------------------------------------------------------ */ - -/* Check Lua stack size for overflow. Use exit handler as fallback. */ -static void asm_stack_check(ASMState *as, BCReg topslot, - IRIns *irp, RegSet allow, ExitNo exitno) -{ - Reg pbase; - uint32_t k; - if (irp) { - if (!ra_hasspill(irp->s)) { - pbase = irp->r; - lua_assert(ra_hasreg(pbase)); - } else if (allow) { - pbase = rset_pickbot(allow); - } else { - pbase = RID_RET; - emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */ - } - } else { - pbase = RID_BASE; - } - emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno)); - k = emit_isk12(0, (int32_t)(8*topslot)); - lua_assert(k); - emit_n(as, ARMI_CMP^k, RID_TMP); - emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase); - emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, - (int32_t)offsetof(lua_State, maxstack)); - if (irp) { /* Must not spill arbitrary registers in head of side trace. */ - int32_t i = i32ptr(&J2G(as->J)->cur_L); - if (ra_hasspill(irp->s)) - emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); - emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); - if (ra_hasspill(irp->s) && !allow) - emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ - emit_loadi(as, RID_TMP, (i & ~4095)); - } else { - emit_getgl(as, RID_TMP, cur_L); - } -} - -/* Restore Lua stack from on-trace state. 
*/ -static void asm_stack_restore(ASMState *as, SnapShot *snap) -{ - SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; - MSize n, nent = snap->nent; - /* Store the value of all modified slots to the Lua stack. */ - for (n = 0; n < nent; n++) { - SnapEntry sn = map[n]; - BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); - IRRef ref = snap_ref(sn); - IRIns *ir = IR(ref); - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { -#if LJ_SOFTFP - RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); - Reg tmp; - lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ - tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, - rset_exclude(RSET_GPREVEN, RID_BASE)); - emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs); - if (rset_test(as->freeset, tmp+1)) odd = RID2RSET(tmp+1); - tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, odd); - emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4); -#else - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs); -#endif - } else { - RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); - Reg type; - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE)); - emit_lso(as, ARMI_STR, src, RID_BASE, ofs); - if (rset_test(as->freeset, src+1)) odd = RID2RSET(src+1); - } - if ((sn & (SNAP_CONT|SNAP_FRAME))) { - if (s == 0) continue; /* Do not overwrite link to previous frame. 
*/ - type = ra_allock(as, (int32_t)(*flinks--), odd); -#if LJ_SOFTFP - } else if ((sn & SNAP_SOFTFPNUM)) { - type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE)); -#endif - } else { - type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); - } - emit_lso(as, ARMI_STR, type, RID_BASE, ofs+4); - } - checkmclim(as); - } - lua_assert(map + nent == flinks); -} - -/* -- GC handling --------------------------------------------------------- */ - -/* Check GC threshold and do one or more GC steps. */ -static void asm_gc_check(ASMState *as) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; - IRRef args[2]; - MCLabel l_end; - Reg tmp1, tmp2; - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ - asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */ - emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ASMREF_TMP2; /* MSize steps */ - asm_gencall(as, ci, args); - tmp1 = ra_releasetmp(as, ASMREF_TMP1); - tmp2 = ra_releasetmp(as, ASMREF_TMP2); - emit_loadi(as, tmp2, as->gcsteps); - /* Jump around GC step if GC total < GC threshold. */ - emit_branch(as, ARMF_CC(ARMI_B, CC_LS), l_end); - emit_nm(as, ARMI_CMP, RID_TMP, tmp2); - emit_lso(as, ARMI_LDR, tmp2, tmp1, - (int32_t)offsetof(global_State, gc.threshold)); - emit_lso(as, ARMI_LDR, RID_TMP, tmp1, - (int32_t)offsetof(global_State, gc.total)); - ra_allockreg(as, i32ptr(J2G(as->J)), tmp1); - as->gcsteps = 0; - checkmclim(as); -} - -/* -- Loop handling ------------------------------------------------------- */ - -/* Fixup the loop branch. */ -static void asm_loop_fixup(ASMState *as) -{ - MCode *p = as->mctop; - MCode *target = as->mcp; - if (as->loopinv) { /* Inverted loop branch? */ - /* asm_guardcc already inverted the bcc and patched the final bl. 
*/ - p[-2] |= ((uint32_t)(target-p) & 0x00ffffffu); - } else { - p[-1] = ARMI_B | ((uint32_t)((target-p)-1) & 0x00ffffffu); - } -} - -/* -- Head of trace ------------------------------------------------------- */ - -/* Reload L register from g->cur_L. */ -static void asm_head_lreg(ASMState *as) -{ - IRIns *ir = IR(ASMREF_L); - if (ra_used(ir)) { - Reg r = ra_dest(as, ir, RSET_GPR); - emit_getgl(as, r, cur_L); - ra_evictk(as); - } -} - -/* Coalesce BASE register for a root trace. */ -static void asm_head_root_base(ASMState *as) -{ - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - ra_destreg(as, ir, RID_BASE); -} - -/* Coalesce BASE register for a side trace. */ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) -{ - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - if (ra_hasspill(irp->s)) { - rset_clear(allow, ra_dest(as, ir, allow)); - } else { - Reg r = irp->r; - lua_assert(ra_hasreg(r)); - rset_clear(allow, r); - if (r != ir->r && !rset_test(as->freeset, r)) - ra_restore(as, regcost_ref(as->cost[r])); - ra_destreg(as, ir, r); - } - return allow; -} - -/* -- Tail of trace ------------------------------------------------------- */ - -/* Fixup the tail code. */ -static void asm_tail_fixup(ASMState *as, TraceNo lnk) -{ - MCode *p = as->mctop; - MCode *target; - int32_t spadj = as->T->spadjust; - if (spadj == 0) { - as->mctop = --p; - } else { - /* Patch stack adjustment. */ - uint32_t k = emit_isk12(ARMI_ADD, spadj); - lua_assert(k); - p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); - } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = ARMI_B|(((target-p)-1)&0x00ffffffu); -} - -/* Prepare tail of code. 
*/ -static void asm_tail_prep(ASMState *as) -{ - MCode *p = as->mctop - 1; /* Leave room for exit branch. */ - if (as->loopref) { - as->invmcp = as->mcp = p; - } else { - as->mcp = p-1; /* Leave room for stack pointer adjustment. */ - as->invmcp = NULL; - } - *p = 0; /* Prevent load/store merging. */ -} - -/* -- Trace setup --------------------------------------------------------- */ - -/* Ensure there are enough stack slots for call arguments. */ -static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - IRRef args[CCI_NARGS_MAX*2]; - uint32_t i, nargs = CCI_XNARGS(ci); - int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) { - if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { - if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) { - if (irt_isnum(IR(args[i])->t)) { - if (nfpr > 0) nfpr--; - else fprodd = 0, nslots = (nslots + 3) & ~1; - } else { - if (fprodd) fprodd--; - else if (nfpr > 0) fprodd = 1, nfpr--; - else nslots++; - } - } else if (irt_isnum(IR(args[i])->t)) { - ngpr &= ~1; - if (ngpr > 0) ngpr -= 2; else nslots += 2; - } else { - if (ngpr > 0) ngpr--; else nslots++; - } - } else { - if (ngpr > 0) ngpr--; else nslots++; - } - } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; - return REGSP_HINT(RID_RET); -} - -static void asm_setup_target(ASMState *as) -{ - /* May need extra exit for asm_stack_check on side traces. */ - asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); -} - -/* -- Trace patching ------------------------------------------------------ */ - -/* Patch exit jumps of existing machine code to a new target. 
*/ -void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) -{ - MCode *p = T->mcode; - MCode *pe = (MCode *)((char *)p + T->szmcode); - MCode *cstart = NULL, *cend = p; - MCode *mcarea = lj_mcode_patch(J, p, 0); - MCode *px = exitstub_addr(J, exitno) - 2; - for (; p < pe; p++) { - /* Look for bl_cc exitstub, replace with b_cc target. */ - uint32_t ins = *p; - if ((ins & 0x0f000000u) == 0x0b000000u && ins < 0xf0000000u && - ((ins ^ (px-p)) & 0x00ffffffu) == 0) { - *p = (ins & 0xfe000000u) | (((target-p)-2) & 0x00ffffffu); - cend = p+1; - if (!cstart) cstart = p; - } - } - lua_assert(cstart != NULL); - lj_mcode_sync(cstart, cend); - lj_mcode_patch(J, mcarea, 1); -} - diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h deleted file mode 100644 index 328e4d7740..0000000000 --- a/src/lj_asm_arm64.h +++ /dev/null @@ -1,2008 +0,0 @@ -/* -** ARM64 IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -** -** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -** Sponsored by Cisco Systems, Inc. -*/ - -/* -- Register allocator extensions --------------------------------------- */ - -/* Allocate a register with a hint. */ -static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) -{ - Reg r = IR(ref)->r; - if (ra_noreg(r)) { - if (!ra_hashint(r) && !iscrossref(as, ref)) - ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ - r = ra_allocref(as, ref, allow); - } - ra_noweak(as, r); - return r; -} - -/* Allocate two source registers for three-operand instructions. 
*/ -static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) -{ - IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - Reg left = irl->r, right = irr->r; - if (ra_hasreg(left)) { - ra_noweak(as, left); - if (ra_noreg(right)) - right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); - else - ra_noweak(as, right); - } else if (ra_hasreg(right)) { - ra_noweak(as, right); - left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); - } else if (ra_hashint(right)) { - right = ra_allocref(as, ir->op2, allow); - left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); - } else { - left = ra_allocref(as, ir->op1, allow); - right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); - } - return left | (right << 8); -} - -/* -- Guard handling ------------------------------------------------------ */ - -/* Setup all needed exit stubs. */ -static void asm_exitstub_setup(ASMState *as, ExitNo nexits) -{ - ExitNo i; - MCode *mxp = as->mctop; - if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) - asm_mclimit(as); - /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ - for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = A64I_BL|((-3-i)&0x03ffffffu); - *--mxp = A64I_MOVZw|A64F_U16(as->T->traceno); - mxp--; - *mxp = A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu); - *--mxp = A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP); - as->mctop = mxp; -} - -static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) -{ - /* Keep this in-sync with exitstub_trace_addr(). */ - return as->mctop + exitno + 3; -} - -/* Emit conditional branch to exit for guard. */ -static void asm_guardcc(ASMState *as, A64CC cc) -{ - MCode *target = asm_exitstub_addr(as, as->snapno); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; - *p = A64I_B | ((target-p) & 0x03ffffffu); - emit_cond_branch(as, cc^1, p-1); - return; - } - emit_cond_branch(as, cc, target); -} - -/* Emit test and branch instruction to exit for guard. 
*/ -static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) -{ - MCode *target = asm_exitstub_addr(as, as->snapno); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; - *p = A64I_B | ((target-p) & 0x03ffffffu); - emit_tnb(as, ai^0x01000000u, r, bit, p-1); - return; - } - emit_tnb(as, ai, r, bit, target); -} - -/* Emit compare and branch instruction to exit for guard. */ -static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r) -{ - MCode *target = asm_exitstub_addr(as, as->snapno); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; - *p = A64I_B | ((target-p) & 0x03ffffffu); - emit_cnb(as, ai^0x01000000u, r, p-1); - return; - } - emit_cnb(as, ai, r, target); -} - -/* -- Operand fusion ------------------------------------------------------ */ - -/* Limit linear search to this distance. Avoids O(n^2) behavior. */ -#define CONFLICT_SEARCH_LIM 31 - -static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) -{ - if (irref_isk(ref)) { - IRIns *ir = IR(ref); - if (ir->o == IR_KNULL || !irt_is64(ir->t)) { - *k = ir->i; - return 1; - } else if (checki32((int64_t)ir_k64(ir)->u64)) { - *k = (int32_t)ir_k64(ir)->u64; - return 1; - } - } - return 0; -} - -/* Check if there's no conflicting instruction between curins and ref. */ -static int noconflict(ASMState *as, IRRef ref, IROp conflict) -{ - IRIns *ir = as->ir; - IRRef i = as->curins; - if (i > ref + CONFLICT_SEARCH_LIM) - return 0; /* Give up, ref is too far away. */ - while (--i > ref) - if (ir[i].o == conflict) - return 0; /* Conflict found. */ - return 1; /* Ok, no conflict. */ -} - -/* Fuse the array base of colocated arrays. 
*/ -static int32_t asm_fuseabase(ASMState *as, IRRef ref) -{ - IRIns *ir = IR(ref); - if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && - !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) - return (int32_t)sizeof(GCtab); - return 0; -} - -#define FUSE_REG 0x40000000 - -/* Fuse array/hash/upvalue reference into register+offset operand. */ -static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, - A64Ins ins) -{ - IRIns *ir = IR(ref); - if (ra_noreg(ir->r)) { - if (ir->o == IR_AREF) { - if (mayfuse(as, ref)) { - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (emit_checkofs(ins, ofs)) { - *ofsp = ofs; - return ra_alloc1(as, refa, allow); - } - } else { - Reg base = ra_alloc1(as, ir->op1, allow); - *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base)); - return base; - } - } - } else if (ir->o == IR_HREFK) { - if (mayfuse(as, ref)) { - int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); - if (emit_checkofs(ins, ofs)) { - *ofsp = ofs; - return ra_alloc1(as, ir->op1, allow); - } - } - } else if (ir->o == IR_UREFC) { - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; - int64_t ofs = glofs(as, &uv->tv); - if (emit_checkofs(ins, ofs)) { - *ofsp = (int32_t)ofs; - return RID_GL; - } - } - } - } - *ofsp = 0; - return ra_alloc1(as, ref, allow); -} - -/* Fuse m operand into arithmetic/logic instructions. 
*/ -static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) -{ - IRIns *ir = IR(ref); - if (ra_hasreg(ir->r)) { - ra_noweak(as, ir->r); - return A64F_M(ir->r); - } else if (irref_isk(ref)) { - uint32_t m; - int64_t k = get_k64val(ir); - if ((ai & 0x1f000000) == 0x0a000000) - m = emit_isk13(k, irt_is64(ir->t)); - else - m = emit_isk12(k); - if (m) - return m; - } else if (mayfuse(as, ref)) { - if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) || - (ir->o == IR_ADD && ir->op1 == ir->op2)) { - A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR : - ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL; - int shift = ir->o == IR_ADD ? 1 : - (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); - IRIns *irl = IR(ir->op1); - if (sh == A64SH_LSL && - irl->o == IR_CONV && - irl->op2 == ((IRT_I64<op1, allow); - return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift); - } else { - Reg m = ra_alloc1(as, ir->op1, allow); - return A64F_M(m) | A64F_SH(sh, shift); - } - } else if (ir->o == IR_CONV && - ir->op2 == ((IRT_I64<op1, allow); - return A64F_M(m) | A64F_EX(A64EX_SXTW); - } - } - return A64F_M(ra_allocref(as, ref, allow)); -} - -/* Fuse XLOAD/XSTORE reference into load/store operand. 
*/ -static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, - RegSet allow) -{ - IRIns *ir = IR(ref); - Reg base; - int32_t ofs = 0; - if (ra_noreg(ir->r) && canfuse(as, ir)) { - if (ir->o == IR_ADD) { - if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) { - ref = ir->op1; - } else { - Reg rn, rm; - IRRef lref = ir->op1, rref = ir->op2; - IRIns *irl = IR(lref); - if (mayfuse(as, irl->op1)) { - unsigned int shift = 4; - if (irl->o == IR_BSHL && irref_isk(irl->op2)) { - shift = (IR(irl->op2)->i & 63); - } else if (irl->o == IR_ADD && irl->op1 == irl->op2) { - shift = 1; - } - if ((ai >> 30) == shift) { - lref = irl->op1; - irl = IR(lref); - ai |= A64I_LS_SH; - } - } - if (irl->o == IR_CONV && - irl->op2 == ((IRT_I64<op1; - ai |= A64I_LS_SXTWx; - } else { - ai |= A64I_LS_LSLx; - } - rm = ra_alloc1(as, lref, allow); - rn = ra_alloc1(as, rref, rset_exclude(allow, rm)); - emit_dnm(as, (ai^A64I_LS_R), rd, rn, rm); - return; - } - } else if (ir->o == IR_STRREF) { - if (asm_isk32(as, ir->op2, &ofs)) { - ref = ir->op1; - } else if (asm_isk32(as, ir->op1, &ofs)) { - ref = ir->op2; - } else { - Reg rn = ra_alloc1(as, ir->op1, allow); - IRIns *irr = IR(ir->op2); - uint32_t m; - if (irr+1 == ir && !ra_used(irr) && - irr->o == IR_ADD && irref_isk(irr->op2)) { - ofs = sizeof(GCstr) + IR(irr->op2)->i; - if (emit_checkofs(ai, ofs)) { - Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn)); - m = A64F_M(rm) | A64F_EX(A64EX_SXTW); - goto skipopm; - } - } - m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); - ofs = sizeof(GCstr); - skipopm: - emit_lso(as, ai, rd, rd, ofs); - emit_dn(as, A64I_ADDx^m, rd, rn); - return; - } - ofs += sizeof(GCstr); - if (!emit_checkofs(ai, ofs)) { - Reg rn = ra_alloc1(as, ref, allow); - Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); - emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm); - return; - } - } - } - base = ra_alloc1(as, ref, allow); - emit_lso(as, ai, (rd & 31), base, ofs); -} - -/* Fuse FP 
multiply-add/sub. */ -static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) -{ - IRRef lref = ir->op1, rref = ir->op2; - IRIns *irm; - if (lref != rref && - ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && - ra_noreg(irm->r)) || - (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && - (rref = lref, ai = air, ra_noreg(irm->r))))) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); - Reg left = ra_alloc2(as, irm, - rset_exclude(rset_exclude(RSET_FPR, dest), add)); - Reg right = (left >> 8); left &= 255; - emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31)); - return 1; - } - return 0; -} - -/* Fuse BAND + BSHL/BSHR into UBFM. */ -static int asm_fuseandshift(ASMState *as, IRIns *ir) -{ - IRIns *irl = IR(ir->op1); - lua_assert(ir->o == IR_BAND); - if (canfuse(as, irl) && irref_isk(ir->op2)) { - uint64_t mask = get_k64val(IR(ir->op2)); - if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) { - int32_t shmask = irt_is64(irl->t) ? 63 : 31; - int32_t shift = (IR(irl->op2)->i & shmask); - int32_t imms = shift; - if (irl->o == IR_BSHL) { - mask >>= shift; - shift = (shmask-shift+1) & shmask; - imms = 0; - } - if (mask && !((mask+1) & mask)) { /* Contiguous 1-bits at the bottom. */ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, irl->op1, RSET_GPR); - A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw; - imms += 63 - emit_clz64(mask); - if (imms > shmask) imms = shmask; - emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left); - return 1; - } - } - } - return 0; -} - -/* Fuse BOR(BSHL, BSHR) into EXTR/ROR. 
*/ -static int asm_fuseorshift(ASMState *as, IRIns *ir) -{ - IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - lua_assert(ir->o == IR_BOR); - if (canfuse(as, irl) && canfuse(as, irr) && - ((irl->o == IR_BSHR && irr->o == IR_BSHL) || - (irl->o == IR_BSHL && irr->o == IR_BSHR))) { - if (irref_isk(irl->op2) && irref_isk(irr->op2)) { - IRRef lref = irl->op1, rref = irr->op1; - uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i; - if (irl->o == IR_BSHR) { /* BSHR needs to be the right operand. */ - uint32_t tmp2; - IRRef tmp1 = lref; lref = rref; rref = tmp1; - tmp2 = lshift; lshift = rshift; rshift = tmp2; - } - if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) { - A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, lref, RSET_GPR); - Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left)); - emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right); - return 1; - } - } - } - return 0; -} - -/* -- Calls --------------------------------------------------------------- */ - -/* Generate a call to a C function. */ -static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) -{ - uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = 0; - Reg gpr, fpr = REGARG_FIRSTFPR; - if ((void *)ci->func) - emit_call(as, (void *)ci->func); - for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) - as->cost[gpr] = REGCOST(~0u, ASMREF_L); - gpr = REGARG_FIRSTGPR; - for (n = 0; n < nargs; n++) { /* Setup args. */ - IRRef ref = args[n]; - IRIns *ir = IR(ref); - if (ref) { - if (irt_isfp(ir->t)) { - if (fpr <= REGARG_LASTFPR) { - lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */ - ra_leftov(as, fpr, ref); - fpr++; - } else { - Reg r = ra_alloc1(as, ref, RSET_FPR); - emit_spstore(as, ir, r, ofs); - ofs += 8; - } - } else { - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. 
*/ - ra_leftov(as, gpr, ref); - gpr++; - } else { - Reg r = ra_alloc1(as, ref, RSET_GPR); - emit_spstore(as, ir, r, ofs); - ofs += 8; - } - } - } - } -} - -/* Setup result reg/sp for call. Evict scratch regs. */ -static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - RegSet drop = RSET_SCRATCH; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); /* Evictions must be performed first. */ - if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); - if (irt_isfp(ir->t)) { - if (ci->flags & CCI_CASTU64) { - Reg dest = ra_dest(as, ir, RSET_FPR) & 31; - emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R, - dest, RID_RET); - } else { - ra_destreg(as, ir, RID_FPRET); - } - } else { - ra_destreg(as, ir, RID_RET); - } - } - UNUSED(ci); -} - -static void asm_callx(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX*2]; - CCallInfo ci; - IRRef func; - IRIns *irf; - ci.flags = asm_callx_flags(as, ir); - asm_collectargs(as, ir, &ci, args); - asm_setupresult(as, ir, &ci); - func = ir->op2; irf = IR(func); - if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } - if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(ir_k64(irf)->u64); - } else { /* Need a non-argument register for indirect calls. */ - Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); - emit_n(as, A64I_BLR, freg); - ci.func = (ASMFunction)(void *)0; - } - asm_gencall(as, &ci, args); -} - -/* -- Returns ------------------------------------------------------------- */ - -/* Return to lower frame. Guard that it goes to the right spot. 
*/ -static void asm_retf(ASMState *as, IRIns *ir) -{ - Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); - void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); - as->topslot -= (BCReg)delta; - if ((int32_t)as->topslot < 0) as->topslot = 0; - irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ - /* Need to force a spill on REF_BASE now to update the stack slot. */ - emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE))); - emit_setgl(as, base, jit_base); - emit_addptr(as, base, -8*delta); - asm_guardcc(as, CC_NE); - emit_nm(as, A64I_CMPx, RID_TMP, - ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base))); - emit_lso(as, A64I_LDRx, RID_TMP, base, -8); -} - -/* -- Type conversions ---------------------------------------------------- */ - -static void asm_tointg(ASMState *as, IRIns *ir, Reg left) -{ - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - Reg dest = ra_dest(as, ir, RSET_GPR); - asm_guardcc(as, CC_NE); - emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31)); - emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest); - emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31)); -} - -static void asm_tobit(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_FPR; - Reg left = ra_alloc1(as, ir->op1, allow); - Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); - Reg tmp = ra_scratch(as, rset_clear(allow, right)); - Reg dest = ra_dest(as, ir, RSET_GPR); - emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31)); - emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31)); -} - -static void asm_conv(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); - int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); - int stfp = (st == IRT_NUM || st == IRT_FLOAT); - IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ - emit_dn(as, st == IRT_NUM ? 
A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32, - (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31)); - } else { /* Integer to FP conversion. */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - A64Ins ai = irt_isfloat(ir->t) ? - (((IRT_IS64 >> st) & 1) ? - (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) : - (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) : - (((IRT_IS64 >> st) & 1) ? - (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) : - (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32)); - emit_dn(as, ai, (dest & 31), left); - } - } else if (stfp) { /* FP to integer conversion. */ - if (irt_isguard(ir->t)) { - /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); - asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); - } else { - Reg left = ra_alloc1(as, lref, RSET_FPR); - Reg dest = ra_dest(as, ir, RSET_GPR); - A64Ins ai = irt_is64(ir->t) ? - (st == IRT_NUM ? - (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : - (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : - (st == IRT_NUM ? - (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : - (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); - emit_dn(as, ai, dest, (left & 31)); - } - } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, lref, RSET_GPR); - A64Ins ai = st == IRT_I8 ? A64I_SXTBw : - st == IRT_U8 ? A64I_UXTBw : - st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw; - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); - emit_dn(as, ai, dest, left); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irt_is64(ir->t)) { - if (st64 || !(ir->op2 & IRCONV_SEXT)) { - /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ - } else { /* 32 to 64 bit sign extension. 
*/ - Reg left = ra_alloc1(as, lref, RSET_GPR); - emit_dn(as, A64I_SXTW, dest, left); - } - } else { - if (st64) { - /* This is either a 32 bit reg/reg mov which zeroes the hiword - ** or a load of the loword from a 64 bit address. - */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - emit_dm(as, A64I_MOVw, dest, left); - } else { /* 32/32 bit no-op (cast). */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ - } - } - } -} - -static void asm_strto(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; - Reg dest = 0, tmp; - int destused = ra_used(ir); - int32_t ofs = 0; - ra_evictset(as, RSET_SCRATCH); - if (destused) { - if (ra_hasspill(ir->s)) { - ofs = sps_scale(ir->s); - destused = 0; - if (ra_hasreg(ir->r)) { - ra_free(as, ir->r); - ra_modified(as, ir->r); - emit_spload(as, ir, ir->r, ofs); - } - } else { - dest = ra_dest(as, ir, RSET_FPR); - } - } - if (destused) - emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); - asm_guardcnb(as, A64I_CBZ, RID_RET); - args[0] = ir->op1; /* GCstr *str */ - args[1] = ASMREF_TMP1; /* TValue *n */ - asm_gencall(as, ci, args); - tmp = ra_releasetmp(as, ASMREF_TMP1); - emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR); -} - -/* -- Memory references --------------------------------------------------- */ - -/* Store tagged value for ref at base+ofs. 
*/ -static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) -{ - RegSet allow = rset_exclude(RSET_GPR, base); - IRIns *ir = IR(ref); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); - if (irref_isk(ref)) { - TValue k; - lj_ir_kvalue(as->J->L, &k, ir); - emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs); - } else { - Reg src = ra_alloc1(as, ref, allow); - rset_clear(allow, src); - if (irt_isinteger(ir->t)) { - Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); - emit_lso(as, A64I_STRx, RID_TMP, base, ofs); - emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src); - } else { - Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - emit_lso(as, A64I_STRx, RID_TMP, base, ofs); - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type); - } - } -} - -/* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) -{ - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) { - /* Use the number constant itself as a TValue. */ - ra_allockreg(as, i64ptr(ir_knum(ir)), dest); - } else { - /* Otherwise force a spill and use the spill slot. */ - emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR); - } - } else { - /* Otherwise use g->tmptv to hold the TValue. */ - asm_tvstore64(as, dest, 0, ref); - ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest); - } -} - -static void asm_aref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx, base; - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? 
tab : ir->op1; - uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i); - if (k) { - base = ra_alloc1(as, refa, RSET_GPR); - emit_dn(as, A64I_ADDx^k, dest, base); - return; - } - } - base = ra_alloc1(as, ir->op1, RSET_GPR); - idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx); -} - -/* Inlined hash lookup. Specialized for key type and for const keys. -** The equivalent C code is: -** Node *n = hashkey(t, key); -** do { -** if (lj_obj_equal(&n->key, key)) return &n->val; -** } while ((n = nextnode(n))); -** return niltv(L); -*/ -static void asm_href(ASMState *as, IRIns *ir, IROp merge) -{ - RegSet allow = RSET_GPR; - int destused = ra_used(ir); - Reg dest = ra_dest(as, ir, allow); - Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = 0, tmp = RID_TMP; - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); - int isk = irref_isk(ir->op2); - IRType1 kt = irkey->t; - uint32_t k = 0; - uint32_t khash; - MCLabel l_end, l_loop, l_next; - rset_clear(allow, tab); - - if (!isk) { - key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); - rset_clear(allow, key); - if (!irt_isstr(kt)) { - tmp = ra_scratch(as, allow); - rset_clear(allow, tmp); - } - } else if (irt_isnum(kt)) { - int64_t val = (int64_t)ir_knum(irkey)->u64; - if (!(k = emit_isk12(val))) { - key = ra_allock(as, val, allow); - rset_clear(allow, key); - } - } else if (!irt_ispri(kt)) { - if (!(k = emit_isk12(irkey->i))) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } - } - - /* Key not found in chain: jump to exit (if merged) or load niltv. */ - l_end = emit_label(as); - as->invmcp = NULL; - if (merge == IR_NE) - asm_guardcc(as, CC_AL); - else if (destused) - emit_loada(as, dest, niltvg(J2G(as->J))); - - /* Follow hash chain until the end. 
*/ - l_loop = --as->mcp; - emit_n(as, A64I_CMPx^A64I_K12^0, dest); - emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); - l_next = emit_label(as); - - /* Type and value comparison. */ - if (merge == IR_EQ) - asm_guardcc(as, CC_EQ); - else - emit_cond_branch(as, CC_EQ, l_end); - - if (irt_isnum(kt)) { - if (isk) { - /* Assumes -0.0 is already canonicalized to +0.0. */ - if (k) - emit_n(as, A64I_CMPx^k, tmp); - else - emit_nm(as, A64I_CMPx, key, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); - } else { - Reg tisnum = ra_allock(as, LJ_TISNUM << 15, allow); - Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key)); - rset_clear(allow, tisnum); - emit_nm(as, A64I_FCMPd, key, ftmp); - emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31)); - emit_cond_branch(as, CC_LO, l_next); - emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n)); - } - } else if (irt_isaddr(kt)) { - Reg scr; - if (isk) { - int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; - scr = ra_allock(as, kk, allow); - emit_nm(as, A64I_CMPx, scr, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); - } else { - scr = ra_scratch(as, allow); - emit_nm(as, A64I_CMPx, tmp, scr); - emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64)); - } - rset_clear(allow, scr); - } else { - Reg type, scr; - lua_assert(irt_ispri(kt) && !irt_isnil(kt)); - type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); - scr = ra_scratch(as, rset_clear(allow, type)); - rset_clear(allow, scr); - emit_nm(as, A64I_CMPw, scr, type); - emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key)); - } - - *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE; - if (!isk && irt_isaddr(kt)) { - Reg type = ra_allock(as, (int32_t)irt_toitype(kt), allow); - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type); - rset_clear(allow, type); - } - /* Load main position relative to tab->node into dest. 
*/ - khash = isk ? ir_khash(irkey) : 1; - if (khash == 0) { - emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node)); - } else { - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest); - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest); - emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node)); - if (isk) { - Reg tmphash = ra_allock(as, khash, allow); - emit_dnm(as, A64I_ANDw, dest, dest, tmphash); - emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); - } else if (irt_isstr(kt)) { - /* Fetch of str->hash is cheaper than ra_allock. */ - emit_dnm(as, A64I_ANDw, dest, dest, tmp); - emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash)); - emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); - } else { /* Must match with hash*() in lj_tab.c. */ - emit_dnm(as, A64I_ANDw, dest, dest, tmp); - emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); - emit_dnm(as, A64I_SUBw, dest, dest, tmp); - emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); - emit_dnm(as, A64I_EORw, dest, dest, tmp); - emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest); - emit_dnm(as, A64I_SUBw, tmp, tmp, dest); - emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); - emit_dnm(as, A64I_EORw, tmp, tmp, dest); - if (irt_isnum(kt)) { - emit_dnm(as, A64I_ADDw, dest, dest, dest); - emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); - emit_dm(as, A64I_MOVw, tmp, dest); - emit_dn(as, A64I_FMOV_R_D, dest, (key & 31)); - } else { - checkmclim(as); - emit_dm(as, A64I_MOVw, tmp, key); - emit_dnm(as, A64I_EORw, dest, dest, - ra_allock(as, irt_toitype(kt) << 15, allow)); - emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); - emit_dm(as, A64I_MOVx, dest, key); - } - } - } -} - -static void asm_hrefk(ASMState *as, IRIns *ir) -{ - IRIns *kslot = IR(ir->op2); - IRIns *irkey = IR(kslot->op1); - int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); - int32_t kofs = ofs + 
(int32_t)offsetof(Node, key); - int bigofs = !emit_checkofs(A64I_LDRx, ofs); - RegSet allow = RSET_GPR; - Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; - Reg node = ra_alloc1(as, ir->op1, allow); - Reg key = ra_scratch(as, rset_clear(allow, node)); - Reg idx = node; - uint64_t k; - lua_assert(ofs % sizeof(Node) == 0); - rset_clear(allow, key); - if (bigofs) { - idx = dest; - rset_clear(allow, dest); - kofs = (int32_t)offsetof(Node, key); - } else if (ra_hasreg(dest)) { - emit_opk(as, A64I_ADDx, dest, node, ofs, allow); - } - asm_guardcc(as, CC_NE); - if (irt_ispri(irkey->t)) { - k = ~((int64_t)~irt_toitype(irkey->t) << 47); - } else if (irt_isnum(irkey->t)) { - k = ir_knum(irkey)->u64; - } else { - k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); - } - emit_nm(as, A64I_CMPx, key, ra_allock(as, k, allow)); - emit_lso(as, A64I_LDRx, key, idx, kofs); - if (bigofs) - emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); -} - -static void asm_uref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_lsptr(as, A64I_LDRx, dest, v); - } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); - emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP); - emit_opk(as, A64I_ADDx, dest, uv, - (int32_t)offsetof(GCupval, tv), RSET_GPR); - emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); - } else { - emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v)); - } - emit_lso(as, A64I_LDRx, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); - } -} - -static void asm_fref(ASMState *as, IRIns *ir) -{ - UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); -} - -static void asm_strref(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_GPR; - Reg dest = 
ra_dest(as, ir, allow); - Reg base = ra_alloc1(as, ir->op1, allow); - IRIns *irr = IR(ir->op2); - int32_t ofs = sizeof(GCstr); - uint32_t m; - rset_clear(allow, base); - if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) { - emit_dn(as, A64I_ADDx^m, dest, base); - } else { - emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest); - emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow)); - } -} - -/* -- Loads and stores ---------------------------------------------------- */ - -static A64Ins asm_fxloadins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: return A64I_LDRB ^ A64I_LS_S; - case IRT_U8: return A64I_LDRB; - case IRT_I16: return A64I_LDRH ^ A64I_LS_S; - case IRT_U16: return A64I_LDRH; - case IRT_NUM: return A64I_LDRd; - case IRT_FLOAT: return A64I_LDRs; - default: return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw; - } -} - -static A64Ins asm_fxstoreins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return A64I_STRB; - case IRT_I16: case IRT_U16: return A64I_STRH; - case IRT_NUM: return A64I_STRd; - case IRT_FLOAT: return A64I_STRs; - default: return irt_is64(ir->t) ? A64I_STRx : A64I_STRw; - } -} - -static void asm_fload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx; - A64Ins ai = asm_fxloadins(ir); - int32_t ofs; - if (ir->op1 == REF_NIL) { - idx = RID_GL; - ofs = (ir->op2 << 2) - GG_OFS(g); - } else { - idx = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. 
*/ - emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx); - return; - } - } - ofs = field_ofs[ir->op2]; - } - emit_lso(as, ai, (dest & 31), idx, ofs); -} - -static void asm_fstore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, RSET_GPR); - IRIns *irf = IR(ir->op1); - Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); - int32_t ofs = field_ofs[irf->op2]; - emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs); - } -} - -static void asm_xload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); -} - -static void asm_xstore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src)); - } -} - -static void asm_ahuvload(ASMState *as, IRIns *ir) -{ - Reg idx, tmp, type; - int32_t ofs = 0; - RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; - lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || - irt_isint(ir->t)); - if (ra_used(ir)) { - Reg dest = ra_dest(as, ir, allow); - tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest; - if (irt_isaddr(ir->t)) { - emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); - } else if (irt_isnum(ir->t)) { - emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); - } else if (irt_isint(ir->t)) { - emit_dm(as, A64I_MOVw, dest, dest); - } - } else { - tmp = ra_scratch(as, gpr); - } - type = ra_scratch(as, rset_clear(gpr, tmp)); - idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx); - /* Always do the type check, even if the load result is unused. */ - asm_guardcc(as, irt_isnum(ir->t) ? 
CC_LS : CC_NE); - if (irt_type(ir->t) >= IRT_NUM) { - lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); - emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), - ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp); - } else if (irt_isaddr(ir->t)) { - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type); - emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); - } else if (irt_isnil(ir->t)) { - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); - } else { - emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), - ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp); - } - if (ofs & FUSE_REG) - emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); - else - emit_lso(as, A64I_LDRx, tmp, idx, ofs); -} - -static void asm_ahustore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - RegSet allow = RSET_GPR; - Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE; - int32_t ofs = 0; - if (irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); - idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd); - if (ofs & FUSE_REG) - emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs &31)); - else - emit_lso(as, A64I_STRd, (src & 31), idx, ofs); - } else { - if (!irt_ispri(ir->t)) { - src = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, src); - if (irt_isinteger(ir->t)) - type = ra_allock(as, (int64_t)LJ_TISNUM << 47, allow); - else - type = ra_allock(as, irt_toitype(ir->t), allow); - } else { - tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), - A64I_STRx); - if (ofs & FUSE_REG) - emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); - else - emit_lso(as, A64I_STRx, tmp, idx, ofs); - if (ra_hasreg(src)) { - if (irt_isinteger(ir->t)) { - emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src); - } else { - emit_dnm(as, A64I_ADDx | 
A64F_SH(A64SH_LSL, 47), tmp, src, type); - } - } - } - } -} - -static void asm_sload(ASMState *as, IRIns *ir) -{ - int32_t ofs = 8*((int32_t)ir->op1-2); - IRType1 t = ir->t; - Reg dest = RID_NONE, base; - RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { - dest = ra_scratch(as, RSET_FPR); - asm_tointg(as, ir, dest); - t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else if (ra_used(ir)) { - Reg tmp = RID_NONE; - if ((ir->op2 & IRSLOAD_CONVERT)) - tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); - lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t)); - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); - base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest)); - if (irt_isaddr(t)) { - emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); - } else if ((ir->op2 & IRSLOAD_CONVERT)) { - if (irt_isint(t)) { - emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31)); - /* If value is already loaded for type check, move it to FPR. */ - if ((ir->op2 & IRSLOAD_TYPECHECK)) - emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest); - else - dest = tmp; - t.irt = IRT_NUM; /* Check for original type. */ - } else { - emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp); - dest = tmp; - t.irt = IRT_INT; /* Check for original type. 
*/ - } - } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { - emit_dm(as, A64I_MOVw, dest, dest); - } - goto dotypecheck; - } - base = ra_alloc1(as, REF_BASE, allow); -dotypecheck: - rset_clear(allow, base); - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - Reg tmp; - if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) { - tmp = dest; - } else { - tmp = ra_scratch(as, allow); - rset_clear(allow, tmp); - } - if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT)) - emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); - /* Need type check, even if the load result is unused. */ - asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE); - if (irt_type(t) >= IRT_NUM) { - lua_assert(irt_isinteger(t) || irt_isnum(t)); - emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), - ra_allock(as, LJ_TISNUM << 15, allow), tmp); - } else if (irt_isnil(t)) { - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); - } else if (irt_ispri(t)) { - emit_nm(as, A64I_CMPx, - ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp); - } else { - Reg type = ra_scratch(as, allow); - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type); - emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); - } - emit_lso(as, A64I_LDRx, tmp, base, ofs); - return; - } - if (ra_hasreg(dest)) { - emit_lso(as, irt_isnum(t) ? A64I_LDRd : - (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, ofs); - } -} - -/* -- Allocations --------------------------------------------------------- */ - -#if LJ_HASFFI -static void asm_cnew(ASMState *as, IRIns *ir) -{ - CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID id = (CTypeID)IR(ir->op1)->i; - CTSize sz; - CTInfo info = lj_ctype_info(cts, id, &sz); - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[4]; - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - - as->gcsteps++; - asm_setupresult(as, ir, ci); /* GCcdata * */ - /* Initialize immutable cdata object. 
*/ - if (ir->o == IR_CNEWI) { - int32_t ofs = sizeof(GCcdata); - Reg r = ra_alloc1(as, ir->op2, allow); - lua_assert(sz == 4 || sz == 8); - emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs); - } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ - ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* CTypeID id */ - args[2] = ir->op2; /* CTSize sz */ - args[3] = ASMREF_TMP1; /* CTSize align */ - asm_gencall(as, ci, args); - emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); - return; - } - - /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ - { - Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow); - emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); - emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); - emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP); - if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1); - } - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ - asm_gencall(as, ci, args); - ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), - ra_releasetmp(as, ASMREF_TMP1)); -} -#else -#define asm_cnew(as, ir) ((void)0) -#endif - -/* -- Write barriers ------------------------------------------------------ */ - -static void asm_tbar(ASMState *as, IRIns *ir) -{ - Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); - Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); - Reg gr = ra_allock(as, i64ptr(J2G(as->J)), - rset_exclude(rset_exclude(RSET_GPR, tab), link)); - Reg mark = RID_TMP; - MCLabel l_end = emit_label(as); - emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist)); - emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); - emit_lso(as, A64I_STRx, tab, gr, - (int32_t)offsetof(global_State, gc.grayagain)); - emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark); - emit_lso(as, A64I_LDRx, link, gr, - 
(int32_t)offsetof(global_State, gc.grayagain)); - emit_cond_branch(as, CC_EQ, l_end); - emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark); - emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); -} - -static void asm_obar(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; - IRRef args[2]; - MCLabel l_end; - RegSet allow = RSET_GPR; - Reg obj, val, tmp; - /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ir->op1; /* TValue *tv */ - asm_gencall(as, ci, args); - ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) ); - obj = IR(ir->op1)->r; - tmp = ra_scratch(as, rset_exclude(allow, obj)); - emit_cond_branch(as, CC_EQ, l_end); - emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp); - emit_cond_branch(as, CC_EQ, l_end); - emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP); - val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); - emit_lso(as, A64I_LDRB, tmp, obj, - (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); - emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); -} - -/* -- Arithmetic and logic operations ------------------------------------- */ - -static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31)); -} - -static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); - emit_dn(as, ai, (dest & 31), (left & 31)); -} - -static void asm_fpmath(ASMState *as, IRIns *ir) -{ - IRFPMathOp fpm = (IRFPMathOp)ir->op2; - if (fpm == IRFPM_SQRT) { - asm_fpunary(as, ir, A64I_FSQRTd); - } else if (fpm <= 
IRFPM_TRUNC) { - asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd : - fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd); - } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { - return; - } else { - asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); - } -} - -static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) -{ - IRIns *ir; - if (irref_isk(rref)) - return 0; /* Don't swap constants to the left. */ - if (irref_isk(lref)) - return 1; /* But swap constants to the right. */ - ir = IR(rref); - if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || - (ir->o == IR_ADD && ir->op1 == ir->op2) || - (ir->o == IR_CONV && ir->op2 == ((IRT_I64<o >= IR_BSHL && ir->o <= IR_BSAR) || - (ir->o == IR_ADD && ir->op1 == ir->op2) || - (ir->o == IR_CONV && ir->op2 == ((IRT_I64<op1, rref = ir->op2; - Reg left, dest = ra_dest(as, ir, RSET_GPR); - uint32_t m; - if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) { - IRRef tmp = lref; lref = rref; rref = tmp; - } - left = ra_hintalloc(as, lref, dest, RSET_GPR); - if (irt_is64(ir->t)) ai |= A64I_X; - m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); - if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */ - asm_guardcc(as, CC_VS); - ai |= A64I_S; - } - emit_dn(as, ai^m, dest, left); -} - -static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai) -{ - if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ - as->flagmcp = NULL; - as->mcp++; - ai |= A64I_S; - } - asm_intop(as, ir, ai); -} - -static void asm_intneg(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left); -} - -/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. 
*/ -static void asm_intmul(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - if (irt_isguard(ir->t)) { /* IR_MULOV */ - asm_guardcc(as, CC_NE); - emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */ - emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest); - emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest); - emit_dnm(as, A64I_SMULL, dest, right, left); - } else { - emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right); - } -} - -static void asm_add(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd)) - asm_fparith(as, ir, A64I_FADDd); - return; - } - asm_intop_s(as, ir, A64I_ADDw); -} - -static void asm_sub(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd)) - asm_fparith(as, ir, A64I_FSUBd); - return; - } - asm_intop_s(as, ir, A64I_SUBw); -} - -static void asm_mul(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, A64I_FMULd); - return; - } - asm_intmul(as, ir); -} - -static void asm_div(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - else -#endif - asm_fparith(as, ir, A64I_FDIVd); -} - -static void asm_pow(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_powi); -} - -#define asm_addov(as, ir) asm_add(as, ir) -#define asm_subov(as, ir) asm_sub(as, ir) -#define asm_mulov(as, ir) asm_mul(as, ir) - -#define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) - -static void asm_mod(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - if (!irt_isint(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_modi); -} - -static void asm_neg(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, A64I_FNEGd); - return; - } - asm_intneg(as, ir); -} - -static void asm_band(ASMState *as, IRIns *ir) -{ - A64Ins ai = A64I_ANDw; - if (asm_fuseandshift(as, ir)) - return; - if (as->flagmcp == as->mcp) { - /* Try to drop cmp r, #0. */ - as->flagmcp = NULL; - as->mcp++; - ai = A64I_ANDSw; - } - asm_intop(as, ir, ai); -} - -static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai) -{ - IRRef lref = ir->op1, rref = ir->op2; - IRIns *irl = IR(lref), *irr = IR(rref); - if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) || - (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) { - Reg left, dest = ra_dest(as, ir, RSET_GPR); - uint32_t m; - if (irl->o == IR_BNOT) { - IRRef tmp = lref; lref = rref; rref = tmp; - } - left = ra_alloc1(as, lref, RSET_GPR); - ai |= A64I_ON; - if (irt_is64(ir->t)) ai |= A64I_X; - m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left)); - emit_dn(as, ai^m, dest, left); - } else { - asm_intop(as, ir, ai); - } -} - -static void asm_bor(ASMState *as, IRIns *ir) -{ - if (asm_fuseorshift(as, ir)) - return; - asm_borbxor(as, ir, A64I_ORRw); -} - -#define asm_bxor(as, ir) asm_borbxor(as, ir, A64I_EORw) - -static void asm_bnot(ASMState *as, IRIns *ir) -{ - A64Ins ai = 
A64I_MVNw; - Reg dest = ra_dest(as, ir, RSET_GPR); - uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); - if (irt_is64(ir->t)) ai |= A64I_X; - emit_d(as, ai^m, dest); -} - -static void asm_bswap(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left); -} - -static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) -{ - int32_t shmask = irt_is64(ir->t) ? 63 : 31; - if (irref_isk(ir->op2)) { /* Constant shifts. */ - Reg left, dest = ra_dest(as, ir, RSET_GPR); - int32_t shift = (IR(ir->op2)->i & shmask); - IRIns *irl = IR(ir->op1); - if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw; - - /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */ - if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) { - if (irl->o == IR_BSHL && irref_isk(irl->op2)) { - int32_t shift2 = (IR(irl->op2)->i & shmask); - shift = ((shift - shift2) & shmask); - shmask -= shift2; - ir = irl; - } - } - - left = ra_alloc1(as, ir->op1, RSET_GPR); - switch (sh) { - case A64SH_LSL: - emit_dn(as, ai | A64F_IMMS(shmask-shift) | - A64F_IMMR((shmask-shift+1)&shmask), dest, left); - break; - case A64SH_LSR: case A64SH_ASR: - emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left); - break; - case A64SH_ROR: - emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left); - break; - } - } else { /* Variable-length shifts. */ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dnm(as, (shmask == 63 ? 
A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right); - } -} - -#define asm_bshl(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL) -#define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR) -#define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR) -#define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR) -#define asm_brol(as, ir) lua_assert(0) - -static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right); - emit_nm(as, A64I_CMPw, left, right); -} - -static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc) -{ - Reg dest = (ra_dest(as, ir, RSET_FPR) & 31); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = ((left >> 8) & 31); left &= 31; - emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right); - emit_nm(as, A64I_FCMPd, left, right); -} - -static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc) -{ - if (irt_isnum(ir->t)) - asm_fpmin_max(as, ir, fcc); - else - asm_intmin_max(as, ir, cc); -} - -#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) -#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) - -/* -- Comparisons --------------------------------------------------------- */ - -/* Map of comparisons to flags. ORDER IR. */ -static const uint8_t asm_compmap[IR_ABC+1] = { - /* op FP swp int cc FP cc */ - /* LT */ CC_GE + (CC_HS << 4), - /* GE x */ CC_LT + (CC_HI << 4), - /* LE */ CC_GT + (CC_HI << 4), - /* GT x */ CC_LE + (CC_HS << 4), - /* ULT x */ CC_HS + (CC_LS << 4), - /* UGE */ CC_LO + (CC_LO << 4), - /* ULE x */ CC_HI + (CC_LO << 4), - /* UGT */ CC_LS + (CC_LS << 4), - /* EQ */ CC_NE + (CC_NE << 4), - /* NE */ CC_EQ + (CC_EQ << 4), - /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */ -}; - -/* FP comparisons. 
*/ -static void asm_fpcomp(ASMState *as, IRIns *ir) -{ - Reg left, right; - A64Ins ai; - int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); - if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { - left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31); - right = 0; - ai = A64I_FCMPZd; - } else { - left = ra_alloc2(as, ir, RSET_FPR); - if (swp) { - right = (left & 31); left = ((left >> 8) & 31); - } else { - right = ((left >> 8) & 31); left &= 31; - } - ai = A64I_FCMPd; - } - asm_guardcc(as, (asm_compmap[ir->o] >> 4)); - emit_nm(as, ai, left, right); -} - -/* Integer comparisons. */ -static void asm_intcomp(ASMState *as, IRIns *ir) -{ - A64CC oldcc, cc = (asm_compmap[ir->o] & 15); - A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw; - IRRef lref = ir->op1, rref = ir->op2; - Reg left; - uint32_t m; - int cmpprev0 = 0; - lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || - irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); - if (asm_swapops(as, lref, rref)) { - IRRef tmp = lref; lref = rref; rref = tmp; - if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ - else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ - } - oldcc = cc; - if (irref_isk(rref) && get_k64val(IR(rref)) == 0) { - IRIns *irl = IR(lref); - if (cc == CC_GE) cc = CC_PL; - else if (cc == CC_LT) cc = CC_MI; - else if (cc > CC_NE) goto nocombine; /* Other conds don't work with tst. */ - cmpprev0 = (irl+1 == ir); - /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */ - if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { - IRRef blref = irl->op1, brref = irl->op2; - uint32_t m2 = 0; - Reg bleft; - if (asm_swapops(as, blref, brref)) { - Reg tmp = blref; blref = brref; brref = tmp; - } - if (irref_isk(brref)) { - uint64_t k = get_k64val(IR(brref)); - if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) { - asm_guardtnb(as, cc == CC_EQ ? 
A64I_TBZ : A64I_TBNZ, - ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k)); - return; - } - m2 = emit_isk13(k, irt_is64(irl->t)); - } - bleft = ra_alloc1(as, blref, RSET_GPR); - ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); - if (!m2) - m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft)); - asm_guardcc(as, cc); - emit_n(as, ai^m2, bleft); - return; - } - if (cc == CC_EQ || cc == CC_NE) { - /* Combine cmp-bcc into cbz/cbnz. */ - ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ; - if (irt_is64(ir->t)) ai |= A64I_X; - asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR)); - return; - } - } -nocombine: - left = ra_alloc1(as, lref, RSET_GPR); - m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); - asm_guardcc(as, cc); - emit_n(as, ai^m, left); - /* Signed comparison with zero and referencing previous ins? */ - if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE)) - as->flagmcp = as->mcp; /* Allow elimination of the compare. */ -} - -static void asm_comp(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) - asm_fpcomp(as, ir); - else - asm_intcomp(as, ir); -} - -#define asm_equal(as, ir) asm_comp(as, ir) - -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ - -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ -static void asm_hiop(ASMState *as, IRIns *ir) -{ - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on 64 bit. */ -} - -/* -- Profiling ----------------------------------------------------------- */ - -static void asm_prof(ASMState *as, IRIns *ir) -{ - uint32_t k = emit_isk13(HOOK_PROFILE, 0); - lua_assert(k != 0); - UNUSED(ir); - asm_guardcc(as, CC_NE); - emit_n(as, A64I_TSTw^k, RID_TMP); - emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); -} - -/* -- Stack handling ------------------------------------------------------ */ - -/* Check Lua stack size for overflow. Use exit handler as fallback. 
*/ -static void asm_stack_check(ASMState *as, BCReg topslot, - IRIns *irp, RegSet allow, ExitNo exitno) -{ - Reg pbase; - uint32_t k; - if (irp) { - if (!ra_hasspill(irp->s)) { - pbase = irp->r; - lua_assert(ra_hasreg(pbase)); - } else if (allow) { - pbase = rset_pickbot(allow); - } else { - pbase = RID_RET; - emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */ - } - } else { - pbase = RID_BASE; - } - emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); - k = emit_isk12((8*topslot)); - lua_assert(k); - emit_n(as, A64I_CMPx^k, RID_TMP); - emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); - emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, - (int32_t)offsetof(lua_State, maxstack)); - if (irp) { /* Must not spill arbitrary registers in head of side trace. */ - if (ra_hasspill(irp->s)) - emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s)); - emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L)); - if (ra_hasspill(irp->s) && !allow) - emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */ - } else { - emit_getgl(as, RID_TMP, cur_L); - } -} - -/* Restore Lua stack from on-trace state. */ -static void asm_stack_restore(ASMState *as, SnapShot *snap) -{ - SnapEntry *map = &as->T->snapmap[snap->mapofs]; -#ifdef LUA_USE_ASSERT - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; -#endif - MSize n, nent = snap->nent; - /* Store the value of all modified slots to the Lua stack. 
*/ - for (n = 0; n < nent; n++) { - SnapEntry sn = map[n]; - BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1-LJ_FR2); - IRRef ref = snap_ref(sn); - IRIns *ir = IR(ref); - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs); - } else { - asm_tvstore64(as, RID_BASE, ofs, ref); - } - checkmclim(as); - } - lua_assert(map + nent == flinks); -} - -/* -- GC handling --------------------------------------------------------- */ - -/* Check GC threshold and do one or more GC steps. */ -static void asm_gc_check(ASMState *as) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; - IRRef args[2]; - MCLabel l_end; - Reg tmp1, tmp2; - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ - asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */ - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ASMREF_TMP2; /* MSize steps */ - asm_gencall(as, ci, args); - tmp1 = ra_releasetmp(as, ASMREF_TMP1); - tmp2 = ra_releasetmp(as, ASMREF_TMP2); - emit_loadi(as, tmp2, as->gcsteps); - /* Jump around GC step if GC total < GC threshold. */ - emit_cond_branch(as, CC_LS, l_end); - emit_nm(as, A64I_CMPx, RID_TMP, tmp2); - emit_lso(as, A64I_LDRx, tmp2, tmp1, - (int32_t)offsetof(global_State, gc.threshold)); - emit_lso(as, A64I_LDRx, RID_TMP, tmp1, - (int32_t)offsetof(global_State, gc.total)); - ra_allockreg(as, i64ptr(J2G(as->J)), tmp1); - as->gcsteps = 0; - checkmclim(as); -} - -/* -- Loop handling ------------------------------------------------------- */ - -/* Fixup the loop branch. */ -static void asm_loop_fixup(ASMState *as) -{ - MCode *p = as->mctop; - MCode *target = as->mcp; - if (as->loopinv) { /* Inverted loop branch? */ - uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 
0x3fffu : 0x7ffffu; - ptrdiff_t delta = target - (p - 2); - /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */ - p[-2] |= ((uint32_t)delta & mask) << 5; - } else { - ptrdiff_t delta = target - (p - 1); - p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu); - } -} - -/* -- Head of trace ------------------------------------------------------- */ - -/* Reload L register from g->cur_L. */ -static void asm_head_lreg(ASMState *as) -{ - IRIns *ir = IR(ASMREF_L); - if (ra_used(ir)) { - Reg r = ra_dest(as, ir, RSET_GPR); - emit_getgl(as, r, cur_L); - ra_evictk(as); - } -} - -/* Coalesce BASE register for a root trace. */ -static void asm_head_root_base(ASMState *as) -{ - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - ra_destreg(as, ir, RID_BASE); -} - -/* Coalesce BASE register for a side trace. */ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) -{ - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - if (ra_hasspill(irp->s)) { - rset_clear(allow, ra_dest(as, ir, allow)); - } else { - Reg r = irp->r; - lua_assert(ra_hasreg(r)); - rset_clear(allow, r); - if (r != ir->r && !rset_test(as->freeset, r)) - ra_restore(as, regcost_ref(as->cost[r])); - ra_destreg(as, ir, r); - } - return allow; -} - -/* -- Tail of trace ------------------------------------------------------- */ - -/* Fixup the tail code. */ -static void asm_tail_fixup(ASMState *as, TraceNo lnk) -{ - MCode *p = as->mctop; - MCode *target; - /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ - int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); - if (spadj == 0) { - *--p = A64I_NOP; - as->mctop = p; - } else { - /* Patch stack adjustment. 
*/ - uint32_t k = emit_isk12(spadj); - lua_assert(k); - p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); - } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = A64I_B | (((target-p)+1)&0x03ffffffu); -} - -/* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) -{ - MCode *p = as->mctop - 1; /* Leave room for exit branch. */ - if (as->loopref) { - as->invmcp = as->mcp = p; - } else { - as->mcp = p-1; /* Leave room for stack pointer adjustment. */ - as->invmcp = NULL; - } - *p = 0; /* Prevent load/store merging. */ -} - -/* -- Trace setup --------------------------------------------------------- */ - -/* Ensure there are enough stack slots for call arguments. */ -static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - IRRef args[CCI_NARGS_MAX*2]; - uint32_t i, nargs = CCI_XNARGS(ci); - int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) { - if (args[i] && irt_isfp(IR(args[i])->t)) { - if (nfpr > 0) nfpr--; else nslots += 2; - } else { - if (ngpr > 0) ngpr--; else nslots += 2; - } - } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; - return REGSP_HINT(RID_RET); -} - -static void asm_setup_target(ASMState *as) -{ - /* May need extra exit for asm_stack_check on side traces. */ - asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); -} - -/* -- Trace patching ------------------------------------------------------ */ - -/* Patch exit jumps of existing machine code to a new target. 
*/ -void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) -{ - MCode *p = T->mcode; - MCode *pe = (MCode *)((char *)p + T->szmcode); - MCode *cstart = NULL, *cend = p; - MCode *mcarea = lj_mcode_patch(J, p, 0); - MCode *px = exitstub_trace_addr(T, exitno); - for (; p < pe; p++) { - /* Look for exitstub branch, replace with branch to target. */ - uint32_t ins = *p; - if ((ins & 0xff000000u) == 0x54000000u && - ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { - /* Patch bcc exitstub. */ - *p = (ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u); - cend = p+1; - if (!cstart) cstart = p; - } else if ((ins & 0xfc000000u) == 0x14000000u && - ((ins ^ (px-p)) & 0x03ffffffu) == 0) { - /* Patch b exitstub. */ - *p = (ins & 0xfc000000u) | ((target-p) & 0x03ffffffu); - cend = p+1; - if (!cstart) cstart = p; - } else if ((ins & 0x7e000000u) == 0x34000000u && - ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { - /* Patch cbz/cbnz exitstub. */ - *p = (ins & 0xff00001f) | (((target-p)<<5) & 0x00ffffe0u); - cend = p+1; - if (!cstart) cstart = p; - } else if ((ins & 0x7e000000u) == 0x36000000u && - ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { - /* Patch tbz/tbnz exitstub. */ - *p = (ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u); - cend = p+1; - if (!cstart) cstart = p; - } - } - lua_assert(cstart != NULL); - lj_mcode_sync(cstart, cend); - lj_mcode_patch(J, mcarea, 1); -} - diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h deleted file mode 100644 index affe7d8996..0000000000 --- a/src/lj_asm_mips.h +++ /dev/null @@ -1,2505 +0,0 @@ -/* -** MIPS IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -/* -- Register allocator extensions --------------------------------------- */ - -/* Allocate a register with a hint. 
*/ -static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) -{ - Reg r = IR(ref)->r; - if (ra_noreg(r)) { - if (!ra_hashint(r) && !iscrossref(as, ref)) - ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ - r = ra_allocref(as, ref, allow); - } - ra_noweak(as, r); - return r; -} - -/* Allocate a register or RID_ZERO. */ -static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) -{ - Reg r = IR(ref)->r; - if (ra_noreg(r)) { - if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(IR(ref)) == 0) - return RID_ZERO; - r = ra_allocref(as, ref, allow); - } else { - ra_noweak(as, r); - } - return r; -} - -/* Allocate two source registers for three-operand instructions. */ -static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) -{ - IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - Reg left = irl->r, right = irr->r; - if (ra_hasreg(left)) { - ra_noweak(as, left); - if (ra_noreg(right)) - right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); - else - ra_noweak(as, right); - } else if (ra_hasreg(right)) { - ra_noweak(as, right); - left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); - } else if (ra_hashint(right)) { - right = ra_alloc1z(as, ir->op2, allow); - left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); - } else { - left = ra_alloc1z(as, ir->op1, allow); - right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); - } - return left | (right << 8); -} - -/* -- Guard handling ------------------------------------------------------ */ - -/* Need some spare long-range jump slots, for out-of-range branches. */ -#define MIPS_SPAREJUMP 4 - -/* Setup spare long-range jump slots per mcarea. */ -static void asm_sparejump_setup(ASMState *as) -{ - MCode *mxp = as->mcbot; - /* Assumes sizeof(MCLink) == 8. 
*/ - if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) { - lua_assert(MIPSI_NOP == 0); - memset(mxp+2, 0, MIPS_SPAREJUMP*8); - mxp += MIPS_SPAREJUMP*2; - lua_assert(mxp < as->mctop); - lj_mcode_sync(as->mcbot, mxp); - lj_mcode_commitbot(as->J, mxp); - as->mcbot = mxp; - as->mclim = as->mcbot + MCLIM_REDZONE; - } -} - -/* Setup exit stub after the end of each trace. */ -static void asm_exitstub_setup(ASMState *as) -{ - MCode *mxp = as->mctop; - /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ - *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; - *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); - lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0); - *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; - as->mctop = mxp; -} - -/* Keep this in-sync with exitstub_trace_addr(). */ -#define asm_exitstub_addr(as) ((as)->mctop) - -/* Emit conditional branch to exit for guard. */ -static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt) -{ - MCode *target = asm_exitstub_addr(as); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->invmcp = NULL; - as->loopinv = 1; - as->mcp = p+1; - mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */ - target = p; /* Patch target later in asm_loop_fixup. */ - } - emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); - emit_branch(as, mi, rs, rt, target); -} - -/* -- Operand fusion ------------------------------------------------------ */ - -/* Limit linear search to this distance. Avoids O(n^2) behavior. */ -#define CONFLICT_SEARCH_LIM 31 - -/* Check if there's no conflicting instruction between curins and ref. */ -static int noconflict(ASMState *as, IRRef ref, IROp conflict) -{ - IRIns *ir = as->ir; - IRRef i = as->curins; - if (i > ref + CONFLICT_SEARCH_LIM) - return 0; /* Give up, ref is too far away. */ - while (--i > ref) - if (ir[i].o == conflict) - return 0; /* Conflict found. */ - return 1; /* Ok, no conflict. 
*/ -} - -/* Fuse the array base of colocated arrays. */ -static int32_t asm_fuseabase(ASMState *as, IRRef ref) -{ - IRIns *ir = IR(ref); - if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && - !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) - return (int32_t)sizeof(GCtab); - return 0; -} - -/* Fuse array/hash/upvalue reference into register+offset operand. */ -static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) -{ - IRIns *ir = IR(ref); - if (ra_noreg(ir->r)) { - if (ir->o == IR_AREF) { - if (mayfuse(as, ref)) { - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (checki16(ofs)) { - *ofsp = ofs; - return ra_alloc1(as, refa, allow); - } - } - } - } else if (ir->o == IR_HREFK) { - if (mayfuse(as, ref)) { - int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); - if (checki16(ofs)) { - *ofsp = ofs; - return ra_alloc1(as, ir->op1, allow); - } - } - } else if (ir->o == IR_UREFC) { - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv; - intptr_t jgl = (intptr_t)J2G(as->J); - if ((uintptr_t)(ofs-jgl) < 65536) { - *ofsp = ofs-jgl-32768; - return RID_JGL; - } else { - *ofsp = (int16_t)ofs; - return ra_allock(as, ofs-(int16_t)ofs, allow); - } - } - } - } - *ofsp = 0; - return ra_alloc1(as, ref, allow); -} - -/* Fuse XLOAD/XSTORE reference into load/store operand. 
*/ -static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, - RegSet allow, int32_t ofs) -{ - IRIns *ir = IR(ref); - Reg base; - if (ra_noreg(ir->r) && canfuse(as, ir)) { - if (ir->o == IR_ADD) { - intptr_t ofs2; - if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(IR(ir->op2)), - checki16(ofs2))) { - ref = ir->op1; - ofs = (int32_t)ofs2; - } - } else if (ir->o == IR_STRREF) { - intptr_t ofs2 = 65536; - lua_assert(ofs == 0); - ofs = (int32_t)sizeof(GCstr); - if (irref_isk(ir->op2)) { - ofs2 = ofs + get_kval(IR(ir->op2)); - ref = ir->op1; - } else if (irref_isk(ir->op1)) { - ofs2 = ofs + get_kval(IR(ir->op1)); - ref = ir->op2; - } - if (!checki16(ofs2)) { - /* NYI: Fuse ADD with constant. */ - Reg right, left = ra_alloc2(as, ir, allow); - right = (left >> 8); left &= 255; - emit_hsi(as, mi, rt, RID_TMP, ofs); - emit_dst(as, MIPSI_AADDU, RID_TMP, left, right); - return; - } - ofs = ofs2; - } - } - base = ra_alloc1(as, ref, allow); - emit_hsi(as, mi, rt, base, ofs); -} - -/* -- Calls --------------------------------------------------------------- */ - -/* Generate a call to a C function. */ -static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) -{ - uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = LJ_32 ? 16 : 0; -#if LJ_SOFTFP - Reg gpr = REGARG_FIRSTGPR; -#else - Reg gpr, fpr = REGARG_FIRSTFPR; -#endif - if ((void *)ci->func) - emit_call(as, (void *)ci->func, 1); -#if !LJ_SOFTFP - for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) - as->cost[gpr] = REGCOST(~0u, ASMREF_L); - gpr = REGARG_FIRSTGPR; -#endif - for (n = 0; n < nargs; n++) { /* Setup args. */ - IRRef ref = args[n]; - if (ref) { - IRIns *ir = IR(ref); -#if !LJ_SOFTFP - if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && - !(ci->flags & CCI_VARARG)) { - lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ - ra_leftov(as, fpr, ref); - fpr += LJ_32 ? 2 : 1; - gpr += (LJ_32 && irt_isnum(ir->t)) ? 
2 : 1; - } else -#endif - { -#if LJ_32 && !LJ_SOFTFP - fpr = REGARG_LASTFPR+1; -#endif - if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1; - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ -#if !LJ_SOFTFP - if (irt_isfp(ir->t)) { - RegSet of = as->freeset; - Reg r; - /* Workaround to protect argument GPRs from being used for remat. */ - as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1); - r = ra_alloc1(as, ref, RSET_FPR); - as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); - if (irt_isnum(ir->t)) { -#if LJ_32 - emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); - emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); - lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ - gpr += 2; -#else - emit_tg(as, MIPSI_DMFC1, gpr, r); - gpr++; fpr++; -#endif - } else if (irt_isfloat(ir->t)) { - emit_tg(as, MIPSI_MFC1, gpr, r); - gpr++; -#if LJ_64 - fpr++; -#endif - } - } else -#endif - { - ra_leftov(as, gpr, ref); - gpr++; -#if LJ_64 - fpr++; -#endif - } - } else { - Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); -#if LJ_32 - if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; - emit_spstore(as, ir, r, ofs); - ofs += irt_isnum(ir->t) ? 8 : 4; -#else - emit_spstore(as, ir, r, ofs + ((LJ_BE && (LJ_SOFTFP || r < RID_MAX_GPR) && !irt_is64(ir->t)) ? 4 : 0)); - ofs += 8; -#endif - } - } - } else { -#if !LJ_SOFTFP - fpr = REGARG_LASTFPR+1; -#endif - if (gpr <= REGARG_LASTGPR) { - gpr++; -#if LJ_64 - fpr++; -#endif - } else { - ofs += LJ_32 ? 4 : 8; - } - } - checkmclim(as); - } -} - -/* Setup result reg/sp for call. Evict scratch regs. 
*/ -static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - RegSet drop = RSET_SCRATCH; -#if LJ_32 - int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); -#endif -#if !LJ_SOFTFP - if ((ci->flags & CCI_NOFPRCLOBBER)) - drop &= ~RSET_FPR; -#endif - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ -#if LJ_32 - if (hiop && ra_hasreg((ir+1)->r)) - rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ -#endif - ra_evictset(as, drop); /* Evictions must be performed first. */ - if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); - if (!LJ_SOFTFP && irt_isfp(ir->t)) { - if ((ci->flags & CCI_CASTU64)) { - int32_t ofs = sps_scale(ir->s); - Reg dest = ir->r; - if (ra_hasreg(dest)) { - ra_free(as, dest); - ra_modified(as, dest); -#if LJ_32 - emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1); - emit_tg(as, MIPSI_MTC1, RID_RETLO, dest); -#else - emit_tg(as, MIPSI_DMTC1, RID_RET, dest); -#endif - } - if (ofs) { -#if LJ_32 - emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0)); - emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4)); -#else - emit_tsi(as, MIPSI_SD, RID_RET, RID_SP, ofs); -#endif - } - } else { - ra_destreg(as, ir, RID_FPRET); - } -#if LJ_32 - } else if (hiop) { - ra_destpair(as, ir); -#endif - } else { - ra_destreg(as, ir, RID_RET); - } - } -} - -static void asm_callx(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX*2]; - CCallInfo ci; - IRRef func; - IRIns *irf; - ci.flags = asm_callx_flags(as, ir); - asm_collectargs(as, ir, &ci, args); - asm_setupresult(as, ir, &ci); - func = ir->op2; irf = IR(func); - if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } - if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(void *)get_kval(irf); - } else { /* Need specific register for indirect calls. 
*/ - Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); - MCode *p = as->mcp; - if (r == RID_CFUNCADDR) - *--p = MIPSI_NOP; - else - *--p = MIPSI_MOVE | MIPSF_D(RID_CFUNCADDR) | MIPSF_S(r); - *--p = MIPSI_JALR | MIPSF_S(r); - as->mcp = p; - ci.func = (ASMFunction)(void *)0; - } - asm_gencall(as, &ci, args); -} - -#if !LJ_SOFTFP -static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) -{ - /* The modified regs must match with the *.dasc implementation. */ - RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| - RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR); - if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); - ra_evictset(as, drop); - ra_destreg(as, ir, RID_FPRET); - emit_call(as, (void *)lj_ir_callinfo[id].func, 0); - ra_leftov(as, REGARG_FIRSTFPR, ir->op1); -} -#endif - -/* -- Returns ------------------------------------------------------------- */ - -/* Return to lower frame. Guard that it goes to the right spot. */ -static void asm_retf(ASMState *as, IRIns *ir) -{ - Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); - void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); - as->topslot -= (BCReg)delta; - if ((int32_t)as->topslot < 0) as->topslot = 0; - irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. 
*/ - emit_setgl(as, base, jit_base); - emit_addptr(as, base, -8*delta); - asm_guard(as, MIPSI_BNE, RID_TMP, - ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); - emit_tsi(as, MIPSI_AL, RID_TMP, base, -8); -} - -/* -- Type conversions ---------------------------------------------------- */ - -#if !LJ_SOFTFP -static void asm_tointg(ASMState *as, IRIns *ir, Reg left) -{ - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - Reg dest = ra_dest(as, ir, RSET_GPR); - asm_guard(as, MIPSI_BC1F, 0, 0); - emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left); - emit_fg(as, MIPSI_CVT_D_W, tmp, tmp); - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, MIPSI_CVT_W_D, tmp, left); -} - -static void asm_tobit(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_FPR; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, allow); - Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); - Reg tmp = ra_scratch(as, rset_clear(allow, right)); - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fgh(as, MIPSI_ADD_D, tmp, left, right); -} -#endif - -static void asm_conv(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); -#if !LJ_SOFTFP - int stfp = (st == IRT_NUM || st == IRT_FLOAT); -#endif -#if LJ_64 - int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); -#endif - IRRef lref = ir->op1; -#if LJ_32 - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ -#endif -#if LJ_32 && LJ_SOFTFP - /* FP conversions are handled by SPLIT. */ - lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); - /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ -#else - lua_assert(irt_type(ir->t) != st); - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ - emit_fg(as, st == IRT_NUM ? MIPSI_CVT_S_D : MIPSI_CVT_D_S, - dest, ra_alloc1(as, lref, RSET_FPR)); - } else if (st == IRT_U32) { /* U32 to FP conversion. 
*/ - /* y = (x ^ 0x8000000) + 2147483648.0 */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - if (irt_isfloat(ir->t)) - emit_fg(as, MIPSI_CVT_S_D, dest, dest); - /* Must perform arithmetic with doubles to keep the precision. */ - emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); - emit_fg(as, MIPSI_CVT_D_W, dest, dest); - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); - emit_tg(as, MIPSI_MTC1, RID_TMP, dest); - emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); - emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); -#if LJ_64 - } else if(st == IRT_U64) { /* U64 to FP conversion. */ - /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - MCLabel l_end = emit_label(as); - if (irt_isfloat(ir->t)) { - emit_fgh(as, MIPSI_ADD_S, dest, dest, tmp); - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63], - rset_exclude(RSET_GPR, left)); - emit_fg(as, MIPSI_CVT_S_L, dest, dest); - } else { - emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63], - rset_exclude(RSET_GPR, left)); - emit_fg(as, MIPSI_CVT_D_L, dest, dest); - } - emit_branch(as, MIPSI_BGEZ, left, RID_ZERO, l_end); - emit_tg(as, MIPSI_DMTC1, RID_TMP, dest); - emit_tsml(as, MIPSI_DEXTM, RID_TMP, left, 30, 0); -#endif - } else { /* Integer to FP conversion. */ - Reg left = ra_alloc1(as, lref, RSET_GPR); -#if LJ_32 - emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, - dest, dest); - emit_tg(as, MIPSI_MTC1, left, dest); -#else - MIPSIns mi = irt_isfloat(ir->t) ? - (st64 ? MIPSI_CVT_S_L : MIPSI_CVT_S_W) : - (st64 ? MIPSI_CVT_D_L : MIPSI_CVT_D_W); - emit_fg(as, mi, dest, dest); - emit_tg(as, st64 ? MIPSI_DMTC1 : MIPSI_MTC1, left, dest); -#endif - } - } else if (stfp) { /* FP to integer conversion. 
*/ - if (irt_isguard(ir->t)) { - /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); - asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, lref, RSET_FPR); - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ - /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ - emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); - emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, st == IRT_FLOAT ? MIPSI_FLOOR_W_S : MIPSI_FLOOR_W_D, - tmp, tmp); - emit_fgh(as, st == IRT_FLOAT ? MIPSI_SUB_S : MIPSI_SUB_D, - tmp, left, tmp); - if (st == IRT_FLOAT) - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); - else - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); -#if LJ_64 - } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ - MCLabel l_end; - emit_tg(as, MIPSI_DMFC1, dest, tmp); - l_end = emit_label(as); - /* For inputs >= 2^63 add -2^64 and convert again. */ - if (st == IRT_NUM) { - emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp); - emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp); - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_M2P64], - rset_exclude(RSET_GPR, dest)); - emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_2P63], - rset_exclude(RSET_GPR, dest)); - } else { - emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp); - emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp); - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)&as->J->k32[LJ_K32_M2P64], - rset_exclude(RSET_GPR, dest)); - emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. 
*/ - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P63], - rset_exclude(RSET_GPR, dest)); - } -#endif - } else { -#if LJ_32 - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, - tmp, left); -#else - MIPSIns mi = irt_is64(ir->t) ? - (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) : - (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S); - emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left); - emit_fg(as, mi, left, left); -#endif - } - } - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); - if ((ir->op2 & IRCONV_SEXT)) { - if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { - emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); - } else { - uint32_t shift = st == IRT_I8 ? 24 : 16; - emit_dta(as, MIPSI_SRA, dest, dest, shift); - emit_dta(as, MIPSI_SLL, dest, left, shift); - } - } else { - emit_tsi(as, MIPSI_ANDI, dest, left, - (int32_t)(st == IRT_U8 ? 0xff : 0xffff)); - } - } else { /* 32/64 bit integer conversions. */ -#if LJ_32 - /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ -#else - if (irt_is64(ir->t)) { - if (st64) { - /* 64/64 bit no-op (cast)*/ - ra_leftov(as, dest, lref); - } else { - Reg left = ra_alloc1(as, lref, RSET_GPR); - if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ - emit_dta(as, MIPSI_SLL, dest, left, 0); - } else { /* 32 to 64 bit zero extension. */ - emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); - } - } - } else { - if (st64) { - /* This is either a 32 bit reg/reg mov which zeroes the hiword - ** or a load of the loword from a 64 bit address. 
- */ - Reg left = ra_alloc1(as, lref, RSET_GPR); - emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); - } else { /* 32/32 bit no-op (cast). */ - /* Do nothing, but may need to move regs. */ - ra_leftov(as, dest, lref); - } - } -#endif - } - } -} - -static void asm_strto(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; - int32_t ofs = 0; -#if LJ_SOFTFP - ra_evictset(as, RSET_SCRATCH); - if (ra_used(ir)) { - if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && - (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { - int i; - for (i = 0; i < 2; i++) { - Reg r = (ir+i)->r; - if (ra_hasreg(r)) { - ra_free(as, r); - ra_modified(as, r); - emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); - } - } - ofs = sps_scale(ir->s & ~1); - } else { - Reg rhi = ra_dest(as, ir+1, RSET_GPR); - Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); - emit_tsi(as, MIPSI_LW, rhi, RID_SP, ofs+(LJ_BE?0:4)); - emit_tsi(as, MIPSI_LW, rlo, RID_SP, ofs+(LJ_BE?4:0)); - } - } -#else - RegSet drop = RSET_SCRATCH; - if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ - ra_evictset(as, drop); - ofs = sps_scale(ir->s); -#endif - asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ - args[0] = ir->op1; /* GCstr *str */ - args[1] = ASMREF_TMP1; /* TValue *n */ - asm_gencall(as, ci, args); - /* Store the result to the spill slot or temp slots. */ - emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), - RID_SP, ofs); -} - -/* -- Memory references --------------------------------------------------- */ - -#if LJ_64 -/* Store tagged value for ref at base+ofs. 
*/ -static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) -{ - RegSet allow = rset_exclude(RSET_GPR, base); - IRIns *ir = IR(ref); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); - if (irref_isk(ref)) { - TValue k; - lj_ir_kvalue(as->J->L, &k, ir); - emit_tsi(as, MIPSI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs); - } else { - Reg src = ra_alloc1(as, ref, allow); - Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, - rset_exclude(allow, src)); - emit_tsi(as, MIPSI_SD, RID_TMP, base, ofs); - if (irt_isinteger(ir->t)) { - emit_dst(as, MIPSI_DADDU, RID_TMP, RID_TMP, type); - emit_tsml(as, MIPSI_DEXT, RID_TMP, src, 31, 0); - } else { - emit_dst(as, MIPSI_DADDU, RID_TMP, src, type); - } - } -} -#endif - -/* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) -{ - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ - ra_allockreg(as, igcptr(ir_knum(ir)), dest); - else /* Otherwise force a spill and use the spill slot. */ - emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir)); - } else { - /* Otherwise use g->tmptv to hold the TValue. 
*/ -#if LJ_32 - RegSet allow = rset_exclude(RSET_GPR, dest); - Reg type; - emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768)); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - emit_setgl(as, src, tmptv.gcr); - } - if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) - type = ra_alloc1(as, ref+1, allow); - else - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - emit_setgl(as, type, tmptv.it); -#else - asm_tvstore64(as, dest, 0, ref); - emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, - (int32_t)(offsetof(global_State, tmptv)-32768)); -#endif - } -} - -static void asm_aref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx, base; - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (checki16(ofs)) { - base = ra_alloc1(as, refa, RSET_GPR); - emit_tsi(as, MIPSI_AADDIU, dest, base, ofs); - return; - } - } - base = ra_alloc1(as, ir->op1, RSET_GPR); - idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base); - emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); -} - -/* Inlined hash lookup. Specialized for key type and for const keys. 
-** The equivalent C code is: -** Node *n = hashkey(t, key); -** do { -** if (lj_obj_equal(&n->key, key)) return &n->val; -** } while ((n = nextnode(n))); -** return niltv(L); -*/ -static void asm_href(ASMState *as, IRIns *ir, IROp merge) -{ - RegSet allow = RSET_GPR; - int destused = ra_used(ir); - Reg dest = ra_dest(as, ir, allow); - Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); - int isk = irref_isk(refkey); - IRType1 kt = irkey->t; - uint32_t khash; - MCLabel l_end, l_loop, l_next; - - rset_clear(allow, tab); -#if LJ_32 && LJ_SOFTFP - if (!isk) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - if (irkey[1].o == IR_HIOP) { - if (ra_hasreg((irkey+1)->r)) { - type = tmpnum = (irkey+1)->r; - tmp1 = ra_scratch(as, allow); - rset_clear(allow, tmp1); - ra_noweak(as, tmpnum); - } else { - type = tmpnum = ra_allocref(as, refkey+1, allow); - } - rset_clear(allow, tmpnum); - } else { - type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); - rset_clear(allow, type); - } - } -#else - if (irt_isnum(kt)) { - key = ra_alloc1(as, refkey, RSET_FPR); - tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); - } else if (!irt_ispri(kt)) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); -#if LJ_32 - type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow); - rset_clear(allow, type); -#endif - } -#endif - tmp2 = ra_scratch(as, allow); - rset_clear(allow, tmp2); - - /* Key not found in chain: jump to exit (if merged) or load niltv. */ - l_end = emit_label(as); - as->invmcp = NULL; - if (merge == IR_NE) - asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO); - else if (destused) - emit_loada(as, dest, niltvg(J2G(as->J))); - /* Follow hash chain until the end. 
*/ - emit_move(as, dest, tmp1); - l_loop = --as->mcp; - emit_tsi(as, MIPSI_AL, tmp1, dest, (int32_t)offsetof(Node, next)); - l_next = emit_label(as); - - /* Type and value comparison. */ - if (merge == IR_EQ) { /* Must match asm_guard(). */ - emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); - l_end = asm_exitstub_addr(as); - } - if (!LJ_SOFTFP && irt_isnum(kt)) { - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); - *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */ - emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); - emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); -#if LJ_32 - emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); - } else { - if (irt_ispri(kt)) { - emit_branch(as, MIPSI_BEQ, tmp1, type, l_end); - } else { - emit_branch(as, MIPSI_BEQ, tmp2, key, l_end); - emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); - emit_branch(as, MIPSI_BNE, tmp1, type, l_next); - } - } - emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); - *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); -#else - emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); - emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); - emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); - } else if (irt_isaddr(kt)) { - Reg refk = tmp2; - if (isk) { - int64_t k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; - refk = ra_allock(as, k, allow); - rset_clear(allow, refk); - } - emit_branch(as, MIPSI_BEQ, tmp1, refk, l_end); - emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); - } else { - Reg pri = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); - rset_clear(allow, pri); - lua_assert(irt_ispri(kt) && !irt_isnil(kt)); - emit_branch(as, MIPSI_BEQ, tmp1, pri, l_end); - emit_tsi(as, MIPSI_LD, tmp1, dest, offsetof(Node, key)); - } - *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); - if (!isk && irt_isaddr(kt)) { - type = ra_allock(as, 
(int64_t)irt_toitype(kt) << 47, allow); - emit_dst(as, MIPSI_DADDU, tmp2, key, type); - rset_clear(allow, type); - } -#endif - - /* Load main position relative to tab->node into dest. */ - khash = isk ? ir_khash(irkey) : 1; - if (khash == 0) { - emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); - } else { - Reg tmphash = tmp1; - if (isk) - tmphash = ra_allock(as, khash, allow); - emit_dst(as, MIPSI_AADDU, dest, dest, tmp1); - lua_assert(sizeof(Node) == 24); - emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); - emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); - emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); - emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash); - emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); - if (isk) { - /* Nothing to do. */ - } else if (irt_isstr(kt)) { - emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); - } else { /* Must match with hash*() in lj_tab.c. */ - emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2); - emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31); - emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2); - emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); - emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); -#if LJ_32 - if (LJ_SOFTFP ? 
(irkey[1].o == IR_HIOP) : irt_isnum(kt)) { - emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); - if ((as->flags & JIT_F_MIPSXXR2)) { - emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); - } else { - emit_dst(as, MIPSI_OR, dest, dest, tmp1); - emit_dta(as, MIPSI_SLL, tmp1, tmp1, HASH_ROT1); - emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31); - } - emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); -#if LJ_SOFTFP - emit_ds(as, MIPSI_MOVE, tmp1, type); - emit_ds(as, MIPSI_MOVE, tmp2, key); -#else - emit_tg(as, MIPSI_MFC1, tmp2, key); - emit_tg(as, MIPSI_MFC1, tmp1, key+1); -#endif - } else { - emit_dst(as, MIPSI_XOR, tmp2, key, tmp1); - emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31); - emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow)); - } -#else - emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); - emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); - if (irt_isnum(kt)) { - emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); - emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); - emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0); -#if !LJ_SOFTFP - emit_tg(as, MIPSI_DMFC1, tmp1, key); -#endif - } else { - checkmclim(as); - emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); - emit_dta(as, MIPSI_SLL, tmp2, key, 0); - emit_dst(as, MIPSI_DADDU, tmp1, key, type); - } -#endif - } - } -} - -static void asm_hrefk(ASMState *as, IRIns *ir) -{ - IRIns *kslot = IR(ir->op2); - IRIns *irkey = IR(kslot->op1); - int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); - int32_t kofs = ofs + (int32_t)offsetof(Node, key); - Reg dest = (ra_used(ir)||ofs > 32736) ? 
ra_dest(as, ir, RSET_GPR) : RID_NONE; - Reg node = ra_alloc1(as, ir->op1, RSET_GPR); - RegSet allow = rset_exclude(RSET_GPR, node); - Reg idx = node; -#if LJ_32 - Reg key = RID_NONE, type = RID_TMP; - int32_t lo, hi; -#else - Reg key = ra_scratch(as, allow); - int64_t k; -#endif - lua_assert(ofs % sizeof(Node) == 0); - if (ofs > 32736) { - idx = dest; - rset_clear(allow, dest); - kofs = (int32_t)offsetof(Node, key); - } else if (ra_hasreg(dest)) { - emit_tsi(as, MIPSI_AADDIU, dest, node, ofs); - } -#if LJ_32 - if (!irt_ispri(irkey->t)) { - key = ra_scratch(as, allow); - rset_clear(allow, key); - } - if (irt_isnum(irkey->t)) { - lo = (int32_t)ir_knum(irkey)->u32.lo; - hi = (int32_t)ir_knum(irkey)->u32.hi; - } else { - lo = irkey->i; - hi = irt_toitype(irkey->t); - if (!ra_hasreg(key)) - goto nolo; - } - asm_guard(as, MIPSI_BNE, key, lo ? ra_allock(as, lo, allow) : RID_ZERO); -nolo: - asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO); - if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0)); - emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); -#else - if (irt_ispri(irkey->t)) { - lua_assert(!irt_isnil(irkey->t)); - k = ~((int64_t)~irt_toitype(irkey->t) << 47); - } else if (irt_isnum(irkey->t)) { - k = (int64_t)ir_knum(irkey)->u64; - } else { - k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); - } - asm_guard(as, MIPSI_BNE, key, ra_allock(as, k, allow)); - emit_tsi(as, MIPSI_LD, key, idx, kofs); -#endif - if (ofs > 32736) - emit_tsi(as, MIPSI_AADDU, dest, node, ra_allock(as, ofs, allow)); -} - -static void asm_uref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR); - } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guard(as, MIPSI_BEQ, RID_TMP, 
RID_ZERO); - emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); - emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); - } else { - emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v)); - } - emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) + - (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); - } -} - -static void asm_fref(ASMState *as, IRIns *ir) -{ - UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); -} - -static void asm_strref(ASMState *as, IRIns *ir) -{ -#if LJ_32 - Reg dest = ra_dest(as, ir, RSET_GPR); - IRRef ref = ir->op2, refk = ir->op1; - int32_t ofs = (int32_t)sizeof(GCstr); - Reg r; - if (irref_isk(ref)) { - IRRef tmp = refk; refk = ref; ref = tmp; - } else if (!irref_isk(refk)) { - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - IRIns *irr = IR(ir->op2); - if (ra_hasreg(irr->r)) { - ra_noweak(as, irr->r); - right = irr->r; - } else if (mayfuse(as, irr->op2) && - irr->o == IR_ADD && irref_isk(irr->op2) && - checki16(ofs + IR(irr->op2)->i)) { - ofs += IR(irr->op2)->i; - right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); - } else { - right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left)); - } - emit_tsi(as, MIPSI_ADDIU, dest, dest, ofs); - emit_dst(as, MIPSI_ADDU, dest, left, right); - return; - } - r = ra_alloc1(as, ref, RSET_GPR); - ofs += IR(refk)->i; - if (checki16(ofs)) - emit_tsi(as, MIPSI_ADDIU, dest, r, ofs); - else - emit_dst(as, MIPSI_ADDU, dest, r, - ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); -#else - RegSet allow = RSET_GPR; - Reg dest = ra_dest(as, ir, allow); - Reg base = ra_alloc1(as, ir->op1, allow); - IRIns *irr = IR(ir->op2); - int32_t ofs = sizeof(GCstr); - rset_clear(allow, base); - if (irref_isk(ir->op2) && checki16(ofs + irr->i)) { - emit_tsi(as, MIPSI_DADDIU, dest, base, ofs + irr->i); - } else { - emit_tsi(as, MIPSI_DADDIU, dest, dest, ofs); - emit_dst(as, MIPSI_DADDU, dest, base, ra_alloc1(as, ir->op2, allow)); - } -#endif -} - 
-/* -- Loads and stores ---------------------------------------------------- */ - -static MIPSIns asm_fxloadins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: return MIPSI_LB; - case IRT_U8: return MIPSI_LBU; - case IRT_I16: return MIPSI_LH; - case IRT_U16: return MIPSI_LHU; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1; - case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; - default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW; - } -} - -static MIPSIns asm_fxstoreins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return MIPSI_SB; - case IRT_I16: case IRT_U16: return MIPSI_SH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1; - case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; - default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW; - } -} - -static void asm_fload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - MIPSIns mi = asm_fxloadins(ir); - Reg idx; - int32_t ofs; - if (ir->op1 == REF_NIL) { - idx = RID_JGL; - ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); - } else { - idx = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_tsi(as, MIPSI_AADDIU, dest, idx, ofs); - return; - } - } - ofs = field_ofs[ir->op2]; - } - lua_assert(!irt_isfp(ir->t)); - emit_tsi(as, mi, dest, idx, ofs); -} - -static void asm_fstore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1z(as, ir->op2, RSET_GPR); - IRIns *irf = IR(ir->op1); - Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); - int32_t ofs = field_ofs[irf->op2]; - MIPSIns mi = asm_fxstoreins(ir); - lua_assert(!irt_isfp(ir->t)); - emit_tsi(as, mi, src, idx, ofs); - } -} - -static void asm_xload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, - (!LJ_SOFTFP && irt_isfp(ir->t)) ? 
RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); -} - -static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1z(as, ir->op2, - (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src), ofs); - } -} - -#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) - -static void asm_ahuvload(ASMState *as, IRIns *ir) -{ - int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); - Reg dest = RID_NONE, type = RID_TMP, idx; - RegSet allow = RSET_GPR; - int32_t ofs = 0; - IRType1 t = ir->t; - if (hiop) { - t.irt = IRT_NUM; - if (ra_used(ir+1)) { - type = ra_dest(as, ir+1, allow); - rset_clear(allow, type); - } - } - if (ra_used(ir)) { - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); - rset_clear(allow, dest); -#if LJ_64 - if (irt_isaddr(t)) - emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); - else if (irt_isint(t)) - emit_dta(as, MIPSI_SLL, dest, dest, 0); -#endif - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - rset_clear(allow, idx); - if (irt_isnum(t)) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); - } else { - asm_guard(as, MIPSI_BNE, type, - ra_allock(as, (int32_t)irt_toitype(t), allow)); - } -#if LJ_32 - if (ra_hasreg(dest)) { - if (!LJ_SOFTFP && irt_isnum(t)) - emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); - else - emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); - } - emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4)); -#else - if (ra_hasreg(dest)) { - if (!LJ_SOFTFP && irt_isnum(t)) { - emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); - dest = type; - } - } else { - dest = type; - } - emit_dta(as, MIPSI_DSRA32, type, dest, 15); - emit_tsi(as, MIPSI_LD, dest, idx, ofs); 
-#endif -} - -static void asm_ahustore(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_GPR; - Reg idx, src = RID_NONE, type = RID_NONE; - int32_t ofs = 0; - if (ir->r == RID_SINK) - return; - if (!LJ_SOFTFP && irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - emit_hsi(as, MIPSI_SDC1, src, idx, ofs); - } else { -#if LJ_32 - if (!irt_ispri(ir->t)) { - src = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, src); - } - if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) - type = ra_alloc1(as, (ir+1)->op2, allow); - else - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - rset_clear(allow, type); - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - if (ra_hasreg(src)) - emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0)); - emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4)); -#else - Reg tmp = RID_TMP; - if (irt_ispri(ir->t)) { - tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); - rset_clear(allow, tmp); - } else { - src = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, src); - type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); - rset_clear(allow, type); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - emit_tsi(as, MIPSI_SD, tmp, idx, ofs); - if (ra_hasreg(src)) { - if (irt_isinteger(ir->t)) { - emit_dst(as, MIPSI_DADDU, tmp, tmp, type); - emit_tsml(as, MIPSI_DEXT, tmp, src, 31, 0); - } else { - emit_dst(as, MIPSI_DADDU, tmp, src, type); - } - } -#endif - } -} - -static void asm_sload(ASMState *as, IRIns *ir) -{ - Reg dest = RID_NONE, type = RID_NONE, base; - RegSet allow = RSET_GPR; - IRType1 t = ir->t; -#if LJ_32 - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); - int hiop = (LJ_32 && LJ_SOFTFP && (ir+1)->o == IR_HIOP); - if (hiop) - t.irt = IRT_NUM; -#else - int32_t ofs = 8*((int32_t)ir->op1-2); -#endif - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). 
*/ - lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); -#if LJ_32 && LJ_SOFTFP - lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ - if (hiop && ra_used(ir+1)) { - type = ra_dest(as, ir+1, allow); - rset_clear(allow, type); - } -#else - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { - dest = ra_scratch(as, RSET_FPR); - asm_tointg(as, ir, dest); - t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else -#endif - if (ra_used(ir)) { - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); - dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); - rset_clear(allow, dest); - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); - if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { - if (irt_isint(t)) { - Reg tmp = ra_scratch(as, RSET_FPR); - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp); - dest = tmp; - t.irt = IRT_NUM; /* Check for original type. */ - } else { - Reg tmp = ra_scratch(as, RSET_GPR); - emit_fg(as, MIPSI_CVT_D_W, dest, dest); - emit_tg(as, MIPSI_MTC1, tmp, dest); - dest = tmp; - t.irt = IRT_INT; /* Check for original type. */ - } - } -#if LJ_64 - else if (irt_isaddr(t)) { - /* Clear type from pointers. */ - emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); - } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { - /* Sign-extend integers. 
*/ - emit_dta(as, MIPSI_SLL, dest, dest, 0); - } -#endif - goto dotypecheck; - } - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); -dotypecheck: -#if LJ_32 - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - if (ra_noreg(type)) - type = RID_TMP; - if (irt_isnum(t)) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); - } else { - Reg ktype = ra_allock(as, irt_toitype(t), allow); - asm_guard(as, MIPSI_BNE, type, ktype); - } - } - if (ra_hasreg(dest)) { - if (!LJ_SOFTFP && irt_isnum(t)) - emit_hsi(as, MIPSI_LDC1, dest, base, ofs); - else - emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0)); - } - if (ra_hasreg(type)) - emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); -#else - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - type = dest < RID_MAX_GPR ? dest : RID_TMP; - if (irt_ispri(t)) { - asm_guard(as, MIPSI_BNE, type, - ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); - } else { - if (irt_isnum(t)) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); - if (ra_hasreg(dest)) - emit_hsi(as, MIPSI_LDC1, dest, base, ofs); - } else { - asm_guard(as, MIPSI_BNE, RID_TMP, - ra_allock(as, (int32_t)irt_toitype(t), allow)); - } - emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 15); - } - emit_tsi(as, MIPSI_LD, type, base, ofs); - } else if (ra_hasreg(dest)) { - if (irt_isnum(t)) - emit_hsi(as, MIPSI_LDC1, dest, base, ofs); - else - emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base, - ofs ^ ((LJ_BE && irt_isint(t)) ? 
4 : 0)); - } -#endif -} - -/* -- Allocations --------------------------------------------------------- */ - -#if LJ_HASFFI -static void asm_cnew(ASMState *as, IRIns *ir) -{ - CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID id = (CTypeID)IR(ir->op1)->i; - CTSize sz; - CTInfo info = lj_ctype_info(cts, id, &sz); - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[4]; - RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - - as->gcsteps++; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); - if (ra_used(ir)) - ra_destreg(as, ir, RID_RET); /* GCcdata * */ - - /* Initialize immutable cdata object. */ - if (ir->o == IR_CNEWI) { - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); -#if LJ_32 - int32_t ofs = sizeof(GCcdata); - if (sz == 8) { - ofs += 4; - lua_assert((ir+1)->o == IR_HIOP); - if (LJ_LE) ir++; - } - for (;;) { - Reg r = ra_alloc1z(as, ir->op2, allow); - emit_tsi(as, MIPSI_SW, r, RID_RET, ofs); - rset_clear(allow, r); - if (ofs == sizeof(GCcdata)) break; - ofs -= 4; if (LJ_BE) ir++; else ir--; - } -#else - emit_tsi(as, MIPSI_SD, ra_alloc1(as, ir->op2, allow), - RID_RET, sizeof(GCcdata)); -#endif - lua_assert(sz == 4 || sz == 8); - } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ - ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* CTypeID id */ - args[2] = ir->op2; /* CTSize sz */ - args[3] = ASMREF_TMP1; /* CTSize align */ - asm_gencall(as, ci, args); - emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); - return; - } - - /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. 
*/ - emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); - emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); - emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA); - emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ - asm_gencall(as, ci, args); - ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), - ra_releasetmp(as, ASMREF_TMP1)); -} -#else -#define asm_cnew(as, ir) ((void)0) -#endif - -/* -- Write barriers ------------------------------------------------------ */ - -static void asm_tbar(ASMState *as, IRIns *ir) -{ - Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); - Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); - Reg link = RID_TMP; - MCLabel l_end = emit_label(as); - emit_tsi(as, MIPSI_AS, link, tab, (int32_t)offsetof(GCtab, gclist)); - emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); - emit_setgl(as, tab, gc.grayagain); - emit_getgl(as, link, gc.grayagain); - emit_dst(as, MIPSI_XOR, mark, mark, RID_TMP); /* Clear black bit. */ - emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); - emit_tsi(as, MIPSI_ANDI, RID_TMP, mark, LJ_GC_BLACK); - emit_tsi(as, MIPSI_LBU, mark, tab, (int32_t)offsetof(GCtab, marked)); -} - -static void asm_obar(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; - IRRef args[2]; - MCLabel l_end; - Reg obj, val, tmp; - /* No need for other object barriers (yet). 
*/ - lua_assert(IR(ir->op1)->o == IR_UREFC); - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ir->op1; /* TValue *tv */ - asm_gencall(as, ci, args); - emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); - obj = IR(ir->op1)->r; - tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); - emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); - emit_tsi(as, MIPSI_ANDI, tmp, tmp, LJ_GC_BLACK); - emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); - emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES); - val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); - emit_tsi(as, MIPSI_LBU, tmp, obj, - (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); - emit_tsi(as, MIPSI_LBU, RID_TMP, val, (int32_t)offsetof(GChead, marked)); -} - -/* -- Arithmetic and logic operations ------------------------------------- */ - -#if !LJ_SOFTFP -static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - emit_fgh(as, mi, dest, left, right); -} - -static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); - emit_fg(as, mi, dest, left); -} - -static void asm_fpmath(ASMState *as, IRIns *ir) -{ - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; - if (ir->op2 <= IRFPM_TRUNC) - asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); - else if (ir->op2 == IRFPM_SQRT) - asm_fpunary(as, ir, MIPSI_SQRT_D); - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); -} -#endif - -static void asm_add(ASMState *as, IRIns *ir) -{ - IRType1 t = ir->t; -#if !LJ_SOFTFP - if (irt_isnum(t)) { - asm_fparith(as, ir, MIPSI_ADD_D); - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - if 
(irref_isk(ir->op2)) { - intptr_t k = get_kval(IR(ir->op2)); - if (checki16(k)) { - emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest, - left, k); - return; - } - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dst(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDU : MIPSI_ADDU, dest, - left, right); - } -} - -static void asm_sub(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, MIPSI_SUB_D); - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, - left, right); - } -} - -static void asm_mul(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, MIPSI_MUL_D); - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (LJ_64 && irt_is64(ir->t)) { - emit_dst(as, MIPSI_MFLO, dest, 0, 0); - emit_dst(as, MIPSI_DMULT, 0, left, right); - } else { - emit_dst(as, MIPSI_MUL, dest, left, right); - } - } -} - -static void asm_mod(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isint(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_modi); -} - -#if !LJ_SOFTFP -static void asm_pow(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_powi); -} - -static void asm_div(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - else -#endif - asm_fparith(as, ir, MIPSI_DIV_D); -} -#endif - -static void asm_neg(ASMState *as, IRIns *ir) -{ -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, MIPSI_NEG_D); - } else -#endif - { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, - RID_ZERO, left); - } -} - -#define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) - -static void asm_arithov(ASMState *as, IRIns *ir) -{ - Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); - lua_assert(!irt_is64(ir->t)); - if (irref_isk(ir->op2)) { - int k = IR(ir->op2)->i; - if (ir->o == IR_SUBOV) k = -k; - if (checki16(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ - left = ra_alloc1(as, ir->op1, RSET_GPR); - asm_guard(as, k >= 0 ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_dst(as, MIPSI_SLT, RID_TMP, dest, dest == left ? RID_TMP : left); - emit_tsi(as, MIPSI_ADDIU, dest, left, k); - if (dest == left) emit_move(as, RID_TMP, left); - return; - } - } - left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), - right), dest)); - asm_guard(as, MIPSI_BLTZ, RID_TMP, 0); - emit_dst(as, MIPSI_AND, RID_TMP, RID_TMP, tmp); - if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */ - emit_dst(as, MIPSI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right); - } else { /* ((dest^left) & (dest^~right)) < 0 */ - emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, dest); - emit_dst(as, MIPSI_NOR, RID_TMP, dest == right ? RID_TMP : right, RID_ZERO); - } - emit_dst(as, MIPSI_XOR, tmp, dest, dest == left ? RID_TMP : left); - emit_dst(as, ir->o == IR_ADDOV ? 
MIPSI_ADDU : MIPSI_SUBU, dest, left, right); - if (dest == left || dest == right) - emit_move(as, RID_TMP, dest == left ? left : right); -} - -#define asm_addov(as, ir) asm_arithov(as, ir) -#define asm_subov(as, ir) asm_arithov(as, ir) - -static void asm_mulov(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), - right), dest)); - asm_guard(as, MIPSI_BNE, RID_TMP, tmp); - emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31); - emit_dst(as, MIPSI_MFHI, tmp, 0, 0); - emit_dst(as, MIPSI_MFLO, dest, 0, 0); - emit_dst(as, MIPSI_MULT, 0, left, right); -} - -#if LJ_32 && LJ_HASFFI -static void asm_add64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (k == 0) { - emit_dst(as, MIPSI_ADDU, dest, left, RID_TMP); - goto loarith; - } else if (checki16(k)) { - emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP); - emit_tsi(as, MIPSI_ADDIU, dest, left, k); - goto loarith; - } - } - emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP); - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dst(as, MIPSI_ADDU, dest, left, right); -loarith: - ir--; - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc1(as, ir->op1, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (k == 0) { - if (dest != left) - emit_move(as, dest, left); - return; - } else if (checki16(k)) { - if (dest == left) { - Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, left)); - emit_move(as, dest, tmp); - dest = tmp; - } - emit_dst(as, MIPSI_SLTU, RID_TMP, dest, left); - emit_tsi(as, MIPSI_ADDIU, dest, left, k); - return; - } - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - if (dest == left && dest == right) { - Reg tmp = ra_scratch(as, 
rset_exclude(rset_exclude(RSET_GPR, left), right)); - emit_move(as, dest, tmp); - dest = tmp; - } - emit_dst(as, MIPSI_SLTU, RID_TMP, dest, dest == left ? right : left); - emit_dst(as, MIPSI_ADDU, dest, left, right); -} - -static void asm_sub64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP); - emit_dst(as, MIPSI_SUBU, dest, left, right); - ir--; - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (dest == left) { - Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right)); - emit_move(as, dest, tmp); - dest = tmp; - } - emit_dst(as, MIPSI_SLTU, RID_TMP, left, dest); - emit_dst(as, MIPSI_SUBU, dest, left, right); -} - -static void asm_neg64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP); - emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left); - ir--; - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc1(as, ir->op1, RSET_GPR); - emit_dst(as, MIPSI_SLTU, RID_TMP, RID_ZERO, dest); - emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left); -} -#endif - -static void asm_bnot(ASMState *as, IRIns *ir) -{ - Reg left, right, dest = ra_dest(as, ir, RSET_GPR); - IRIns *irl = IR(ir->op1); - if (mayfuse(as, ir->op1) && irl->o == IR_BOR) { - left = ra_alloc2(as, irl, RSET_GPR); - right = (left >> 8); left &= 255; - } else { - left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - right = RID_ZERO; - } - emit_dst(as, MIPSI_NOR, dest, left, right); -} - -static void asm_bswap(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); -#if LJ_32 - if ((as->flags & JIT_F_MIPSXXR2)) { - emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); - emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); - } else { - Reg tmp = 
ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), dest)); - emit_dst(as, MIPSI_OR, dest, dest, tmp); - emit_dst(as, MIPSI_OR, dest, dest, RID_TMP); - emit_tsi(as, MIPSI_ANDI, dest, dest, 0xff00); - emit_dta(as, MIPSI_SLL, RID_TMP, RID_TMP, 8); - emit_dta(as, MIPSI_SRL, dest, left, 8); - emit_tsi(as, MIPSI_ANDI, RID_TMP, left, 0xff00); - emit_dst(as, MIPSI_OR, tmp, tmp, RID_TMP); - emit_dta(as, MIPSI_SRL, tmp, left, 24); - emit_dta(as, MIPSI_SLL, RID_TMP, left, 24); - } -#else - if (irt_is64(ir->t)) { - emit_dst(as, MIPSI_DSHD, dest, 0, RID_TMP); - emit_dst(as, MIPSI_DSBH, RID_TMP, 0, left); - } else { - emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); - emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); - } -#endif -} - -static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - if (irref_isk(ir->op2)) { - intptr_t k = get_kval(IR(ir->op2)); - if (checku16(k)) { - emit_tsi(as, mik, dest, left, k); - return; - } - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dst(as, mi, dest, left, right); -} - -#define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI) -#define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI) -#define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI) - -static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op2)) { /* Constant shifts. */ - uint32_t shift = (uint32_t)IR(ir->op2)->i; - if (LJ_64 && irt_is64(ir->t)) mik |= (shift & 32) ? MIPSI_D32 : MIPSI_D; - emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), - (shift & 31)); - } else { - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (LJ_64 && irt_is64(ir->t)) mi |= MIPSI_DV; - emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. 
*/ - } -} - -#define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL) -#define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL) -#define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA) -#define asm_brol(as, ir) lua_assert(0) - -static void asm_bror(ASMState *as, IRIns *ir) -{ - if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { - asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op2)) { /* Constant shifts. */ - uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_rotr(as, dest, left, RID_TMP, shift); - } else { - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - emit_dst(as, MIPSI_OR, dest, dest, RID_TMP); - emit_dst(as, MIPSI_SRLV, dest, right, left); - emit_dst(as, MIPSI_SLLV, RID_TMP, RID_TMP, left); - emit_dst(as, MIPSI_SUBU, RID_TMP, ra_allock(as, 32, RSET_GPR), right); - } - } -} - -#if LJ_32 && LJ_SOFTFP -static void asm_sfpmin_max(ASMState *as, IRIns *ir) -{ - CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; - IRRef args[4]; - args[0^LJ_BE] = ir->op1; - args[1^LJ_BE] = (ir+1)->op1; - args[2^LJ_BE] = ir->op2; - args[3^LJ_BE] = (ir+1)->op2; - asm_setupresult(as, ir, &ci); - emit_call(as, (void *)ci.func, 0); - ci.func = NULL; - asm_gencall(as, &ci, args); -} -#endif - -static void asm_min_max(ASMState *as, IRIns *ir, int ismax) -{ - if (!LJ_SOFTFP && irt_isnum(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - if (dest == left) { - emit_fg(as, MIPSI_MOVT_D, dest, right); - } else { - emit_fg(as, MIPSI_MOVF_D, dest, left); - if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); - } - emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? 
right : left); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (dest == left) { - emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); - } else { - emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); - if (dest != right) emit_move(as, dest, right); - } - emit_dst(as, MIPSI_SLT, RID_TMP, - ismax ? left : right, ismax ? right : left); - } -} - -#define asm_min(as, ir) asm_min_max(as, ir, 0) -#define asm_max(as, ir) asm_min_max(as, ir, 1) - -/* -- Comparisons --------------------------------------------------------- */ - -#if LJ_32 && LJ_SOFTFP -/* SFP comparisons. */ -static void asm_sfpcomp(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; - RegSet drop = RSET_SCRATCH; - Reg r; - IRRef args[4]; - args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1; - args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2; - - for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { - if (!rset_test(as->freeset, r) && - regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) - rset_clear(drop, r); - } - ra_evictset(as, drop); - - asm_setupresult(as, ir, ci); - - switch ((IROp)ir->o) { - case IR_LT: - asm_guard(as, MIPSI_BGEZ, RID_RET, 0); - break; - case IR_ULT: - asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); - emit_loadi(as, RID_TMP, 1); - asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); - break; - case IR_GE: - asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); - emit_loadi(as, RID_TMP, 2); - asm_guard(as, MIPSI_BLTZ, RID_RET, 0); - break; - case IR_LE: - asm_guard(as, MIPSI_BGTZ, RID_RET, 0); - break; - case IR_GT: - asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); - emit_loadi(as, RID_TMP, 2); - asm_guard(as, MIPSI_BLEZ, RID_RET, 0); - break; - case IR_UGE: - asm_guard(as, MIPSI_BLTZ, RID_RET, 0); - break; - case IR_ULE: - asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); - emit_loadi(as, RID_TMP, 1); - break; - case IR_UGT: case IR_ABC: - asm_guard(as, MIPSI_BLEZ, 
RID_RET, 0); - break; - case IR_EQ: case IR_NE: - asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_RET, RID_ZERO); - default: - break; - } - asm_gencall(as, ci, args); -} -#endif - -static void asm_comp(ASMState *as, IRIns *ir) -{ - /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ - IROp op = ir->o; - if (!LJ_SOFTFP && irt_isnum(ir->t)) { - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); - emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); - } else { - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - if (op == IR_ABC) op = IR_UGT; - if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(IR(ir->op2)) == 0) { - MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : - ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); - asm_guard(as, mi, left, 0); - } else { - if (irref_isk(ir->op2)) { - intptr_t k = get_kval(IR(ir->op2)); - if ((op&2)) k++; - if (checki16(k)) { - asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, (op&4) ? MIPSI_SLTIU : MIPSI_SLTI, - RID_TMP, left, k); - return; - } - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT, - RID_TMP, (op&2) ? right : left, (op&2) ? left : right); - } - } -} - -static void asm_equal(ASMState *as, IRIns *ir) -{ - Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? - RSET_FPR : RSET_GPR); - right = (left >> 8); left &= 255; - if (!LJ_SOFTFP && irt_isnum(ir->t)) { - asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); - emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); - } else { - asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); - } -} - -#if LJ_32 && LJ_HASFFI -/* 64 bit integer comparisons. */ -static void asm_comp64(ASMState *as, IRIns *ir) -{ - /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. 
*/ - IROp op = (ir-1)->o; - MCLabel l_end; - Reg rightlo, leftlo, righthi, lefthi = ra_alloc2(as, ir, RSET_GPR); - righthi = (lefthi >> 8); lefthi &= 255; - leftlo = ra_alloc2(as, ir-1, - rset_exclude(rset_exclude(RSET_GPR, lefthi), righthi)); - rightlo = (leftlo >> 8); leftlo &= 255; - asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); - l_end = emit_label(as); - if (lefthi != righthi) - emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT, RID_TMP, - (op&2) ? righthi : lefthi, (op&2) ? lefthi : righthi); - emit_dst(as, MIPSI_SLTU, RID_TMP, - (op&2) ? rightlo : leftlo, (op&2) ? leftlo : rightlo); - if (lefthi != righthi) - emit_branch(as, MIPSI_BEQ, lefthi, righthi, l_end); -} - -static void asm_comp64eq(ASMState *as, IRIns *ir) -{ - Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - asm_guard(as, ((ir-1)->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO); - tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right)); - emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp); - emit_dst(as, MIPSI_XOR, tmp, left, right); - left = ra_alloc2(as, ir-1, RSET_GPR); - right = (left >> 8); left &= 255; - emit_dst(as, MIPSI_XOR, RID_TMP, left, right); -} -#endif - -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ - -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ -static void asm_hiop(ASMState *as, IRIns *ir) -{ -#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP) - /* HIOP is marked as a store because it needs its own DCE logic. */ - int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ - if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; - if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ - as->curins--; /* Always skip the CONV. */ -#if LJ_HASFFI && !LJ_SOFTFP - if (usehi || uselo) - asm_conv64(as, ir); - return; -#endif - } else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. 
*/ - as->curins--; /* Always skip the loword comparison. */ -#if LJ_SOFTFP - if (!irt_isint(ir->t)) { - asm_sfpcomp(as, ir-1); - return; - } -#endif -#if LJ_HASFFI - asm_comp64(as, ir); -#endif - return; - } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ - as->curins--; /* Always skip the loword comparison. */ -#if LJ_SOFTFP - if (!irt_isint(ir->t)) { - asm_sfpcomp(as, ir-1); - return; - } -#endif -#if LJ_HASFFI - asm_comp64eq(as, ir); -#endif - return; -#if LJ_SOFTFP - } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { - as->curins--; /* Always skip the loword min/max. */ - if (uselo || usehi) - asm_sfpmin_max(as, ir-1); - return; -#endif - } else if ((ir-1)->o == IR_XSTORE) { - as->curins--; /* Handle both stores here. */ - if ((ir-1)->r != RID_SINK) { - asm_xstore_(as, ir, LJ_LE ? 4 : 0); - asm_xstore_(as, ir-1, LJ_LE ? 0 : 4); - } - return; - } - if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ - switch ((ir-1)->o) { -#if LJ_HASFFI - case IR_ADD: as->curins--; asm_add64(as, ir); break; - case IR_SUB: as->curins--; asm_sub64(as, ir); break; - case IR_NEG: as->curins--; asm_neg64(as, ir); break; -#endif -#if LJ_SOFTFP - case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - case IR_STRTO: - if (!uselo) - ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ - break; -#endif - case IR_CALLN: - case IR_CALLS: - case IR_CALLXS: - if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ - break; -#if LJ_SOFTFP - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: -#endif - case IR_CNEWI: - /* Nothing to do here. Handled by lo op itself. */ - break; - default: lua_assert(0); break; - } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. 
*/ -#endif -} - -/* -- Profiling ----------------------------------------------------------- */ - -static void asm_prof(ASMState *as, IRIns *ir) -{ - UNUSED(ir); - asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE); - emit_lsglptr(as, MIPSI_LBU, RID_TMP, - (int32_t)offsetof(global_State, hookmask)); -} - -/* -- Stack handling ------------------------------------------------------ */ - -/* Check Lua stack size for overflow. Use exit handler as fallback. */ -static void asm_stack_check(ASMState *as, BCReg topslot, - IRIns *irp, RegSet allow, ExitNo exitno) -{ - /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */ - Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; - ExitNo oldsnap = as->snapno; - rset_clear(allow, pbase); -#if LJ_32 - tmp = allow ? rset_pickbot(allow) : - (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); -#else - tmp = allow ? rset_pickbot(allow) : RID_RET; -#endif - as->snapno = exitno; - asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); - as->snapno = oldsnap; - if (allow == RSET_EMPTY) /* Restore temp. register. */ - emit_tsi(as, MIPSI_AL, tmp, RID_SP, 0); - else - ra_modified(as, tmp); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); - emit_dst(as, MIPSI_ASUBU, RID_TMP, tmp, pbase); - emit_tsi(as, MIPSI_AL, tmp, tmp, offsetof(lua_State, maxstack)); - if (pbase == RID_TMP) - emit_getgl(as, RID_TMP, jit_base); - emit_getgl(as, tmp, cur_L); - if (allow == RSET_EMPTY) /* Spill temp. register. */ - emit_tsi(as, MIPSI_AS, tmp, RID_SP, 0); -} - -/* Restore Lua stack from on-trace state. */ -static void asm_stack_restore(ASMState *as, SnapShot *snap) -{ - SnapEntry *map = &as->T->snapmap[snap->mapofs]; -#if LJ_32 || defined(LUA_USE_ASSERT) - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; -#endif - MSize n, nent = snap->nent; - /* Store the value of all modified slots to the Lua stack. 
*/ - for (n = 0; n < nent; n++) { - SnapEntry sn = map[n]; - BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1-LJ_FR2); - IRRef ref = snap_ref(sn); - IRIns *ir = IR(ref); - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { -#if LJ_SOFTFP - Reg tmp; - RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ - tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); - emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); - if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); - tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); - emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); -#else - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); -#endif - } else { -#if LJ_32 - RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - Reg type; - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - rset_clear(allow, src); - emit_tsi(as, MIPSI_SW, src, RID_BASE, ofs+(LJ_BE?4:0)); - } - if ((sn & (SNAP_CONT|SNAP_FRAME))) { - if (s == 0) continue; /* Do not overwrite link to previous frame. */ - type = ra_allock(as, (int32_t)(*flinks--), allow); -#if LJ_SOFTFP - } else if ((sn & SNAP_SOFTFPNUM)) { - type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); -#endif - } else { - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - } - emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4)); -#else - asm_tvstore64(as, RID_BASE, ofs, ref); -#endif - } - checkmclim(as); - } - lua_assert(map + nent == flinks); -} - -/* -- GC handling --------------------------------------------------------- */ - -/* Check GC threshold and do one or more GC steps. 
*/ -static void asm_gc_check(ASMState *as) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; - IRRef args[2]; - MCLabel l_end; - Reg tmp; - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ - /* Assumes asm_snap_prep() already done. */ - asm_guard(as, MIPSI_BNE, RID_RET, RID_ZERO); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ASMREF_TMP2; /* MSize steps */ - asm_gencall(as, ci, args); - emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); - tmp = ra_releasetmp(as, ASMREF_TMP2); - emit_loadi(as, tmp, as->gcsteps); - /* Jump around GC step if GC total < GC threshold. */ - emit_branch(as, MIPSI_BNE, RID_TMP, RID_ZERO, l_end); - emit_dst(as, MIPSI_SLTU, RID_TMP, RID_TMP, tmp); - emit_getgl(as, tmp, gc.threshold); - emit_getgl(as, RID_TMP, gc.total); - as->gcsteps = 0; - checkmclim(as); -} - -/* -- Loop handling ------------------------------------------------------- */ - -/* Fixup the loop branch. */ -static void asm_loop_fixup(ASMState *as) -{ - MCode *p = as->mctop; - MCode *target = as->mcp; - p[-1] = MIPSI_NOP; - if (as->loopinv) { /* Inverted loop branch? */ - /* asm_guard already inverted the cond branch. Only patch the target. */ - p[-3] |= ((target-p+2) & 0x0000ffffu); - } else { - p[-2] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); - } -} - -/* -- Head of trace ------------------------------------------------------- */ - -/* Coalesce BASE register for a root trace. */ -static void asm_head_root_base(ASMState *as) -{ - IRIns *ir = IR(REF_BASE); - Reg r = ir->r; - if (as->loopinv) as->mctop--; - if (ra_hasreg(r)) { - ra_free(as, r); - if (rset_test(as->modset, r) || irt_ismarked(ir->t)) - ir->r = RID_INIT; /* No inheritance for modified BASE register. */ - if (r != RID_BASE) - emit_move(as, r, RID_BASE); - } -} - -/* Coalesce BASE register for a side trace. 
*/ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) -{ - IRIns *ir = IR(REF_BASE); - Reg r = ir->r; - if (as->loopinv) as->mctop--; - if (ra_hasreg(r)) { - ra_free(as, r); - if (rset_test(as->modset, r) || irt_ismarked(ir->t)) - ir->r = RID_INIT; /* No inheritance for modified BASE register. */ - if (irp->r == r) { - rset_clear(allow, r); /* Mark same BASE register as coalesced. */ - } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { - rset_clear(allow, irp->r); - emit_move(as, r, irp->r); /* Move from coalesced parent reg. */ - } else { - emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ - } - } - return allow; -} - -/* -- Tail of trace ------------------------------------------------------- */ - -/* Fixup the tail code. */ -static void asm_tail_fixup(ASMState *as, TraceNo lnk) -{ - MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; - int32_t spadj = as->T->spadjust; - MCode *p = as->mctop-1; - *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; - p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); -} - -/* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) -{ - as->mcp = as->mctop-2; /* Leave room for branch plus nop or stack adj. */ - as->invmcp = as->loopref ? as->mcp : NULL; -} - -/* -- Trace setup --------------------------------------------------------- */ - -/* Ensure there are enough stack slots for call arguments. */ -static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - IRRef args[CCI_NARGS_MAX*2]; - uint32_t i, nargs = CCI_XNARGS(ci); -#if LJ_32 - int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; -#else - int nslots = 0, ngpr = REGARG_NUMGPR; -#endif - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) { -#if LJ_32 - if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) && - nfpr > 0 && !(ci->flags & CCI_VARARG)) { - nfpr--; - ngpr -= irt_isnum(IR(args[i])->t) ? 
2 : 1; - } else if (!LJ_SOFTFP && args[i] && irt_isnum(IR(args[i])->t)) { - nfpr = 0; - ngpr = ngpr & ~1; - if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1; - } else { - nfpr = 0; - if (ngpr > 0) ngpr--; else nslots++; - } -#else - if (ngpr > 0) ngpr--; else nslots += 2; -#endif - } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; - return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); -} - -static void asm_setup_target(ASMState *as) -{ - asm_sparejump_setup(as); - asm_exitstub_setup(as); -} - -/* -- Trace patching ------------------------------------------------------ */ - -/* Patch exit jumps of existing machine code to a new target. */ -void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) -{ - MCode *p = T->mcode; - MCode *pe = (MCode *)((char *)p + T->szmcode); - MCode *px = exitstub_trace_addr(T, exitno); - MCode *cstart = NULL, *cstop = NULL; - MCode *mcarea = lj_mcode_patch(J, p, 0); - MCode exitload = MIPSI_LI | MIPSF_T(RID_TMP) | exitno; - MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); - for (p++; p < pe; p++) { - if (*p == exitload) { /* Look for load of exit number. */ - if (((p[-1] ^ (px-p)) & 0xffffu) == 0) { /* Look for exitstub branch. */ - ptrdiff_t delta = target - p; - if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ - patchbranch: - p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu); - *p = MIPSI_NOP; /* Replace the load of the exit number. */ - cstop = p; - if (!cstart) cstart = p-1; - } else { /* Branch out of range. Use spare jump slot in mcarea. */ - int i; - for (i = 2; i < 2+MIPS_SPAREJUMP*2; i += 2) { - if (mcarea[i] == tjump) { - delta = mcarea+i - p; - goto patchbranch; - } else if (mcarea[i] == MIPSI_NOP) { - mcarea[i] = tjump; - cstart = mcarea+i; - delta = mcarea+i - p; - goto patchbranch; - } - } - /* Ignore jump slot overflow. Child trace is simply not attached. 
*/ - } - } else if (p+1 == pe) { - /* Patch NOP after code for inverted loop branch. Use of J is ok. */ - lua_assert(p[1] == MIPSI_NOP); - p[1] = tjump; - *p = MIPSI_NOP; /* Replace the load of the exit number. */ - cstop = p+2; - if (!cstart) cstart = p+1; - } - } - } - if (cstart) lj_mcode_sync(cstart, cstop); - lj_mcode_patch(J, mcarea, 1); -} - diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h deleted file mode 100644 index 6daa861b91..0000000000 --- a/src/lj_asm_ppc.h +++ /dev/null @@ -1,2016 +0,0 @@ -/* -** PPC IR assembler (SSA IR -> machine code). -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -/* -- Register allocator extensions --------------------------------------- */ - -/* Allocate a register with a hint. */ -static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) -{ - Reg r = IR(ref)->r; - if (ra_noreg(r)) { - if (!ra_hashint(r) && !iscrossref(as, ref)) - ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ - r = ra_allocref(as, ref, allow); - } - ra_noweak(as, r); - return r; -} - -/* Allocate two source registers for three-operand instructions. */ -static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) -{ - IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - Reg left = irl->r, right = irr->r; - if (ra_hasreg(left)) { - ra_noweak(as, left); - if (ra_noreg(right)) - right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); - else - ra_noweak(as, right); - } else if (ra_hasreg(right)) { - ra_noweak(as, right); - left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); - } else if (ra_hashint(right)) { - right = ra_allocref(as, ir->op2, allow); - left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); - } else { - left = ra_allocref(as, ir->op1, allow); - right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); - } - return left | (right << 8); -} - -/* -- Guard handling ------------------------------------------------------ */ - -/* Setup exit stubs after the end of each trace. 
*/ -static void asm_exitstub_setup(ASMState *as, ExitNo nexits) -{ - ExitNo i; - MCode *mxp = as->mctop; - if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) - asm_mclimit(as); - /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */ - for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2); - *--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno; /* Read by exit handler. */ - mxp--; - *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2); - *--mxp = PPCI_MFLR|PPCF_T(RID_TMP); - as->mctop = mxp; -} - -static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) -{ - /* Keep this in-sync with exitstub_trace_addr(). */ - return as->mctop + exitno + 3; -} - -/* Emit conditional branch to exit for guard. */ -static void asm_guardcc(ASMState *as, PPCCC cc) -{ - MCode *target = asm_exitstub_addr(as, as->snapno); - MCode *p = as->mcp; - if (LJ_UNLIKELY(p == as->invmcp)) { - as->loopinv = 1; - *p = PPCI_B | (((target-p) & 0x00ffffffu) << 2); - emit_condbranch(as, PPCI_BC, cc^4, p); - return; - } - emit_condbranch(as, PPCI_BC, cc, target); -} - -/* -- Operand fusion ------------------------------------------------------ */ - -/* Limit linear search to this distance. Avoids O(n^2) behavior. */ -#define CONFLICT_SEARCH_LIM 31 - -/* Check if there's no conflicting instruction between curins and ref. */ -static int noconflict(ASMState *as, IRRef ref, IROp conflict) -{ - IRIns *ir = as->ir; - IRRef i = as->curins; - if (i > ref + CONFLICT_SEARCH_LIM) - return 0; /* Give up, ref is too far away. */ - while (--i > ref) - if (ir[i].o == conflict) - return 0; /* Conflict found. */ - return 1; /* Ok, no conflict. */ -} - -/* Fuse the array base of colocated arrays. 
*/ -static int32_t asm_fuseabase(ASMState *as, IRRef ref) -{ - IRIns *ir = IR(ref); - if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && - !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) - return (int32_t)sizeof(GCtab); - return 0; -} - -/* Indicates load/store indexed is ok. */ -#define AHUREF_LSX ((int32_t)0x80000000) - -/* Fuse array/hash/upvalue reference into register+offset operand. */ -static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) -{ - IRIns *ir = IR(ref); - if (ra_noreg(ir->r)) { - if (ir->o == IR_AREF) { - if (mayfuse(as, ref)) { - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (checki16(ofs)) { - *ofsp = ofs; - return ra_alloc1(as, refa, allow); - } - } - if (*ofsp == AHUREF_LSX) { - Reg base = ra_alloc1(as, ir->op1, allow); - Reg idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - return base | (idx << 8); - } - } - } else if (ir->o == IR_HREFK) { - if (mayfuse(as, ref)) { - int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); - if (checki16(ofs)) { - *ofsp = ofs; - return ra_alloc1(as, ir->op1, allow); - } - } - } else if (ir->o == IR_UREFC) { - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); - int32_t jgl = (intptr_t)J2G(as->J); - if ((uint32_t)(ofs-jgl) < 65536) { - *ofsp = ofs-jgl-32768; - return RID_JGL; - } else { - *ofsp = (int16_t)ofs; - return ra_allock(as, ofs-(int16_t)ofs, allow); - } - } - } - } - *ofsp = 0; - return ra_alloc1(as, ref, allow); -} - -/* Fuse XLOAD/XSTORE reference into load/store operand. 
*/ -static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref, - RegSet allow, int32_t ofs) -{ - IRIns *ir = IR(ref); - Reg base; - if (ra_noreg(ir->r) && canfuse(as, ir)) { - if (ir->o == IR_ADD) { - int32_t ofs2; - if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) { - ofs = ofs2; - ref = ir->op1; - } else if (ofs == 0) { - Reg right, left = ra_alloc2(as, ir, allow); - right = (left >> 8); left &= 255; - emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right); - return; - } - } else if (ir->o == IR_STRREF) { - lua_assert(ofs == 0); - ofs = (int32_t)sizeof(GCstr); - if (irref_isk(ir->op2)) { - ofs += IR(ir->op2)->i; - ref = ir->op1; - } else if (irref_isk(ir->op1)) { - ofs += IR(ir->op1)->i; - ref = ir->op2; - } else { - /* NYI: Fuse ADD with constant. */ - Reg tmp, right, left = ra_alloc2(as, ir, allow); - right = (left >> 8); left &= 255; - tmp = ra_scratch(as, rset_exclude(rset_exclude(allow, left), right)); - emit_fai(as, pi, rt, tmp, ofs); - emit_tab(as, PPCI_ADD, tmp, left, right); - return; - } - if (!checki16(ofs)) { - Reg left = ra_alloc1(as, ref, allow); - Reg right = ra_allock(as, ofs, rset_exclude(allow, left)); - emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right); - return; - } - } - } - base = ra_alloc1(as, ref, allow); - emit_fai(as, pi, rt, base, ofs); -} - -/* Fuse XLOAD/XSTORE reference into indexed-only load/store operand. */ -static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, - RegSet allow) -{ - IRIns *ira = IR(ref); - Reg right, left; - if (canfuse(as, ira) && ira->o == IR_ADD && ra_noreg(ira->r)) { - left = ra_alloc2(as, ira, allow); - right = (left >> 8); left &= 255; - } else { - right = ra_alloc1(as, ref, allow); - left = RID_R0; - } - emit_tab(as, pi, rt, left, right); -} - -/* Fuse to multiply-add/sub instruction. 
*/ -static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) -{ - IRRef lref = ir->op1, rref = ir->op2; - IRIns *irm; - if (lref != rref && - ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && - ra_noreg(irm->r)) || - (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && - (rref = lref, pi = pir, ra_noreg(irm->r))))) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg add = ra_alloc1(as, rref, RSET_FPR); - Reg right, left = ra_alloc2(as, irm, rset_exclude(RSET_FPR, add)); - right = (left >> 8); left &= 255; - emit_facb(as, pi, dest, left, right, add); - return 1; - } - return 0; -} - -/* -- Calls --------------------------------------------------------------- */ - -/* Generate a call to a C function. */ -static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) -{ - uint32_t n, nargs = CCI_XNARGS(ci); - int32_t ofs = 8; - Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; - if ((void *)ci->func) - emit_call(as, (void *)ci->func); - for (n = 0; n < nargs; n++) { /* Setup args. */ - IRRef ref = args[n]; - if (ref) { - IRIns *ir = IR(ref); - if (irt_isfp(ir->t)) { - if (fpr <= REGARG_LASTFPR) { - lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ - ra_leftov(as, fpr, ref); - fpr++; - } else { - Reg r = ra_alloc1(as, ref, RSET_FPR); - if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; - emit_spstore(as, ir, r, ofs); - ofs += irt_isnum(ir->t) ? 8 : 4; - } - } else { - if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ - ra_leftov(as, gpr, ref); - gpr++; - } else { - Reg r = ra_alloc1(as, ref, RSET_GPR); - emit_spstore(as, ir, r, ofs); - ofs += 4; - } - } - } else { - if (gpr <= REGARG_LASTGPR) - gpr++; - else - ofs += 4; - } - checkmclim(as); - } - if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ - emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); -} - -/* Setup result reg/sp for call. Evict scratch regs. 
*/ -static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - RegSet drop = RSET_SCRATCH; - int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); - if ((ci->flags & CCI_NOFPRCLOBBER)) - drop &= ~RSET_FPR; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - if (hiop && ra_hasreg((ir+1)->r)) - rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ - ra_evictset(as, drop); /* Evictions must be performed first. */ - if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); - if (irt_isfp(ir->t)) { - if ((ci->flags & CCI_CASTU64)) { - /* Use spill slot or temp slots. */ - int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; - Reg dest = ir->r; - if (ra_hasreg(dest)) { - ra_free(as, dest); - ra_modified(as, dest); - emit_fai(as, PPCI_LFD, dest, RID_SP, ofs); - } - emit_tai(as, PPCI_STW, RID_RETHI, RID_SP, ofs); - emit_tai(as, PPCI_STW, RID_RETLO, RID_SP, ofs+4); - } else { - ra_destreg(as, ir, RID_FPRET); - } -#if LJ_32 - } else if (hiop) { - ra_destpair(as, ir); -#endif - } else { - ra_destreg(as, ir, RID_RET); - } - } -} - -static void asm_callx(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX*2]; - CCallInfo ci; - IRRef func; - IRIns *irf; - ci.flags = asm_callx_flags(as, ir); - asm_collectargs(as, ir, &ci, args); - asm_setupresult(as, ir, &ci); - func = ir->op2; irf = IR(func); - if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } - if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(void *)(intptr_t)(irf->i); - } else { /* Need a non-argument register for indirect calls. */ - RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); - Reg freg = ra_alloc1(as, func, allow); - *--as->mcp = PPCI_BCTRL; - *--as->mcp = PPCI_MTCTR | PPCF_T(freg); - ci.func = (ASMFunction)(void *)0; - } - asm_gencall(as, &ci, args); -} - -/* -- Returns ------------------------------------------------------------- */ - -/* Return to lower frame. Guard that it goes to the right spot. 
*/ -static void asm_retf(ASMState *as, IRIns *ir) -{ - Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); - void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); - as->topslot -= (BCReg)delta; - if ((int32_t)as->topslot < 0) as->topslot = 0; - irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ - emit_setgl(as, base, jit_base); - emit_addptr(as, base, -8*delta); - asm_guardcc(as, CC_NE); - emit_ab(as, PPCI_CMPW, RID_TMP, - ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); - emit_tai(as, PPCI_LWZ, RID_TMP, base, -8); -} - -/* -- Type conversions ---------------------------------------------------- */ - -static void asm_tointg(ASMState *as, IRIns *ir, Reg left) -{ - RegSet allow = RSET_FPR; - Reg tmp = ra_scratch(as, rset_clear(allow, left)); - Reg fbias = ra_scratch(as, rset_clear(allow, tmp)); - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg hibias = ra_allock(as, 0x43300000, rset_exclude(RSET_GPR, dest)); - asm_guardcc(as, CC_NE); - emit_fab(as, PPCI_FCMPU, 0, tmp, left); - emit_fab(as, PPCI_FSUB, tmp, tmp, fbias); - emit_fai(as, PPCI_LFD, tmp, RID_SP, SPOFS_TMP); - emit_tai(as, PPCI_STW, RID_TMP, RID_SP, SPOFS_TMPLO); - emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); - emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - emit_lsptr(as, PPCI_LFS, (fbias & 31), - (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, tmp, left); -} - -static void asm_tobit(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_FPR; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, allow); - Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); - Reg tmp = ra_scratch(as, rset_clear(allow, right)); - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fab(as, PPCI_FADD, tmp, left, right); -} - -static void 
asm_conv(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); - int stfp = (st == IRT_NUM || st == IRT_FLOAT); - IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ - if (irt_isfp(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - if (stfp) { /* FP to FP conversion. */ - if (st == IRT_NUM) /* double -> float conversion. */ - emit_fb(as, PPCI_FRSP, dest, ra_alloc1(as, lref, RSET_FPR)); - else /* float -> double conversion is a no-op on PPC. */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ - } else { /* Integer to FP conversion. */ - /* IRT_INT: Flip hibit, bias with 2^52, subtract 2^52+2^31. */ - /* IRT_U32: Bias with 2^52, subtract 2^52. */ - RegSet allow = RSET_GPR; - Reg left = ra_alloc1(as, lref, allow); - Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); - Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); - emit_fab(as, PPCI_FSUB, dest, dest, fbias); - emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); - emit_lsptr(as, PPCI_LFS, (fbias & 31), - &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31], - rset_clear(allow, hibias)); - emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, - RID_SP, SPOFS_TMPLO); - emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); - if (st != IRT_U32) emit_asi(as, PPCI_XORIS, RID_TMP, left, 0x8000); - } - } else if (stfp) { /* FP to integer conversion. */ - if (irt_isguard(ir->t)) { - /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); - asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, lref, RSET_FPR); - Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - if (irt_isu32(ir->t)) { - /* Convert both x and x-2^31 to int and merge results. 
*/ - Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest)); - emit_asb(as, PPCI_OR, dest, dest, tmpi); /* Select with mask idiom. */ - emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP); - emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP); - emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO); /* tmp = (int)(x) */ - emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000); /* dest += 2^31 */ - emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31); /* mask = -(dest < 0) */ - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_tai(as, PPCI_LWZ, dest, - RID_SP, SPOFS_TMPLO); /* dest = (int)(x-2^31) */ - emit_fb(as, PPCI_FCTIWZ, tmp, left); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, tmp, tmp); - emit_fab(as, PPCI_FSUB, tmp, left, tmp); - emit_lsptr(as, PPCI_LFS, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); - } else { - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, tmp, left); - } - } - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); - if ((ir->op2 & IRCONV_SEXT)) - emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left); - else - emit_rot(as, PPCI_RLWINM, dest, left, 0, st == IRT_U8 ? 24 : 16, 31); - } else { /* 32/64 bit integer conversions. */ - /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ - ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ - } - } -} - -static void asm_strto(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; - IRRef args[2]; - int32_t ofs; - RegSet drop = RSET_SCRATCH; - if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ - ra_evictset(as, drop); - asm_guardcc(as, CC_EQ); - emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. 
*/ - args[0] = ir->op1; /* GCstr *str */ - args[1] = ASMREF_TMP1; /* TValue *n */ - asm_gencall(as, ci, args); - /* Store the result to the spill slot or temp slots. */ - ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; - emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); -} - -/* -- Memory references --------------------------------------------------- */ - -/* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) -{ - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ - ra_allockreg(as, i32ptr(ir_knum(ir)), dest); - else /* Otherwise force a spill and use the spill slot. */ - emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); - } else { - /* Otherwise use g->tmptv to hold the TValue. */ - RegSet allow = rset_exclude(RSET_GPR, dest); - Reg type; - emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - emit_setgl(as, src, tmptv.gcr); - } - type = ra_allock(as, irt_toitype(ir->t), allow); - emit_setgl(as, type, tmptv.it); - } -} - -static void asm_aref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx, base; - if (irref_isk(ir->op2)) { - IRRef tab = IR(ir->op1)->op1; - int32_t ofs = asm_fuseabase(as, tab); - IRRef refa = ofs ? tab : ir->op1; - ofs += 8*IR(ir->op2)->i; - if (checki16(ofs)) { - base = ra_alloc1(as, refa, RSET_GPR); - emit_tai(as, PPCI_ADDI, dest, base, ofs); - return; - } - } - base = ra_alloc1(as, ir->op1, RSET_GPR); - idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - emit_tab(as, PPCI_ADD, dest, RID_TMP, base); - emit_slwi(as, RID_TMP, idx, 3); -} - -/* Inlined hash lookup. Specialized for key type and for const keys. 
-** The equivalent C code is: -** Node *n = hashkey(t, key); -** do { -** if (lj_obj_equal(&n->key, key)) return &n->val; -** } while ((n = nextnode(n))); -** return niltv(L); -*/ -static void asm_href(ASMState *as, IRIns *ir, IROp merge) -{ - RegSet allow = RSET_GPR; - int destused = ra_used(ir); - Reg dest = ra_dest(as, ir, allow); - Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = RID_NONE, tmp1 = RID_TMP, tmp2; - Reg tisnum = RID_NONE, tmpnum = RID_NONE; - IRRef refkey = ir->op2; - IRIns *irkey = IR(refkey); - IRType1 kt = irkey->t; - uint32_t khash; - MCLabel l_end, l_loop, l_next; - - rset_clear(allow, tab); - if (irt_isnum(kt)) { - key = ra_alloc1(as, refkey, RSET_FPR); - tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); - tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); - rset_clear(allow, tisnum); - } else if (!irt_ispri(kt)) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } - tmp2 = ra_scratch(as, allow); - rset_clear(allow, tmp2); - - /* Key not found in chain: jump to exit (if merged) or load niltv. */ - l_end = emit_label(as); - as->invmcp = NULL; - if (merge == IR_NE) - asm_guardcc(as, CC_EQ); - else if (destused) - emit_loada(as, dest, niltvg(J2G(as->J))); - - /* Follow hash chain until the end. */ - l_loop = --as->mcp; - emit_ai(as, PPCI_CMPWI, dest, 0); - emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(Node, next)); - l_next = emit_label(as); - - /* Type and value comparison. 
*/ - if (merge == IR_EQ) - asm_guardcc(as, CC_EQ); - else - emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); - if (irt_isnum(kt)) { - emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); - emit_condbranch(as, PPCI_BC, CC_GE, l_next); - emit_ab(as, PPCI_CMPLW, tmp1, tisnum); - emit_fai(as, PPCI_LFD, tmpnum, dest, (int32_t)offsetof(Node, key.n)); - } else { - if (!irt_ispri(kt)) { - emit_ab(as, PPCI_CMPW, tmp2, key); - emit_condbranch(as, PPCI_BC, CC_NE, l_next); - } - emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); - if (!irt_ispri(kt)) - emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); - } - emit_tai(as, PPCI_LWZ, tmp1, dest, (int32_t)offsetof(Node, key.it)); - *l_loop = PPCI_BC | PPCF_Y | PPCF_CC(CC_NE) | - (((char *)as->mcp-(char *)l_loop) & 0xffffu); - - /* Load main position relative to tab->node into dest. */ - khash = irref_isk(refkey) ? ir_khash(irkey) : 1; - if (khash == 0) { - emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); - } else { - Reg tmphash = tmp1; - if (irref_isk(refkey)) - tmphash = ra_allock(as, khash, allow); - emit_tab(as, PPCI_ADD, dest, dest, tmp1); - emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); - emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); - emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); - if (irref_isk(refkey)) { - /* Nothing to do. */ - } else if (irt_isstr(kt)) { - emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); - } else { /* Must match with hash*() in lj_tab.c. 
*/ - emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1); - emit_rotlwi(as, tmp2, tmp2, HASH_ROT3); - emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); - emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); - emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); - if (irt_isnum(kt)) { - int32_t ofs = ra_spill(as, irkey); - emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); - emit_rotlwi(as, dest, tmp1, HASH_ROT1); - emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); - emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); - emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); - } else { - emit_asb(as, PPCI_XOR, tmp2, key, tmp1); - emit_rotlwi(as, dest, tmp1, HASH_ROT1); - emit_tai(as, PPCI_ADDI, tmp1, tmp2, HASH_BIAS); - emit_tai(as, PPCI_ADDIS, tmp2, key, (HASH_BIAS + 32768)>>16); - } - } - } -} - -static void asm_hrefk(ASMState *as, IRIns *ir) -{ - IRIns *kslot = IR(ir->op2); - IRIns *irkey = IR(kslot->op1); - int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); - int32_t kofs = ofs + (int32_t)offsetof(Node, key); - Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; - Reg node = ra_alloc1(as, ir->op1, RSET_GPR); - Reg key = RID_NONE, type = RID_TMP, idx = node; - RegSet allow = rset_exclude(RSET_GPR, node); - lua_assert(ofs % sizeof(Node) == 0); - if (ofs > 32736) { - idx = dest; - rset_clear(allow, dest); - kofs = (int32_t)offsetof(Node, key); - } else if (ra_hasreg(dest)) { - emit_tai(as, PPCI_ADDI, dest, node, ofs); - } - asm_guardcc(as, CC_NE); - if (!irt_ispri(irkey->t)) { - key = ra_scratch(as, allow); - rset_clear(allow, key); - } - rset_clear(allow, type); - if (irt_isnum(irkey->t)) { - emit_cmpi(as, key, (int32_t)ir_knum(irkey)->u32.lo); - asm_guardcc(as, CC_NE); - emit_cmpi(as, type, (int32_t)ir_knum(irkey)->u32.hi); - } else { - if (ra_hasreg(key)) { - emit_cmpi(as, key, irkey->i); /* May use RID_TMP, i.e. type. 
*/ - asm_guardcc(as, CC_NE); - } - emit_ai(as, PPCI_CMPWI, type, irt_toitype(irkey->t)); - } - if (ra_hasreg(key)) emit_tai(as, PPCI_LWZ, key, idx, kofs+4); - emit_tai(as, PPCI_LWZ, type, idx, kofs); - if (ofs > 32736) { - emit_tai(as, PPCI_ADDIS, dest, dest, (ofs + 32768) >> 16); - emit_tai(as, PPCI_ADDI, dest, node, ofs); - } -} - -static void asm_uref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op1)) { - GCfunc *fn = ir_kfunc(IR(ir->op1)); - MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR); - } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); - emit_ai(as, PPCI_CMPWI, RID_TMP, 1); - emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv)); - emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); - } else { - emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v)); - } - emit_tai(as, PPCI_LWZ, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); - } -} - -static void asm_fref(ASMState *as, IRIns *ir) -{ - UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); -} - -static void asm_strref(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - IRRef ref = ir->op2, refk = ir->op1; - int32_t ofs = (int32_t)sizeof(GCstr); - Reg r; - if (irref_isk(ref)) { - IRRef tmp = refk; refk = ref; ref = tmp; - } else if (!irref_isk(refk)) { - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - IRIns *irr = IR(ir->op2); - if (ra_hasreg(irr->r)) { - ra_noweak(as, irr->r); - right = irr->r; - } else if (mayfuse(as, irr->op2) && - irr->o == IR_ADD && irref_isk(irr->op2) && - checki16(ofs + IR(irr->op2)->i)) { - ofs += IR(irr->op2)->i; - right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); - } else { - right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left)); - } - emit_tai(as, PPCI_ADDI, dest, dest, ofs); - 
emit_tab(as, PPCI_ADD, dest, left, right); - return; - } - r = ra_alloc1(as, ref, RSET_GPR); - ofs += IR(refk)->i; - if (checki16(ofs)) - emit_tai(as, PPCI_ADDI, dest, r, ofs); - else - emit_tab(as, PPCI_ADD, dest, r, - ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); -} - -/* -- Loads and stores ---------------------------------------------------- */ - -static PPCIns asm_fxloadins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */ - case IRT_U8: return PPCI_LBZ; - case IRT_I16: return PPCI_LHA; - case IRT_U16: return PPCI_LHZ; - case IRT_NUM: return PPCI_LFD; - case IRT_FLOAT: return PPCI_LFS; - default: return PPCI_LWZ; - } -} - -static PPCIns asm_fxstoreins(IRIns *ir) -{ - switch (irt_type(ir->t)) { - case IRT_I8: case IRT_U8: return PPCI_STB; - case IRT_I16: case IRT_U16: return PPCI_STH; - case IRT_NUM: return PPCI_STFD; - case IRT_FLOAT: return PPCI_STFS; - default: return PPCI_STW; - } -} - -static void asm_fload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - PPCIns pi = asm_fxloadins(ir); - Reg idx; - int32_t ofs; - if (ir->op1 == REF_NIL) { - idx = RID_JGL; - ofs = (ir->op2 << 2) - 32768; - } else { - idx = ra_alloc1(as, ir->op1, RSET_GPR); - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_tai(as, PPCI_ADDI, dest, idx, ofs); - return; - } - } - ofs = field_ofs[ir->op2]; - } - lua_assert(!irt_isi8(ir->t)); - emit_tai(as, pi, dest, idx, ofs); -} - -static void asm_fstore(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - Reg src = ra_alloc1(as, ir->op2, RSET_GPR); - IRIns *irf = IR(ir->op1); - Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); - int32_t ofs = field_ofs[irf->op2]; - PPCIns pi = asm_fxstoreins(ir); - emit_tai(as, pi, src, idx, ofs); - } -} - -static void asm_xload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? 
RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - if (irt_isi8(ir->t)) - emit_as(as, PPCI_EXTSB, dest, dest); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); -} - -static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) -{ - IRIns *irb; - if (ir->r == RID_SINK) - return; - if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP && - ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) { - /* Fuse BSWAP with XSTORE to stwbrx. */ - Reg src = ra_alloc1(as, irb->op1, RSET_GPR); - asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); - } else { - Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, - rset_exclude(RSET_GPR, src), ofs); - } -} - -#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) - -static void asm_ahuvload(ASMState *as, IRIns *ir) -{ - IRType1 t = ir->t; - Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; - RegSet allow = RSET_GPR; - int32_t ofs = AHUREF_LSX; - if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - if (!irt_isnum(t)) ofs = 0; - dest = ra_dest(as, ir, irt_isnum(t) ? 
RSET_FPR : RSET_GPR); - rset_clear(allow, dest); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - if (irt_isnum(t)) { - Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx)); - asm_guardcc(as, CC_GE); - emit_ab(as, PPCI_CMPLW, type, tisnum); - if (ra_hasreg(dest)) { - if (ofs == AHUREF_LSX) { - tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, - (idx&255)), (idx>>8))); - emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); - } else { - emit_fai(as, PPCI_LFD, dest, idx, ofs); - } - } - } else { - asm_guardcc(as, CC_NE); - emit_ai(as, PPCI_CMPWI, type, irt_toitype(t)); - if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, idx, ofs+4); - } - if (ofs == AHUREF_LSX) { - emit_tab(as, PPCI_LWZX, type, (idx&255), tmp); - emit_slwi(as, tmp, (idx>>8), 3); - } else { - emit_tai(as, PPCI_LWZ, type, idx, ofs); - } -} - -static void asm_ahustore(ASMState *as, IRIns *ir) -{ - RegSet allow = RSET_GPR; - Reg idx, src = RID_NONE, type = RID_NONE; - int32_t ofs = AHUREF_LSX; - if (ir->r == RID_SINK) - return; - if (irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); - } else { - if (!irt_ispri(ir->t)) { - src = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, src); - ofs = 0; - } - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - rset_clear(allow, type); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - if (irt_isnum(ir->t)) { - if (ofs == AHUREF_LSX) { - emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); - emit_slwi(as, RID_TMP, (idx>>8), 3); - } else { - emit_fai(as, PPCI_STFD, src, idx, ofs); - } - } else { - if (ra_hasreg(src)) - emit_tai(as, PPCI_STW, src, idx, ofs+4); - if (ofs == AHUREF_LSX) { - emit_tab(as, PPCI_STWX, type, (idx&255), RID_TMP); - emit_slwi(as, RID_TMP, (idx>>8), 3); - } else { - emit_tai(as, PPCI_STW, type, idx, ofs); - } - } -} - -static void asm_sload(ASMState *as, IRIns *ir) -{ - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 
0 : 4); - IRType1 t = ir->t; - Reg dest = RID_NONE, type = RID_NONE, base; - RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - lua_assert(LJ_DUALNUM || - !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); - if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { - dest = ra_scratch(as, RSET_FPR); - asm_tointg(as, ir, dest); - t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); - rset_clear(allow, dest); - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); - if ((ir->op2 & IRSLOAD_CONVERT)) { - if (irt_isint(t)) { - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - dest = ra_scratch(as, RSET_FPR); - emit_fai(as, PPCI_STFD, dest, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, dest, dest); - t.irt = IRT_NUM; /* Check for original type. */ - } else { - Reg tmp = ra_scratch(as, allow); - Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, tmp)); - Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - emit_fab(as, PPCI_FSUB, dest, dest, fbias); - emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); - emit_lsptr(as, PPCI_LFS, (fbias & 31), - (void *)&as->J->k32[LJ_K32_2P52_2P31], - rset_clear(allow, hibias)); - emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); - emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); - emit_asi(as, PPCI_XORIS, tmp, tmp, 0x8000); - dest = tmp; - t.irt = IRT_INT; /* Check for original type. 
*/ - } - } - goto dotypecheck; - } - base = ra_alloc1(as, REF_BASE, allow); - rset_clear(allow, base); -dotypecheck: - if (irt_isnum(t)) { - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); - asm_guardcc(as, CC_GE); - emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); - type = RID_TMP; - } - if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); - } else { - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - asm_guardcc(as, CC_NE); - emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t)); - type = RID_TMP; - } - if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, base, ofs); - } - if (ra_hasreg(type)) emit_tai(as, PPCI_LWZ, type, base, ofs-4); -} - -/* -- Allocations --------------------------------------------------------- */ - -#if LJ_HASFFI -static void asm_cnew(ASMState *as, IRIns *ir) -{ - CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID id = (CTypeID)IR(ir->op1)->i; - CTSize sz; - CTInfo info = lj_ctype_info(cts, id, &sz); - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[4]; - RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); - - as->gcsteps++; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); - if (ra_used(ir)) - ra_destreg(as, ir, RID_RET); /* GCcdata * */ - - /* Initialize immutable cdata object. */ - if (ir->o == IR_CNEWI) { - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); - int32_t ofs = sizeof(GCcdata); - lua_assert(sz == 4 || sz == 8); - if (sz == 8) { - ofs += 4; - lua_assert((ir+1)->o == IR_HIOP); - } - for (;;) { - Reg r = ra_alloc1(as, ir->op2, allow); - emit_tai(as, PPCI_STW, r, RID_RET, ofs); - rset_clear(allow, r); - if (ofs == sizeof(GCcdata)) break; - ofs -= 4; ir++; - } - } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. 
*/ - ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* CTypeID id */ - args[2] = ir->op2; /* CTSize sz */ - args[3] = ASMREF_TMP1; /* CTSize align */ - asm_gencall(as, ci, args); - emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); - return; - } - - /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ - emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); - emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); - emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); - emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ - asm_gencall(as, ci, args); - ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), - ra_releasetmp(as, ASMREF_TMP1)); -} -#else -#define asm_cnew(as, ir) ((void)0) -#endif - -/* -- Write barriers ------------------------------------------------------ */ - -static void asm_tbar(ASMState *as, IRIns *ir) -{ - Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); - Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); - Reg link = RID_TMP; - MCLabel l_end = emit_label(as); - emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist)); - emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); - emit_setgl(as, tab, gc.grayagain); - lua_assert(LJ_GC_BLACK == 0x04); - emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */ - emit_getgl(as, link, gc.grayagain); - emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); - emit_asi(as, PPCI_ANDIDOT, RID_TMP, mark, LJ_GC_BLACK); - emit_tai(as, PPCI_LBZ, mark, tab, (int32_t)offsetof(GCtab, marked)); -} - -static void asm_obar(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; - IRRef args[2]; - MCLabel l_end; - Reg obj, val, tmp; - /* No need for other object barriers (yet). 
*/ - lua_assert(IR(ir->op1)->o == IR_UREFC); - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ir->op1; /* TValue *tv */ - asm_gencall(as, ci, args); - emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); - obj = IR(ir->op1)->r; - tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); - emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); - emit_asi(as, PPCI_ANDIDOT, tmp, tmp, LJ_GC_BLACK); - emit_condbranch(as, PPCI_BC, CC_EQ, l_end); - emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, LJ_GC_WHITES); - val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); - emit_tai(as, PPCI_LBZ, tmp, obj, - (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); - emit_tai(as, PPCI_LBZ, RID_TMP, val, (int32_t)offsetof(GChead, marked)); -} - -/* -- Arithmetic and logic operations ------------------------------------- */ - -static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - if (pi == PPCI_FMUL) - emit_fac(as, pi, dest, left, right); - else - emit_fab(as, pi, dest, left, right); -} - -static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi) -{ - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); - emit_fb(as, pi, dest, left); -} - -static void asm_fpmath(ASMState *as, IRIns *ir) -{ - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; - if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) - asm_fpunary(as, ir, PPCI_FSQRT); - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); -} - -static void asm_add(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) - asm_fparith(as, ir, PPCI_FADD); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - PPCIns pi; - if (irref_isk(ir->op2)) 
{ - int32_t k = IR(ir->op2)->i; - if (checki16(k)) { - pi = PPCI_ADDI; - /* May fail due to spills/restores above, but simplifies the logic. */ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi = PPCI_ADDICDOT; - } - emit_tai(as, pi, dest, left, k); - return; - } else if ((k & 0xffff) == 0) { - emit_tai(as, PPCI_ADDIS, dest, left, (k >> 16)); - return; - } else if (!as->sectref) { - emit_tai(as, PPCI_ADDIS, dest, dest, (k + 32768) >> 16); - emit_tai(as, PPCI_ADDI, dest, left, k); - return; - } - } - pi = PPCI_ADD; - /* May fail due to spills/restores above, but simplifies the logic. */ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_tab(as, pi, dest, left, right); - } -} - -static void asm_sub(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) - asm_fparith(as, ir, PPCI_FSUB); - } else { - PPCIns pi = PPCI_SUBF; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left, right; - if (irref_isk(ir->op1)) { - int32_t k = IR(ir->op1)->i; - if (checki16(k)) { - right = ra_alloc1(as, ir->op2, RSET_GPR); - emit_tai(as, PPCI_SUBFIC, dest, right, k); - return; - } - } - /* May fail due to spills/restores above, but simplifies the logic. */ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_tab(as, pi, dest, right, left); /* Subtract right _from_ left. 
*/ - } -} - -static void asm_mul(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, PPCI_FMUL); - } else { - PPCIns pi = PPCI_MULLW; - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (checki16(k)) { - emit_tai(as, PPCI_MULLI, dest, left, k); - return; - } - } - /* May fail due to spills/restores above, but simplifies the logic. */ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_tab(as, pi, dest, left, right); - } -} - -#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV) -#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) -#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) - -static void asm_neg(ASMState *as, IRIns *ir) -{ - if (irt_isnum(ir->t)) { - asm_fpunary(as, ir, PPCI_FNEG); - } else { - Reg dest, left; - PPCIns pi = PPCI_NEG; - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - dest = ra_dest(as, ir, RSET_GPR); - left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_tab(as, pi, dest, left, 0); - } -} - -#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) - -static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) -{ - Reg dest, left, right; - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - } - asm_guardcc(as, CC_SO); - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (pi == PPCI_SUBFO) { Reg tmp = left; left = right; right = tmp; } - emit_tab(as, pi|PPCF_DOT, dest, left, right); -} - -#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO) -#define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO) -#define asm_mulov(as, ir) asm_arithov(as, 
ir, PPCI_MULLWO) - -#if LJ_HASFFI -static void asm_add64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); - PPCIns pi = PPCI_ADDE; - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (k == 0) - pi = PPCI_ADDZE; - else if (k == -1) - pi = PPCI_ADDME; - else - goto needright; - right = 0; - } else { - needright: - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - } - emit_tab(as, pi, dest, left, right); - ir--; - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc1(as, ir->op1, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (checki16(k)) { - emit_tai(as, PPCI_ADDIC, dest, left, k); - return; - } - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_tab(as, PPCI_ADDC, dest, left, right); -} - -static void asm_sub64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left, right = ra_alloc1(as, ir->op2, RSET_GPR); - PPCIns pi = PPCI_SUBFE; - if (irref_isk(ir->op1)) { - int32_t k = IR(ir->op1)->i; - if (k == 0) - pi = PPCI_SUBFZE; - else if (k == -1) - pi = PPCI_SUBFME; - else - goto needleft; - left = 0; - } else { - needleft: - left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right)); - } - emit_tab(as, pi, dest, right, left); /* Subtract right _from_ left. 
*/ - ir--; - dest = ra_dest(as, ir, RSET_GPR); - right = ra_alloc1(as, ir->op2, RSET_GPR); - if (irref_isk(ir->op1)) { - int32_t k = IR(ir->op1)->i; - if (checki16(k)) { - emit_tai(as, PPCI_SUBFIC, dest, right, k); - return; - } - } - left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right)); - emit_tab(as, PPCI_SUBFC, dest, right, left); -} - -static void asm_neg64(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - emit_tab(as, PPCI_SUBFZE, dest, left, 0); - ir--; - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc1(as, ir->op1, RSET_GPR); - emit_tai(as, PPCI_SUBFIC, dest, left, 0); -} -#endif - -static void asm_bnot(ASMState *as, IRIns *ir) -{ - Reg dest, left, right; - PPCIns pi = PPCI_NOR; - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - dest = ra_dest(as, ir, RSET_GPR); - if (mayfuse(as, ir->op1)) { - IRIns *irl = IR(ir->op1); - if (irl->o == IR_BAND) - pi ^= (PPCI_NOR ^ PPCI_NAND); - else if (irl->o == IR_BXOR) - pi ^= (PPCI_NOR ^ PPCI_EQV); - else if (irl->o != IR_BOR) - goto nofuse; - left = ra_hintalloc(as, irl->op1, dest, RSET_GPR); - right = ra_alloc1(as, irl->op2, rset_exclude(RSET_GPR, left)); - } else { -nofuse: - left = right = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - } - emit_asb(as, pi, dest, left, right); -} - -static void asm_bswap(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - IRIns *irx; - if (mayfuse(as, ir->op1) && (irx = IR(ir->op1))->o == IR_XLOAD && - ra_noreg(irx->r) && (irt_isint(irx->t) || irt_isu32(irx->t))) { - /* Fuse BSWAP with XLOAD to lwbrx. 
*/ - asm_fusexrefx(as, PPCI_LWBRX, dest, irx->op1, RSET_GPR); - } else { - Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - Reg tmp = dest; - if (tmp == left) { - tmp = RID_TMP; - emit_mr(as, dest, RID_TMP); - } - emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23); - emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7); - emit_rotlwi(as, tmp, left, 8); - } -} - -/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ -static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) -{ - IRIns *ir; - Reg left; - if (mayfuse(as, ref) && (ir = IR(ref), ra_noreg(ir->r)) && - irref_isk(ir->op2) && ir->o >= IR_BSHL && ir->o <= IR_BROR) { - int32_t sh = (IR(ir->op2)->i & 31); - switch (ir->o) { - case IR_BSHL: - if ((mask & ((1u<>sh))) goto nofuse; - sh = ((32-sh)&31); - break; - case IR_BROL: - break; - default: - goto nofuse; - } - left = ra_alloc1(as, ir->op1, RSET_GPR); - *--as->mcp = pi | PPCF_T(left) | PPCF_B(sh); - return; - } -nofuse: - left = ra_alloc1(as, ref, RSET_GPR); - *--as->mcp = pi | PPCF_T(left); -} - -static void asm_band(ASMState *as, IRIns *ir) -{ - Reg dest, left, right; - IRRef lref = ir->op1; - PPCIns dot = 0; - IRRef op2; - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - dot = PPCF_DOT; - } - dest = ra_dest(as, ir, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - if (k) { - /* First check for a contiguous bitmask as used by rlwinm. 
*/ - uint32_t s1 = lj_ffs((uint32_t)k); - uint32_t k1 = ((uint32_t)k >> s1); - if ((k1 & (k1+1)) == 0) { - asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) | - PPCF_MB(31-lj_fls((uint32_t)k)) | PPCF_ME(31-s1), - k, lref); - return; - } - if (~(uint32_t)k) { - uint32_t s2 = lj_ffs(~(uint32_t)k); - uint32_t k2 = (~(uint32_t)k >> s2); - if ((k2 & (k2+1)) == 0) { - asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) | - PPCF_MB(32-s2) | PPCF_ME(30-lj_fls(~(uint32_t)k)), - k, lref); - return; - } - } - } - if (checku16(k)) { - left = ra_alloc1(as, lref, RSET_GPR); - emit_asi(as, PPCI_ANDIDOT, dest, left, k); - return; - } else if ((k & 0xffff) == 0) { - left = ra_alloc1(as, lref, RSET_GPR); - emit_asi(as, PPCI_ANDISDOT, dest, left, (k >> 16)); - return; - } - } - op2 = ir->op2; - if (mayfuse(as, op2) && IR(op2)->o == IR_BNOT && ra_noreg(IR(op2)->r)) { - dot ^= (PPCI_AND ^ PPCI_ANDC); - op2 = IR(op2)->op1; - } - left = ra_hintalloc(as, lref, dest, RSET_GPR); - right = ra_alloc1(as, op2, rset_exclude(RSET_GPR, left)); - emit_asb(as, PPCI_AND ^ dot, dest, left, right); -} - -static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - Reg tmp = left; - if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) { - if (!checku16(k)) { - emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16)); - if ((k & 0xffff) == 0) return; - } - emit_asi(as, pik, dest, left, k); - return; - } - } - /* May fail due to spills/restores above, but simplifies the logic. 
*/ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_asb(as, pi, dest, left, right); -} - -#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI) -#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI) - -static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) -{ - Reg dest, left; - Reg dot = 0; - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - dot = PPCF_DOT; - } - dest = ra_dest(as, ir, RSET_GPR); - left = ra_alloc1(as, ir->op1, RSET_GPR); - if (irref_isk(ir->op2)) { /* Constant shifts. */ - int32_t shift = (IR(ir->op2)->i & 31); - if (pik == 0) /* SLWI */ - emit_rot(as, PPCI_RLWINM|dot, dest, left, shift, 0, 31-shift); - else if (pik == 1) /* SRWI */ - emit_rot(as, PPCI_RLWINM|dot, dest, left, (32-shift)&31, shift, 31); - else - emit_asb(as, pik|dot, dest, left, shift); - } else { - Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_asb(as, pi|dot, dest, left, right); - } -} - -#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0) -#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1) -#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI) -#define asm_brol(as, ir) \ - asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \ - PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) -#define asm_bror(as, ir) lua_assert(0) - -static void asm_min_max(ASMState *as, IRIns *ir, int ismax) -{ - if (irt_isnum(ir->t)) { - Reg dest = ra_dest(as, ir, RSET_FPR); - Reg tmp = dest; - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - if (tmp == left || tmp == right) - tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR, - dest), left), right)); - emit_facb(as, PPCI_FSEL, dest, tmp, - ismax ? left : right, ismax ? 
right : left); - emit_fab(as, PPCI_FSUB, tmp, left, right); - } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg tmp1 = RID_TMP, tmp2 = dest; - Reg right, left = ra_alloc2(as, ir, RSET_GPR); - right = (left >> 8); left &= 255; - if (tmp2 == left || tmp2 == right) - tmp2 = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, - dest), left), right)); - emit_tab(as, PPCI_ADD, dest, tmp2, right); - emit_asb(as, ismax ? PPCI_ANDC : PPCI_AND, tmp2, tmp2, tmp1); - emit_tab(as, PPCI_SUBFE, tmp1, tmp1, tmp1); - emit_tab(as, PPCI_SUBFC, tmp2, tmp2, tmp1); - emit_asi(as, PPCI_XORIS, tmp2, right, 0x8000); - emit_asi(as, PPCI_XORIS, tmp1, left, 0x8000); - } -} - -#define asm_min(as, ir) asm_min_max(as, ir, 0) -#define asm_max(as, ir) asm_min_max(as, ir, 1) - -/* -- Comparisons --------------------------------------------------------- */ - -#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ -#define CC_TWO 0x80 /* Check two flags for FP comparison. */ - -/* Map of comparisons to flags. ORDER IR. */ -static const uint8_t asm_compmap[IR_ABC+1] = { - /* op int cc FP cc */ - /* LT */ CC_GE + (CC_GE<<4), - /* GE */ CC_LT + (CC_LE<<4) + CC_TWO, - /* LE */ CC_GT + (CC_GE<<4) + CC_TWO, - /* GT */ CC_LE + (CC_LE<<4), - /* ULT */ CC_GE + CC_UNSIGNED + (CC_GT<<4) + CC_TWO, - /* UGE */ CC_LT + CC_UNSIGNED + (CC_LT<<4), - /* ULE */ CC_GT + CC_UNSIGNED + (CC_GT<<4), - /* UGT */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO, - /* EQ */ CC_NE + (CC_NE<<4), - /* NE */ CC_EQ + (CC_EQ<<4), - /* ABC */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO /* Same as UGT. */ -}; - -static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc) -{ - Reg right, left = ra_alloc1(as, lref, RSET_GPR); - if (irref_isk(rref)) { - int32_t k = IR(rref)->i; - if ((cc & CC_UNSIGNED) == 0) { /* Signed comparison with constant. */ - if (checki16(k)) { - emit_tai(as, PPCI_CMPWI, cr, left, k); - /* Signed comparison with zero and referencing previous ins? 
*/ - if (k == 0 && lref == as->curins-1) - as->flagmcp = as->mcp; /* Allow elimination of the compare. */ - return; - } else if ((cc & 3) == (CC_EQ & 3)) { /* Use CMPLWI for EQ or NE. */ - if (checku16(k)) { - emit_tai(as, PPCI_CMPLWI, cr, left, k); - return; - } else if (!as->sectref && ra_noreg(IR(rref)->r)) { - emit_tai(as, PPCI_CMPLWI, cr, RID_TMP, k); - emit_asi(as, PPCI_XORIS, RID_TMP, left, (k >> 16)); - return; - } - } - } else { /* Unsigned comparison with constant. */ - if (checku16(k)) { - emit_tai(as, PPCI_CMPLWI, cr, left, k); - return; - } - } - } - right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left)); - emit_tab(as, (cc & CC_UNSIGNED) ? PPCI_CMPLW : PPCI_CMPW, cr, left, right); -} - -static void asm_comp(ASMState *as, IRIns *ir) -{ - PPCCC cc = asm_compmap[ir->o]; - if (irt_isnum(ir->t)) { - Reg right, left = ra_alloc2(as, ir, RSET_FPR); - right = (left >> 8); left &= 255; - asm_guardcc(as, (cc >> 4)); - if ((cc & CC_TWO)) - emit_tab(as, PPCI_CROR, ((cc>>4)&3), ((cc>>4)&3), (CC_EQ&3)); - emit_fab(as, PPCI_FCMPU, 0, left, right); - } else { - IRRef lref = ir->op1, rref = ir->op2; - if (irref_isk(lref) && !irref_isk(rref)) { - /* Swap constants to the right (only for ABC). */ - IRRef tmp = lref; lref = rref; rref = tmp; - if ((cc & 2) == 0) cc ^= 1; /* LT <-> GT, LE <-> GE */ - } - asm_guardcc(as, cc); - asm_intcomp_(as, lref, rref, 0, cc); - } -} - -#define asm_equal(as, ir) asm_comp(as, ir) - -#if LJ_HASFFI -/* 64 bit integer comparisons. */ -static void asm_comp64(ASMState *as, IRIns *ir) -{ - PPCCC cc = asm_compmap[(ir-1)->o]; - if ((cc&3) == (CC_EQ&3)) { - asm_guardcc(as, cc); - emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CROR, - (CC_EQ&3), (CC_EQ&3), 4+(CC_EQ&3)); - } else { - asm_guardcc(as, CC_EQ); - emit_tab(as, PPCI_CROR, (CC_EQ&3), (CC_EQ&3), ((cc^~(cc>>2))&1)); - emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CRANDC, - (CC_EQ&3), (CC_EQ&3), 4+(cc&3)); - } - /* Loword comparison sets cr1 and is unsigned, except for equality. 
*/ - asm_intcomp_(as, (ir-1)->op1, (ir-1)->op2, 4, - cc | ((cc&3) == (CC_EQ&3) ? 0 : CC_UNSIGNED)); - /* Hiword comparison sets cr0. */ - asm_intcomp_(as, ir->op1, ir->op2, 0, cc); - as->flagmcp = NULL; /* Doesn't work here. */ -} -#endif - -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ - -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ -static void asm_hiop(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - /* HIOP is marked as a store because it needs its own DCE logic. */ - int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ - if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; - if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ - as->curins--; /* Always skip the CONV. */ - if (usehi || uselo) - asm_conv64(as, ir); - return; - } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ - as->curins--; /* Always skip the loword comparison. */ - asm_comp64(as, ir); - return; - } else if ((ir-1)->o == IR_XSTORE) { - as->curins--; /* Handle both stores here. */ - if ((ir-1)->r != RID_SINK) { - asm_xstore_(as, ir, 0); - asm_xstore_(as, ir-1, 4); - } - return; - } - if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ - switch ((ir-1)->o) { - case IR_ADD: as->curins--; asm_add64(as, ir); break; - case IR_SUB: as->curins--; asm_sub64(as, ir); break; - case IR_NEG: as->curins--; asm_neg64(as, ir); break; - case IR_CALLN: - case IR_CALLXS: - if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ - break; - case IR_CNEWI: - /* Nothing to do here. Handled by lo op itself. */ - break; - default: lua_assert(0); break; - } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. 
*/ -#endif -} - -/* -- Profiling ----------------------------------------------------------- */ - -static void asm_prof(ASMState *as, IRIns *ir) -{ - UNUSED(ir); - asm_guardcc(as, CC_NE); - emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE); - emit_lsglptr(as, PPCI_LBZ, RID_TMP, - (int32_t)offsetof(global_State, hookmask)); -} - -/* -- Stack handling ------------------------------------------------------ */ - -/* Check Lua stack size for overflow. Use exit handler as fallback. */ -static void asm_stack_check(ASMState *as, BCReg topslot, - IRIns *irp, RegSet allow, ExitNo exitno) -{ - /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */ - Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; - rset_clear(allow, pbase); - tmp = allow ? rset_pickbot(allow) : - (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); - emit_condbranch(as, PPCI_BC, CC_LT, asm_exitstub_addr(as, exitno)); - if (allow == RSET_EMPTY) /* Restore temp. register. */ - emit_tai(as, PPCI_LWZ, tmp, RID_SP, SPOFS_TMPW); - else - ra_modified(as, tmp); - emit_ai(as, PPCI_CMPLWI, RID_TMP, (int32_t)(8*topslot)); - emit_tab(as, PPCI_SUBF, RID_TMP, pbase, tmp); - emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); - if (pbase == RID_TMP) - emit_getgl(as, RID_TMP, jit_base); - emit_getgl(as, tmp, cur_L); - if (allow == RSET_EMPTY) /* Spill temp. register. */ - emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); -} - -/* Restore Lua stack from on-trace state. */ -static void asm_stack_restore(ASMState *as, SnapShot *snap) -{ - SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; - MSize n, nent = snap->nent; - /* Store the value of all modified slots to the Lua stack. 
*/ - for (n = 0; n < nent; n++) { - SnapEntry sn = map[n]; - BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); - IRRef ref = snap_ref(sn); - IRIns *ir = IR(ref); - if ((sn & SNAP_NORESTORE)) - continue; - if (irt_isnum(ir->t)) { - Reg src = ra_alloc1(as, ref, RSET_FPR); - emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); - } else { - Reg type; - RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - rset_clear(allow, src); - emit_tai(as, PPCI_STW, src, RID_BASE, ofs+4); - } - if ((sn & (SNAP_CONT|SNAP_FRAME))) { - if (s == 0) continue; /* Do not overwrite link to previous frame. */ - type = ra_allock(as, (int32_t)(*flinks--), allow); - } else { - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); - } - emit_tai(as, PPCI_STW, type, RID_BASE, ofs); - } - checkmclim(as); - } - lua_assert(map + nent == flinks); -} - -/* -- GC handling --------------------------------------------------------- */ - -/* Check GC threshold and do one or more GC steps. */ -static void asm_gc_check(ASMState *as) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; - IRRef args[2]; - MCLabel l_end; - Reg tmp; - ra_evictset(as, RSET_SCRATCH); - l_end = emit_label(as); - /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ - asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */ - emit_ai(as, PPCI_CMPWI, RID_RET, 0); - args[0] = ASMREF_TMP1; /* global_State *g */ - args[1] = ASMREF_TMP2; /* MSize steps */ - asm_gencall(as, ci, args); - emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); - tmp = ra_releasetmp(as, ASMREF_TMP2); - emit_loadi(as, tmp, as->gcsteps); - /* Jump around GC step if GC total < GC threshold. 
*/ - emit_condbranch(as, PPCI_BC|PPCF_Y, CC_LT, l_end); - emit_ab(as, PPCI_CMPLW, RID_TMP, tmp); - emit_getgl(as, tmp, gc.threshold); - emit_getgl(as, RID_TMP, gc.total); - as->gcsteps = 0; - checkmclim(as); -} - -/* -- Loop handling ------------------------------------------------------- */ - -/* Fixup the loop branch. */ -static void asm_loop_fixup(ASMState *as) -{ - MCode *p = as->mctop; - MCode *target = as->mcp; - if (as->loopinv) { /* Inverted loop branch? */ - /* asm_guardcc already inverted the cond branch and patched the final b. */ - p[-2] = (p[-2] & (0xffff0000u & ~PPCF_Y)) | (((target-p+2) & 0x3fffu) << 2); - } else { - p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2); - } -} - -/* -- Head of trace ------------------------------------------------------- */ - -/* Coalesce BASE register for a root trace. */ -static void asm_head_root_base(ASMState *as) -{ - IRIns *ir = IR(REF_BASE); - Reg r = ir->r; - if (ra_hasreg(r)) { - ra_free(as, r); - if (rset_test(as->modset, r) || irt_ismarked(ir->t)) - ir->r = RID_INIT; /* No inheritance for modified BASE register. */ - if (r != RID_BASE) - emit_mr(as, r, RID_BASE); - } -} - -/* Coalesce BASE register for a side trace. */ -static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) -{ - IRIns *ir = IR(REF_BASE); - Reg r = ir->r; - if (ra_hasreg(r)) { - ra_free(as, r); - if (rset_test(as->modset, r) || irt_ismarked(ir->t)) - ir->r = RID_INIT; /* No inheritance for modified BASE register. */ - if (irp->r == r) { - rset_clear(allow, r); /* Mark same BASE register as coalesced. */ - } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { - rset_clear(allow, irp->r); - emit_mr(as, r, irp->r); /* Move from coalesced parent reg. */ - } else { - emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ - } - } - return allow; -} - -/* -- Tail of trace ------------------------------------------------------- */ - -/* Fixup the tail code. 
*/ -static void asm_tail_fixup(ASMState *as, TraceNo lnk) -{ - MCode *p = as->mctop; - MCode *target; - int32_t spadj = as->T->spadjust; - if (spadj == 0) { - *--p = PPCI_NOP; - *--p = PPCI_NOP; - as->mctop = p; - } else { - /* Patch stack adjustment. */ - lua_assert(checki16(CFRAME_SIZE+spadj)); - p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); - p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; - } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2); -} - -/* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) -{ - MCode *p = as->mctop - 1; /* Leave room for exit branch. */ - if (as->loopref) { - as->invmcp = as->mcp = p; - } else { - as->mcp = p-2; /* Leave room for stack pointer adjustment. */ - as->invmcp = NULL; - } -} - -/* -- Trace setup --------------------------------------------------------- */ - -/* Ensure there are enough stack slots for call arguments. */ -static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) -{ - IRRef args[CCI_NARGS_MAX*2]; - uint32_t i, nargs = CCI_XNARGS(ci); - int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; - asm_collectargs(as, ir, ci, args); - for (i = 0; i < nargs; i++) - if (args[i] && irt_isfp(IR(args[i])->t)) { - if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; - } else { - if (ngpr > 0) ngpr--; else nslots++; - } - if (nslots > as->evenspill) /* Leave room for args in stack slots. */ - as->evenspill = nslots; - return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); -} - -static void asm_setup_target(ASMState *as) -{ - asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); -} - -/* -- Trace patching ------------------------------------------------------ */ - -/* Patch exit jumps of existing machine code to a new target. 
*/ -void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) -{ - MCode *p = T->mcode; - MCode *pe = (MCode *)((char *)p + T->szmcode); - MCode *px = exitstub_trace_addr(T, exitno); - MCode *cstart = NULL; - MCode *mcarea = lj_mcode_patch(J, p, 0); - int clearso = 0; - for (; p < pe; p++) { - /* Look for exitstub branch, try to replace with branch to target. */ - uint32_t ins = *p; - if ((ins & 0xfc000000u) == 0x40000000u && - ((ins ^ ((char *)px-(char *)p)) & 0xffffu) == 0) { - ptrdiff_t delta = (char *)target - (char *)p; - if (((ins >> 16) & 3) == (CC_SO&3)) { - clearso = sizeof(MCode); - delta -= sizeof(MCode); - } - /* Many, but not all short-range branches can be patched directly. */ - if (((delta + 0x8000) >> 16) == 0) { - *p = (ins & 0xffdf0000u) | ((uint32_t)delta & 0xffffu) | - ((delta & 0x8000) * (PPCF_Y/0x8000)); - if (!cstart) cstart = p; - } - } else if ((ins & 0xfc000000u) == PPCI_B && - ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) { - ptrdiff_t delta = (char *)target - (char *)p; - lua_assert(((delta + 0x02000000) >> 26) == 0); - *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu); - if (!cstart) cstart = p; - } - } - { /* Always patch long-range branch in exit stub itself. */ - ptrdiff_t delta = (char *)target - (char *)px - clearso; - lua_assert(((delta + 0x02000000) >> 26) == 0); - *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu); - } - if (!cstart) cstart = px; - lj_mcode_sync(cstart, px+1); - if (clearso) { /* Extend the current trace. Ugly workaround. */ - MCode *pp = J->cur.mcode; - J->cur.szmcode += sizeof(MCode); - *--pp = PPCI_MCRXR; /* Clear SO flag. */ - J->cur.mcode = pp; - lj_mcode_sync(pp, pp+1); - } - lj_mcode_patch(J, mcarea, 1); -} - diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index fceb187758..fa2f0e073a 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -21,14 +21,6 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) } /* Push the high byte of the exitno for each exit stub group. 
*/ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8); -#if !LJ_GC64 - /* Store DISPATCH at original stack slot 0. Account for the two push ops. */ - *mxp++ = XI_MOVmi; - *mxp++ = MODRM(XM_OFS8, 0, RID_ESP); - *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP); - *mxp++ = 2*sizeof(void *); - *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; -#endif /* Jump to exit handler which fills in the ExitState. */ *mxp++ = XI_JMP; mxp += 4; *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); @@ -64,13 +56,13 @@ static void asm_guardcc(ASMState *as, int cc) target = p; cc ^= 1; if (as->realign) { - if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP)) + if (LJ_UNLIKELY(as->mrm.base == RID_RIP)) as->mrm.ofs += 2; /* Fixup RIP offset for pending fused load. */ emit_sjcc(as, cc, target); return; } } - if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP)) + if (LJ_UNLIKELY(as->mrm.base == RID_RIP)) as->mrm.ofs += 6; /* Fixup RIP offset for pending fused load. */ emit_jcc(as, cc, target); } @@ -85,7 +77,6 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) { if (irref_isk(ref)) { IRIns *ir = IR(ref); -#if LJ_GC64 if (ir->o == IR_KNULL || !irt_is64(ir->t)) { *k = ir->i; return 1; @@ -93,15 +84,6 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) *k = (int32_t)ir_k64(ir)->u64; return 1; } -#else - if (ir->o != IR_KINT64) { - *k = ir->i; - return 1; - } else if (checki32((int64_t)ir_kint64(ir)->u64)) { - *k = (int32_t)ir_kint64(ir)->u64; - return 1; - } -#endif } return 0; } @@ -162,14 +144,7 @@ static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow) /* Fuse a constant ADD (e.g. t[i+1]) into the offset. ** Doesn't help much without ABCelim, but reduces register pressure. */ - if (!LJ_64 && /* Has bad effects with negative index on x64. 
*/ - mayfuse(as, ir->op2) && ra_noreg(irx->r) && - irx->o == IR_ADD && irref_isk(irx->op2)) { - as->mrm.ofs += 8*IR(irx->op2)->i; - as->mrm.idx = (uint8_t)ra_alloc1(as, irx->op1, allow); - } else { - as->mrm.idx = (uint8_t)ra_alloc1(as, ir->op2, allow); - } + as->mrm.idx = (uint8_t)ra_alloc1(as, ir->op2, allow); } } @@ -201,7 +176,6 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; -#if LJ_GC64 int64_t ofs = dispofs(as, &uv->tv); if (checki32(ofs) && checki32(ofs+4)) { as->mrm.ofs = (int32_t)ofs; @@ -209,11 +183,6 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) as->mrm.idx = RID_NONE; return; } -#else - as->mrm.ofs = ptr2addr(&uv->tv); - as->mrm.base = as->mrm.idx = RID_NONE; - return; -#endif } break; default: @@ -233,19 +202,13 @@ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); as->mrm.idx = RID_NONE; if (ir->op1 == REF_NIL) { -#if LJ_GC64 as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch); as->mrm.base = RID_DISPATCH; -#else - as->mrm.ofs = (int32_t)(ir->op2 << 2) + ptr2addr(J2GG(as->J)); - as->mrm.base = RID_NONE; -#endif return; } as->mrm.ofs = field_ofs[ir->op2]; if (irref_isk(ir->op1)) { IRIns *op1 = IR(ir->op1); -#if LJ_GC64 if (ir->op1 == REF_NIL) { as->mrm.ofs -= GG_OFS(dispatch); as->mrm.base = RID_DISPATCH; @@ -258,11 +221,6 @@ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) return; } } -#else - as->mrm.ofs += op1->i; - as->mrm.base = RID_NONE; - return; -#endif } as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); } @@ -275,26 +233,16 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) as->mrm.base = as->mrm.idx = RID_NONE; as->mrm.scale = XM_SCALE1; as->mrm.ofs = sizeof(GCstr); - if (!LJ_GC64 && irref_isk(ir->op1)) { - as->mrm.ofs += IR(ir->op1)->i; - } else { - Reg r = ra_alloc1(as, 
ir->op1, allow); - rset_clear(allow, r); - as->mrm.base = (uint8_t)r; - } + Reg r = ra_alloc1(as, ir->op1, allow); + rset_clear(allow, r); + as->mrm.base = (uint8_t)r; irr = IR(ir->op2); if (irref_isk(ir->op2)) { as->mrm.ofs += irr->i; } else { Reg r; /* Fuse a constant add into the offset, e.g. string.sub(s, i+10). */ - if (!LJ_64 && /* Has bad effects with negative index on x64. */ - mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) { - as->mrm.ofs += IR(irr->op2)->i; - r = ra_alloc1(as, irr->op1, allow); - } else { - r = ra_alloc1(as, ir->op2, allow); - } + r = ra_alloc1(as, ir->op2, allow); if (as->mrm.base == RID_NONE) as->mrm.base = (uint8_t)r; else @@ -307,7 +255,6 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) IRIns *ir = IR(ref); as->mrm.idx = RID_NONE; if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { -#if LJ_GC64 intptr_t ofs = dispofs(as, ir_kptr(ir)); if (checki32(ofs)) { as->mrm.ofs = (int32_t)ofs; @@ -315,12 +262,6 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) return; } } if (0) { -#else - as->mrm.ofs = ir->i; - as->mrm.base = RID_NONE; - } else if (ir->o == IR_STRREF) { - asm_fusestrref(as, ir, allow); -#endif } else { as->mrm.ofs = 0; if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) { @@ -367,10 +308,9 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) { const uint64_t *k = &ir_k64(ir)->u64; - if (!LJ_GC64 || checki32((intptr_t)k)) { + if (checki32((intptr_t)k)) { as->mrm.ofs = ptr2addr(k); as->mrm.base = RID_NONE; -#if LJ_GC64 } else if (checki32(dispofs(as, k))) { as->mrm.ofs = (int32_t)dispofs(as, k); as->mrm.base = RID_DISPATCH; @@ -387,10 +327,10 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) ir->i = (int32_t)(as->mctop - as->mcbot); as->mcbot += 8; as->mclim = as->mcbot + MCLIM_REDZONE; + lj_mcode_commitbot(as->J, as->mcbot); } as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i); as->mrm.base = RID_RIP; 
-#endif } as->mrm.idx = RID_NONE; return RID_MRM; @@ -427,13 +367,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) lua_assert(allow != RSET_EMPTY); if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ if (ref == REF_BASE) { -#if LJ_GC64 as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->jit_base); as->mrm.base = RID_DISPATCH; -#else - as->mrm.ofs = ptr2addr(&J2G(as->J)->jit_base); - as->mrm.base = RID_NONE; -#endif as->mrm.idx = RID_NONE; return RID_MRM; } else { @@ -445,7 +380,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) if (ir->o == IR_SLOAD) { if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && noconflict(as, ref, IR_RETF, 0) && - !(LJ_GC64 && irt_isaddr(ir->t))) { + !irt_isaddr(ir->t)) { as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); @@ -461,7 +396,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) } } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) && - !(LJ_GC64 && irt_isaddr(ir->t))) { + !irt_isaddr(ir->t)) { asm_fuseahuref(as, ir->op1, xallow); return RID_MRM; } @@ -474,7 +409,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) asm_fusexref(as, ir->op1, xallow); return RID_MRM; } - } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) { + } else if (ir->o == IR_VLOAD && !irt_isaddr(ir->t)) { asm_fuseahuref(as, ir->op1, xallow); return RID_MRM; } @@ -489,7 +424,6 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) return ra_allocref(as, ref, allow); } -#if LJ_64 /* Don't fuse a 32 bit load into a 64 bit operation. 
*/ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64) { @@ -497,9 +431,6 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64) return ra_alloc1(as, ref, allow); return asm_fuseload(as, ref, allow); } -#else -#define asm_fuseloadm(as, ref, allow, is64) asm_fuseload(as, (ref), (allow)) -#endif /* -- Calls --------------------------------------------------------------- */ @@ -508,7 +439,6 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) { uint32_t i, nargs = CCI_XNARGS(ci); int nslots = 0; -#if LJ_64 if (LJ_ABI_WIN) { nslots = (int)(nargs*2); /* Only matters for more than four args. */ } else { @@ -520,19 +450,6 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) if (ngpr > 0) ngpr--; else nslots += 2; } } -#else - int ngpr = 0; - if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL) - ngpr = 2; - else if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL) - ngpr = 1; - for (i = 0; i < nargs; i++) - if (args[i] && irt_isfp(IR(args[i])->t)) { - nslots += irt_isnum(IR(args[i])->t) ? 2 : 1; - } else { - if (ngpr > 0) ngpr--; else nslots++; - } -#endif return nslots; } @@ -541,68 +458,30 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) { uint32_t n, nargs = CCI_XNARGS(ci); int32_t ofs = STACKARG_OFS; -#if LJ_64 uint32_t gprs = REGARG_GPRS; Reg fpr = REGARG_FIRSTFPR; -#if !LJ_ABI_WIN MCode *patchnfpr = NULL; -#endif -#else - uint32_t gprs = 0; - if ((ci->flags & CCI_CC_MASK) != CCI_CC_CDECL) { - if ((ci->flags & CCI_CC_MASK) == CCI_CC_THISCALL) - gprs = (REGARG_GPRS & 31); - else if ((ci->flags & CCI_CC_MASK) == CCI_CC_FASTCALL) - gprs = REGARG_GPRS; - } -#endif if ((void *)ci->func) emit_call(as, ci->func); -#if LJ_64 if ((ci->flags & CCI_VARARG)) { /* Special handling for vararg calls. */ -#if LJ_ABI_WIN - for (n = 0; n < 4 && n < nargs; n++) { - IRIns *ir = IR(args[n]); - if (irt_isfp(ir->t)) /* Duplicate FPRs in GPRs. 
*/ - emit_rr(as, XO_MOVDto, (irt_isnum(ir->t) ? REX_64 : 0) | (fpr+n), - ((gprs >> (n*5)) & 31)); /* Either MOVD or MOVQ. */ - } -#else patchnfpr = --as->mcp; /* Indicate number of used FPRs in register al. */ *--as->mcp = XI_MOVrib | RID_EAX; -#endif } -#endif for (n = 0; n < nargs; n++) { /* Setup args. */ IRRef ref = args[n]; IRIns *ir = IR(ref); Reg r; -#if LJ_64 && LJ_ABI_WIN - /* Windows/x64 argument registers are strictly positional. */ - r = irt_isfp(ir->t) ? (fpr <= REGARG_LASTFPR ? fpr : 0) : (gprs & 31); - fpr++; gprs >>= 5; -#elif LJ_64 /* POSIX/x64 argument registers are used in order of appearance. */ if (irt_isfp(ir->t)) { r = fpr <= REGARG_LASTFPR ? fpr++ : 0; } else { r = gprs & 31; gprs >>= 5; } -#else - if (ref && irt_isfp(ir->t)) { - r = 0; - } else { - r = gprs & 31; gprs >>= 5; - if (!ref) continue; - } -#endif if (r) { /* Argument is in a register. */ if (r < RID_MAX_GPR && ref < ASMREF_TMP1) { -#if LJ_64 - if (LJ_GC64 ? !(ir->o == IR_KINT || ir->o == IR_KNULL) : ir->o == IR_KINT64) + if (!(ir->o == IR_KINT || ir->o == IR_KNULL)) emit_loadu64(as, r, ir_k64(ir)->u64); else -#endif emit_loadi(as, r, ir->i); } else { lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */ @@ -615,48 +494,32 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) } } else if (irt_isfp(ir->t)) { /* FP argument is on stack. */ lua_assert(!(irt_isfloat(ir->t) && irref_isk(ref))); /* No float k. */ - if (LJ_32 && (ofs & 4) && irref_isk(ref)) { - /* Split stores for unaligned FP consts. */ - emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); - emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi); - } else { - r = ra_alloc1(as, ref, RSET_FPR); - emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, - r, RID_ESP, ofs); - } - ofs += (LJ_32 && irt_isfloat(ir->t)) ? 4 : 8; + r = ra_alloc1(as, ref, RSET_FPR); + emit_rmro(as, irt_isnum(ir->t) ? 
XO_MOVSDto : XO_MOVSSto, + r, RID_ESP, ofs); + ofs += 8; } else { /* Non-FP argument is on stack. */ - if (LJ_32 && ref < ASMREF_TMP1) { - emit_movmroi(as, RID_ESP, ofs, ir->i); - } else { - r = ra_alloc1(as, ref, RSET_GPR); - emit_movtomro(as, REX_64 + r, RID_ESP, ofs); - } + r = ra_alloc1(as, ref, RSET_GPR); + emit_movtomro(as, REX_64 + r, RID_ESP, ofs); ofs += sizeof(intptr_t); } checkmclim(as); } -#if LJ_64 && !LJ_ABI_WIN if (patchnfpr) *patchnfpr = fpr - REGARG_FIRSTFPR; -#endif } /* Setup result reg/sp for call. Evict scratch regs. */ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) { RegSet drop = RSET_SCRATCH; - int hiop = (LJ_32 && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); if ((ci->flags & CCI_NOFPRCLOBBER)) drop &= ~RSET_FPR; if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. */ - if (hiop && ra_hasreg((ir+1)->r)) - rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ ra_evictset(as, drop); /* Evictions must be performed first. */ if (ra_used(ir)) { if (irt_isfp(ir->t)) { int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ -#if LJ_64 if ((ci->flags & CCI_CASTU64)) { Reg dest = ir->r; if (ra_hasreg(dest)) { @@ -668,44 +531,16 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) } else { ra_destreg(as, ir, RID_FPRET); } -#else - /* Number result is in x87 st0 for x86 calling convention. */ - Reg dest = ir->r; - if (ra_hasreg(dest)) { - ra_free(as, dest); - ra_modified(as, dest); - emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, - dest, RID_ESP, ofs); - } - if ((ci->flags & CCI_CASTU64)) { - emit_movtomro(as, RID_RETLO, RID_ESP, ofs); - emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4); - } else { - emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, - irt_isnum(ir->t) ? 
XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); - } -#endif -#if LJ_32 - } else if (hiop) { - ra_destpair(as, ir); -#endif } else { lua_assert(!irt_ispri(ir->t)); ra_destreg(as, ir, RID_RET); } - } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) { - emit_x87op(as, XI_FPOP); /* Pop unused result from x87 st0. */ } } /* Return a constant function pointer or NULL for indirect calls. */ static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) { -#if LJ_32 - UNUSED(as); - if (irref_isk(func)) - return (void *)irf->i; -#else if (irref_isk(func)) { MCode *p; if (irf->o == IR_KINT64) @@ -716,7 +551,6 @@ static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) return p; /* Call target is still in +-2GB range. */ /* Avoid the indirect case of emit_call(). Try to hoist func addr. */ } -#endif return NULL; } @@ -726,15 +560,9 @@ static void asm_callx(ASMState *as, IRIns *ir) CCallInfo ci; IRRef func; IRIns *irf; - int32_t spadj = 0; ci.flags = asm_callx_flags(as, ir); asm_collectargs(as, ir, &ci, args); asm_setupresult(as, ir, &ci); -#if LJ_32 - /* Have to readjust stack after non-cdecl calls due to callee cleanup. */ - if ((ci.flags & CCI_CC_MASK) != CCI_CC_CDECL) - spadj = 4 * asm_count_call_slots(as, &ci, args); -#endif func = ir->op2; irf = IR(func); if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } ci.func = (ASMFunction)asm_callx_func(as, irf, func); @@ -742,10 +570,7 @@ static void asm_callx(ASMState *as, IRIns *ir) /* Use a (hoistable) non-scratch register for indirect calls. */ RegSet allow = (RSET_GPR & ~RSET_SCRATCH); Reg r = ra_alloc1(as, func, allow); - if (LJ_32) emit_spsub(as, spadj); /* Above code may cause restores! 
*/ emit_rr(as, XO_GROUP5, XOg_CALL, r); - } else if (LJ_32) { - emit_spsub(as, spadj); } asm_gencall(as, &ci, args); } @@ -756,9 +581,7 @@ static void asm_callx(ASMState *as, IRIns *ir) static void asm_retf(ASMState *as, IRIns *ir) { Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); -#if LJ_FR2 Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base)); -#endif void *pc = ir_kptr(IR(ir->op2)); int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); as->topslot -= (BCReg)delta; @@ -767,12 +590,8 @@ static void asm_retf(ASMState *as, IRIns *ir) emit_setgl(as, base, jit_base); emit_addptr(as, base, -8*delta); asm_guardcc(as, CC_NE); -#if LJ_FR2 emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8); emit_loadu64(as, rpc, u64ptr(pc)); -#else - emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); -#endif } /* -- Type conversions ---------------------------------------------------- */ @@ -806,33 +625,21 @@ static void asm_tobit(ASMState *as, IRIns *ir) static void asm_conv(ASMState *as, IRIns *ir) { IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); - int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64)); + int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); int stfp = (st == IRT_NUM || st == IRT_FLOAT); IRRef lref = ir->op1; lua_assert(irt_type(ir->t) != st); - lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64))); /* Handled by SPLIT. */ if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ Reg left = asm_fuseload(as, lref, RSET_FPR); emit_mrm(as, st == IRT_NUM ? XO_CVTSD2SS : XO_CVTSS2SD, dest, left); if (left == dest) return; /* Avoid the XO_XORPS. */ - } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */ - /* number = (2^52+2^51 .. u32) - (2^52+2^51) */ - cTValue *k = &as->J->k64[LJ_K64_TOBIT]; - Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - if (irt_isfloat(ir->t)) - emit_rr(as, XO_CVTSD2SS, dest, dest); - emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. 
*/ - emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ - emit_rma(as, XO_MOVSD, bias, k); - emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); - return; } else { /* Integer to FP conversion. */ - Reg left = (LJ_64 && (st == IRT_U32 || st == IRT_U64)) ? + Reg left = (st == IRT_U32 || st == IRT_U64) ? ra_alloc1(as, lref, RSET_GPR) : asm_fuseloadm(as, lref, RSET_GPR, st64); - if (LJ_64 && st == IRT_U64) { + if (st == IRT_U64) { MCLabel l_end = emit_label(as); cTValue *k = &as->J->k64[LJ_K64_2P64]; emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */ @@ -840,7 +647,7 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */ } emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, - dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); + dest|((st64 || st == IRT_U32) ? REX_64 : 0), left); } emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */ } else if (stfp) { /* FP to integer conversion. */ @@ -851,14 +658,11 @@ static void asm_conv(ASMState *as, IRIns *ir) } else { Reg dest = ra_dest(as, ir, RSET_GPR); x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI; - if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { + if (irt_isu64(ir->t)) { /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ - /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) : ra_scratch(as, RSET_FPR); MCLabel l_end = emit_label(as); - if (LJ_32) - emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); emit_rr(as, op, dest|REX_64, tmp); if (st == IRT_NUM) emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]); @@ -869,11 +673,10 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_rr(as, op, dest|REX_64, tmp); ra_left(as, tmp, lref); } else { - if (LJ_64 && irt_isu32(ir->t)) + if (irt_isu32(ir->t)) emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. 
*/ emit_mrm(as, op, - dest|((LJ_64 && - (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), + dest|((irt_is64(ir->t) || irt_isu32(ir->t)) ? REX_64 : 0), asm_fuseload(as, lref, RSET_FPR)); } } @@ -893,18 +696,9 @@ static void asm_conv(ASMState *as, IRIns *ir) } left = asm_fuseload(as, lref, allow); /* Add extra MOV if source is already in wrong register. */ - if (!LJ_64 && left != RID_MRM && !rset_test(allow, left)) { - Reg tmp = ra_scratch(as, allow); - emit_rr(as, op, dest, tmp); - emit_rr(as, XO_MOV, tmp, left); - } else { - emit_mrm(as, op, dest, left); - } + emit_mrm(as, op, dest, left); } else { /* 32/64 bit integer conversions. */ - if (LJ_32) { /* Only need to handle 32/32 bit no-op (cast) on x86. */ - Reg dest = ra_dest(as, ir, RSET_GPR); - ra_left(as, dest, lref); /* Do nothing, but may need to move regs. */ - } else if (irt_is64(ir->t)) { + if (irt_is64(ir->t)) { Reg dest = ra_dest(as, ir, RSET_GPR); if (st64 || !(ir->op2 & IRCONV_SEXT)) { /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */ @@ -928,96 +722,6 @@ static void asm_conv(ASMState *as, IRIns *ir) } } -#if LJ_32 && LJ_HASFFI -/* No SSE conversions to/from 64 bit on x86, so resort to ugly x87 code. */ - -/* 64 bit integer to FP conversion in 32 bit mode. */ -static void asm_conv_fp_int64(ASMState *as, IRIns *ir) -{ - Reg hi = ra_alloc1(as, ir->op1, RSET_GPR); - Reg lo = ra_alloc1(as, (ir-1)->op1, rset_exclude(RSET_GPR, hi)); - int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ - Reg dest = ir->r; - if (ra_hasreg(dest)) { - ra_free(as, dest); - ra_modified(as, dest); - emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs); - } - emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, - irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); - if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { - /* For inputs in [2^63,2^64-1] add 2^64 to compensate. 
*/ - MCLabel l_end = emit_label(as); - emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]); - emit_sjcc(as, CC_NS, l_end); - emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ - } else { - lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64); - } - emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0); - /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */ - emit_rmro(as, XO_MOVto, hi, RID_ESP, 4); - emit_rmro(as, XO_MOVto, lo, RID_ESP, 0); -} - -/* FP to 64 bit integer conversion in 32 bit mode. */ -static void asm_conv_int64_fp(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); - IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); - Reg lo, hi; - lua_assert(st == IRT_NUM || st == IRT_FLOAT); - lua_assert(dt == IRT_I64 || dt == IRT_U64); - hi = ra_dest(as, ir, RSET_GPR); - lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); - if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); - /* NYI: Avoid wide-to-narrow store-to-load forwarding stall. */ - if (!(as->flags & JIT_F_SSE3)) { /* Set FPU rounding mode to default. */ - emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 4); - emit_rmro(as, XO_MOVto, lo, RID_ESP, 4); - emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff); - } - if (dt == IRT_U64) { - /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */ - MCLabel l_pop, l_end = emit_label(as); - emit_x87op(as, XI_FPOP); - l_pop = emit_label(as); - emit_sjmp(as, l_end); - emit_rmro(as, XO_MOV, hi, RID_ESP, 4); - if ((as->flags & JIT_F_SSE3)) - emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); - else - emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); - emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]); - emit_sjcc(as, CC_NS, l_pop); - emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ - } - emit_rmro(as, XO_MOV, hi, RID_ESP, 4); - if ((as->flags & JIT_F_SSE3)) { /* Truncation is easy with SSE3. 
*/ - emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); - } else { /* Otherwise set FPU rounding mode to truncate before the store. */ - emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); - emit_rmro(as, XO_FLDCW, XOg_FLDCW, RID_ESP, 0); - emit_rmro(as, XO_MOVtow, lo, RID_ESP, 0); - emit_rmro(as, XO_ARITHw(XOg_OR), lo, RID_ESP, 0); - emit_loadi(as, lo, 0xc00); - emit_rmro(as, XO_FNSTCW, XOg_FNSTCW, RID_ESP, 0); - } - if (dt == IRT_U64) - emit_x87op(as, XI_FDUP); - emit_mrm(as, st == IRT_NUM ? XO_FLDq : XO_FLDd, - st == IRT_NUM ? XOg_FLDq: XOg_FLDd, - asm_fuseload(as, ir->op1, RSET_EMPTY)); -} - -static void asm_conv64(ASMState *as, IRIns *ir) -{ - if (irt_isfp(ir->t)) - asm_conv_fp_int64(as, ir); - else - asm_conv_int64_fp(as, ir); -} -#endif static void asm_strto(ASMState *as, IRIns *ir) { @@ -1052,7 +756,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir)); } else { /* Otherwise use g->tmptv to hold the TValue. */ -#if LJ_GC64 if (irref_isk(ref)) { TValue k; lj_ir_kvalue(as->J->L, &k, ir); @@ -1070,16 +773,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) } emit_movtomro(as, REX_64IR(ir, src), dest, 0); } -#else - if (!irref_isk(ref)) { - Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); - emit_movtomro(as, REX_64IR(ir, src), dest, 0); - } else if (!irt_ispri(ir->t)) { - emit_movmroi(as, dest, 0, ir->i); - } - if (!(LJ_64 && irt_islightud(ir->t))) - emit_movmroi(as, dest, 4, irt_toitype(ir->t)); -#endif emit_loada(as, dest, &J2G(as->J)->tmptv); } } @@ -1118,8 +811,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) if (!isk) { rset_clear(allow, tab); key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); - if (LJ_GC64 || !irt_isstr(kt)) - tmp = ra_scratch(as, rset_exclude(allow, key)); + tmp = ra_scratch(as, rset_exclude(allow, key)); } /* Key not found in chain: jump to exit (if merged) or load niltv. 
*/ @@ -1153,19 +845,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); emit_sjcc(as, CC_AE, l_next); /* The type check avoids NaN penalties and complaints from Valgrind. */ -#if LJ_64 && !LJ_GC64 - emit_u32(as, LJ_TISNUM); - emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); -#else emit_i8(as, LJ_TISNUM); emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); -#endif } -#if LJ_64 && !LJ_GC64 - } else if (irt_islightud(kt)) { - emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64)); -#endif -#if LJ_GC64 } else if (irt_isaddr(kt)) { if (isk) { TValue k; @@ -1182,30 +864,13 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) lua_assert(irt_ispri(kt) && !irt_isnil(kt)); emit_u32(as, (irt_toitype(kt)<<15)|0x7fff); emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); -#else - } else { - if (!irt_ispri(kt)) { - lua_assert(irt_isaddr(kt)); - if (isk) - emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr), - ptr2addr(ir_kgc(irkey))); - else - emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr)); - emit_sjcc(as, CC_NE, l_next); - } - lua_assert(!irt_isnil(kt)); - emit_i8(as, irt_toitype(kt)); - emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); -#endif } emit_sfixup(as, l_loop); checkmclim(as); -#if LJ_GC64 if (!isk && irt_isaddr(kt)) { emit_rr(as, XO_OR, tmp|REX_64, key); emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47); } -#endif /* Load main position relative to tab->node into dest. */ khash = isk ? 
ir_khash(irkey) : 1; @@ -1237,17 +902,11 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rr(as, XO_ARITH(XOg_XOR), tmp, dest); if (irt_isnum(kt)) { emit_rr(as, XO_ARITH(XOg_ADD), dest, dest); -#if LJ_64 emit_shifti(as, XOg_SHR|REX_64, dest, 32); emit_rr(as, XO_MOV, tmp, dest); emit_rr(as, XO_MOVDto, key|REX_64, dest); -#else - emit_rmro(as, XO_MOV, dest, RID_ESP, ra_spill(as, irkey)+4); - emit_rr(as, XO_MOVDto, key, tmp); -#endif } else { emit_rr(as, XO_MOV, tmp, key); -#if LJ_GC64 checkmclim(as); emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15); if ((as->flags & JIT_F_BMI2)) { @@ -1257,9 +916,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_shifti(as, XOg_SHR|REX_64, dest, 32); emit_rr(as, XO_MOV, dest|REX_64, key|REX_64); } -#else - emit_rmro(as, XO_LEA, dest, key, HASH_BIAS); -#endif } } } @@ -1272,9 +928,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); Reg dest = ra_used(ir) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; Reg node = ra_alloc1(as, ir->op1, RSET_GPR); -#if !LJ_64 - MCLabel l_exit; -#endif lua_assert(ofs % sizeof(Node) == 0); if (ra_hasreg(dest)) { if (ofs != 0) { @@ -1287,7 +940,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) } } asm_guardcc(as, CC_NE); -#if LJ_64 if (!irt_ispri(irkey->t)) { Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node)); emit_rmro(as, XO_CMP, key|REX_64, node, @@ -1295,50 +947,14 @@ static void asm_hrefk(ASMState *as, IRIns *ir) lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t)); /* Assumes -0.0 is already canonicalized to +0.0. */ emit_loadu64(as, key, irt_isnum(irkey->t) ? 
ir_knum(irkey)->u64 : -#if LJ_GC64 ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey)); -#else - ((uint64_t)irt_toitype(irkey->t) << 32) | - (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); -#endif } else { lua_assert(!irt_isnil(irkey->t)); -#if LJ_GC64 emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff); emit_rmro(as, XO_ARITHi, XOg_CMP, node, ofs + (int32_t)offsetof(Node, key.it)); -#else - emit_i8(as, irt_toitype(irkey->t)); - emit_rmro(as, XO_ARITHi8, XOg_CMP, node, - ofs + (int32_t)offsetof(Node, key.it)); -#endif } -#else - l_exit = emit_label(as); - if (irt_isnum(irkey->t)) { - /* Assumes -0.0 is already canonicalized to +0.0. */ - emit_gmroi(as, XG_ARITHi(XOg_CMP), node, - ofs + (int32_t)offsetof(Node, key.u32.lo), - (int32_t)ir_knum(irkey)->u32.lo); - emit_sjcc(as, CC_NE, l_exit); - emit_gmroi(as, XG_ARITHi(XOg_CMP), node, - ofs + (int32_t)offsetof(Node, key.u32.hi), - (int32_t)ir_knum(irkey)->u32.hi); - } else { - if (!irt_ispri(irkey->t)) { - lua_assert(irt_isgcv(irkey->t)); - emit_gmroi(as, XG_ARITHi(XOg_CMP), node, - ofs + (int32_t)offsetof(Node, key.gcr), - ptr2addr(ir_kgc(irkey))); - emit_sjcc(as, CC_NE, l_exit); - } - lua_assert(!irt_isnil(irkey->t)); - emit_i8(as, irt_toitype(irkey->t)); - emit_rmro(as, XO_ARITHi8, XOg_CMP, node, - ofs + (int32_t)offsetof(Node, key.it)); - } -#endif } static void asm_uref(ASMState *as, IRIns *ir) @@ -1403,7 +1019,7 @@ static void asm_fxload(ASMState *as, IRIns *ir) case IRT_NUM: xo = XO_MOVSD; break; case IRT_FLOAT: xo = XO_MOVSS; break; default: - if (LJ_64 && irt_is64(ir->t)) + if (irt_is64(ir->t)) dest |= REX_64; else lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); @@ -1431,17 +1047,12 @@ static void asm_fxstore(ASMState *as, IRIns *ir) RegSet allow8 = irt_isfp(ir->t) ? RSET_FPR : (irt_isi8(ir->t) || irt_isu8(ir->t)) ? RSET_GPR8 : RSET_GPR; src = osrc = ra_alloc1(as, ir->op2, allow8); - if (!LJ_64 && !rset_test(allow8, src)) { /* Already in wrong register. 
*/ - rset_clear(allow, osrc); - src = ra_scratch(as, allow8); - } rset_clear(allow, src); } if (ir->o == IR_FSTORE) { asm_fusefref(as, IR(ir->op1), allow); } else { asm_fusexref(as, ir->op1, allow); - if (LJ_32 && ir->o == IR_HIOP) as->mrm.ofs += 4; } if (ra_hasreg(src)) { x86Op xo; @@ -1450,11 +1061,8 @@ static void asm_fxstore(ASMState *as, IRIns *ir) case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; case IRT_NUM: xo = XO_MOVSDto; break; case IRT_FLOAT: xo = XO_MOVSSto; break; -#if LJ_64 && !LJ_GC64 - case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ -#endif default: - if (LJ_64 && irt_is64(ir->t)) + if (irt_is64(ir->t)) src |= REX_64; else lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); @@ -1462,10 +1070,6 @@ static void asm_fxstore(ASMState *as, IRIns *ir) break; } emit_mrm(as, xo, src, RID_MRM); - if (!LJ_64 && src != osrc) { - ra_noweak(as, osrc); - emit_rr(as, XO_MOV, src, osrc); - } } else { if (irt_isi8(ir->t) || irt_isu8(ir->t)) { emit_i8(as, k); @@ -1482,48 +1086,15 @@ static void asm_fxstore(ASMState *as, IRIns *ir) #define asm_fstore(as, ir) asm_fxstore(as, ir) #define asm_xstore(as, ir) asm_fxstore(as, ir) -#if LJ_64 && !LJ_GC64 -static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) -{ - if (ra_used(ir) || typecheck) { - Reg dest = ra_dest(as, ir, RSET_GPR); - if (typecheck) { - Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, dest)); - asm_guardcc(as, CC_NE); - emit_i8(as, -2); - emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); - emit_shifti(as, XOg_SAR|REX_64, tmp, 47); - emit_rr(as, XO_MOV, tmp|REX_64, dest); - } - return dest; - } else { - return RID_NONE; - } -} -#endif static void asm_ahuvload(ASMState *as, IRIns *ir) { -#if LJ_GC64 Reg tmp = RID_NONE; -#endif - lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || - (LJ_DUALNUM && irt_isint(ir->t))); -#if LJ_64 && !LJ_GC64 - if (irt_islightud(ir->t)) { - Reg dest = asm_load_lightud64(as, ir, 1); - if (ra_hasreg(dest)) { - 
asm_fuseahuref(as, ir->op1, RSET_GPR); - emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); - } - return; - } else -#endif + lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t)); if (ra_used(ir)) { RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; Reg dest = ra_dest(as, ir, allow); asm_fuseahuref(as, ir->op1, RSET_GPR); -#if LJ_GC64 if (irt_isaddr(ir->t)) { emit_shifti(as, XOg_SHR|REX_64, dest, 17); asm_guardcc(as, CC_NE); @@ -1539,30 +1110,22 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) } return; } else -#endif emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM); } else { RegSet gpr = RSET_GPR; -#if LJ_GC64 if (irt_isaddr(ir->t)) { tmp = ra_scratch(as, RSET_GPR); gpr = rset_exclude(gpr, tmp); } -#endif asm_fuseahuref(as, ir->op1, gpr); } /* Always do the type check, even if the load result is unused. */ as->mrm.ofs += 4; asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); - if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { + if (irt_type(ir->t) >= IRT_NUM) { lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); -#if LJ_GC64 emit_u32(as, LJ_TISNUM << 15); -#else - emit_u32(as, LJ_TISNUM); -#endif emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); -#if LJ_GC64 } else if (irt_isaddr(ir->t)) { as->mrm.ofs -= 4; emit_i8(as, irt_toitype(ir->t)); @@ -1576,11 +1139,6 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) } else { emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff); emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); -#else - } else { - emit_i8(as, irt_toitype(ir->t)); - emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM); -#endif } } @@ -1592,13 +1150,6 @@ static void asm_ahustore(ASMState *as, IRIns *ir) Reg src = ra_alloc1(as, ir->op2, RSET_FPR); asm_fuseahuref(as, ir->op1, RSET_GPR); emit_mrm(as, XO_MOVSDto, src, RID_MRM); -#if LJ_64 && !LJ_GC64 - } else if (irt_islightud(ir->t)) { - Reg src = ra_alloc1(as, ir->op2, RSET_GPR); - asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src)); - emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); -#endif -#if 
LJ_GC64 } else if (irref_isk(ir->op2)) { TValue k; lj_ir_kvalue(as->J->L, &k, IR(ir->op2)); @@ -1613,7 +1164,6 @@ static void asm_ahustore(ASMState *as, IRIns *ir) emit_u32(as, k.u32.hi); emit_mrm(as, XO_MOVmi, 0, RID_MRM); } -#endif } else { IRIns *irr = IR(ir->op2); RegSet allow = RSET_GPR; @@ -1624,31 +1174,17 @@ static void asm_ahustore(ASMState *as, IRIns *ir) } asm_fuseahuref(as, ir->op1, allow); if (ra_hasreg(src)) { -#if LJ_GC64 - if (!(LJ_DUALNUM && irt_isinteger(ir->t))) { - /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ - as->mrm.ofs += 4; - emit_u32(as, irt_toitype(ir->t) << 15); - emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM); - as->mrm.ofs -= 4; - emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); - return; - } -#endif - emit_mrm(as, XO_MOVto, src, RID_MRM); - } else if (!irt_ispri(irr->t)) { - lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); + /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ + as->mrm.ofs += 4; + emit_u32(as, irt_toitype(ir->t) << 15); + emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM); + as->mrm.ofs -= 4; + emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); + } else { + lua_assert(!irt_ispri(irr->t) && irt_isaddr(ir->t)); emit_i32(as, irr->i); emit_mrm(as, XO_MOVmi, 0, RID_MRM); } - as->mrm.ofs += 4; -#if LJ_GC64 - lua_assert(LJ_DUALNUM && irt_isinteger(ir->t)); - emit_i32(as, LJ_TNUMX << 15); -#else - emit_i32(as, (int32_t)irt_toitype(ir->t)); -#endif - emit_mrm(as, XO_MOVmi, 0, RID_MRM); } } @@ -1660,23 +1196,13 @@ static void asm_sload(ASMState *as, IRIns *ir) Reg base; lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). 
*/ lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - lua_assert(LJ_DUALNUM || - !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); + lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { Reg left = ra_scratch(as, RSET_FPR); asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ base = ra_alloc1(as, REF_BASE, RSET_GPR); emit_rmro(as, XO_MOVSD, left, base, ofs); t.irt = IRT_NUM; /* Continue with a regular number type check. */ -#if LJ_64 && !LJ_GC64 - } else if (irt_islightud(t)) { - Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK)); - if (ra_hasreg(dest)) { - base = ra_alloc1(as, REF_BASE, RSET_GPR); - emit_rmro(as, XO_MOV, dest|REX_64, base, ofs); - } - return; -#endif } else if (ra_used(ir)) { RegSet allow = irt_isnum(t) ? RSET_FPR : RSET_GPR; Reg dest = ra_dest(as, ir, allow); @@ -1686,7 +1212,6 @@ static void asm_sload(ASMState *as, IRIns *ir) t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs); } else { -#if LJ_GC64 if (irt_isaddr(t)) { /* LJ_GC64 type check + tag removal without BMI2 and with BMI2: ** @@ -1715,7 +1240,6 @@ static void asm_sload(ASMState *as, IRIns *ir) } return; } else -#endif emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs); } } else { @@ -1726,15 +1250,10 @@ static void asm_sload(ASMState *as, IRIns *ir) if ((ir->op2 & IRSLOAD_TYPECHECK)) { /* Need type check, even if the load result is unused. */ asm_guardcc(as, irt_isnum(t) ? 
CC_AE : CC_NE); - if (LJ_64 && irt_type(t) >= IRT_NUM) { + if (irt_type(t) >= IRT_NUM) { lua_assert(irt_isinteger(t) || irt_isnum(t)); -#if LJ_GC64 emit_u32(as, LJ_TISNUM << 15); -#else - emit_u32(as, LJ_TISNUM); -#endif emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); -#if LJ_GC64 } else if (irt_isnil(t)) { /* LJ_GC64 type check for nil: ** @@ -1758,19 +1277,13 @@ static void asm_sload(ASMState *as, IRIns *ir) emit_i8(as, irt_toitype(t)); emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); emit_shifti(as, XOg_SAR|REX_64, tmp, 47); - emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs+4); -#else - } else { - emit_i8(as, irt_toitype(t)); - emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4); -#endif + emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs); } } } /* -- Allocations --------------------------------------------------------- */ -#if LJ_HASFFI static void asm_cnew(ASMState *as, IRIns *ir) { CTState *cts = ctype_ctsG(J2G(as->J)); @@ -1787,12 +1300,11 @@ static void asm_cnew(ASMState *as, IRIns *ir) /* Initialize immutable cdata object. */ if (ir->o == IR_CNEWI) { RegSet allow = (RSET_GPR & ~RSET_SCRATCH); -#if LJ_64 Reg r64 = sz == 8 ? REX_64 : 0; if (irref_isk(ir->op2)) { IRIns *irk = IR(ir->op2); - uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 : - (uint64_t)(uint32_t)irk->i; + uint64_t k = (irk->o == IR_KINT64 || irk->o == IR_KPTR || irk->o == IR_KKPTR) + ? 
ir_k64(irk)->u64 : (uint64_t)(uint32_t)irk->i; if (sz == 4 || checki32((int64_t)k)) { emit_i32(as, (int32_t)k); emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata)); @@ -1804,24 +1316,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) Reg r = ra_alloc1(as, ir->op2, allow); emit_movtomro(as, r + r64, RID_RET, sizeof(GCcdata)); } -#else - int32_t ofs = sizeof(GCcdata); - if (sz == 8) { - ofs += 4; ir++; - lua_assert(ir->o == IR_HIOP); - } - do { - if (irref_isk(ir->op2)) { - emit_movmroi(as, RID_RET, ofs, IR(ir->op2)->i); - } else { - Reg r = ra_alloc1(as, ir->op2, allow); - emit_movtomro(as, r, RID_RET, ofs); - rset_clear(allow, r); - } - if (ofs == sizeof(GCcdata)) break; - ofs -= 4; ir--; - } while (1); -#endif lua_assert(sz == 4 || sz == 8); } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; @@ -1846,9 +1340,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) asm_gencall(as, ci, args); emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); } -#else -#define asm_cnew(as, ir) ((void)0) -#endif /* -- Write barriers ------------------------------------------------------ */ @@ -1938,7 +1429,7 @@ static void asm_fpmath(ASMState *as, IRIns *ir) /* Round down/up/trunc == 1001/1010/1011. */ emit_i8(as, 0x09 + fpm); emit_mrm(as, XO_ROUNDSD, dest, left); - if (LJ_64 && as->mcp[1] != (MCode)(XO_ROUNDSD >> 16)) { + if (as->mcp[1] != (MCode)(XO_ROUNDSD >> 16)) { as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */ } *--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */ @@ -1993,12 +1484,10 @@ static void asm_fppowi(ASMState *as, IRIns *ir) static void asm_pow(ASMState *as, IRIns *ir) { -#if LJ_64 && LJ_HASFFI if (!irt_isnum(ir->t)) asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_powi64 : IRCALL_lj_carith_powu64); else -#endif asm_fppowi(as, ir); } @@ -2058,7 +1547,7 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa) Reg dest, right; int32_t k = 0; if (as->flagmcp == as->mcp) { /* Drop test r,r instruction. */ - MCode *p = as->mcp + ((LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2); + MCode *p = as->mcp + ((*as->mcp < XI_TESTb) ? 3 : 2); if ((p[1] & 15) < 14) { if ((p[1] & 15) >= 12) p[1] -= 4; /* L <->S, NL <-> NS */ as->flagmcp = NULL; @@ -2151,20 +1640,6 @@ static int asm_lea(ASMState *as, IRIns *ir) } else { return 0; } - } else if (ir->op1 != ir->op2 && irl->o == IR_ADD && mayfuse(as, ir->op1) && - (irref_isk(ir->op2) || irref_isk(irl->op2))) { - Reg idx, base = ra_alloc1(as, irl->op1, allow); - rset_clear(allow, base); - as->mrm.base = (uint8_t)base; - if (irref_isk(ir->op2)) { - as->mrm.ofs = irr->i; - idx = ra_alloc1(as, irl->op2, allow); - } else { - as->mrm.ofs = IR(irl->op2)->i; - idx = ra_alloc1(as, ir->op2, allow); - } - rset_clear(allow, idx); - as->mrm.idx = (uint8_t)idx; } else { return 0; } @@ -2200,23 +1675,19 @@ static void asm_mul(ASMState *as, IRIns *ir) static void asm_div(ASMState *as, IRIns *ir) { -#if LJ_64 && LJ_HASFFI if (!irt_isnum(ir->t)) asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : IRCALL_lj_carith_divu64); else -#endif asm_fparith(as, ir, XO_DIVSD); } static void asm_mod(ASMState *as, IRIns *ir) { -#if LJ_64 && LJ_HASFFI if (!irt_isint(ir->t)) asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_modi64 : IRCALL_lj_carith_modu64); else -#endif asm_callid(as, ir, IRCALL_lj_vm_modi); } @@ -2502,13 +1973,7 @@ static void asm_comp(ASMState *as, IRIns *ir) lua_assert(ir->o == IR_EQ || ir->o == IR_NE); xo = XO_TESTb; if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) { - if (LJ_64) { - left |= FORCE_REX; - } else { - emit_i32(as, 0xff); - emit_mrm(as, XO_GROUP3, XOg_TEST, left); - return; - } + left |= FORCE_REX; } } emit_rr(as, xo, r64 + left, left); @@ -2529,147 +1994,13 @@ static void asm_comp(ASMState *as, IRIns *ir) #define asm_equal(as, ir) asm_comp(as, ir) -#if LJ_32 && LJ_HASFFI -/* 64 bit integer comparisons in 32 bit mode. */ -static void asm_comp_int64(ASMState *as, IRIns *ir) -{ - uint32_t cc = asm_compmap[(ir-1)->o]; - RegSet allow = RSET_GPR; - Reg lefthi = RID_NONE, leftlo = RID_NONE; - Reg righthi = RID_NONE, rightlo = RID_NONE; - MCLabel l_around; - x86ModRM mrm; - - as->curins--; /* Skip loword ins. Avoids failing in noconflict(), too. */ - - /* Allocate/fuse hiword operands. */ - if (irref_isk(ir->op2)) { - lefthi = asm_fuseload(as, ir->op1, allow); - } else { - lefthi = ra_alloc1(as, ir->op1, allow); - rset_clear(allow, lefthi); - righthi = asm_fuseload(as, ir->op2, allow); - if (righthi == RID_MRM) { - if (as->mrm.base != RID_NONE) rset_clear(allow, as->mrm.base); - if (as->mrm.idx != RID_NONE) rset_clear(allow, as->mrm.idx); - } else { - rset_clear(allow, righthi); - } - } - mrm = as->mrm; /* Save state for hiword instruction. */ - - /* Allocate/fuse loword operands. */ - if (irref_isk((ir-1)->op2)) { - leftlo = asm_fuseload(as, (ir-1)->op1, allow); - } else { - leftlo = ra_alloc1(as, (ir-1)->op1, allow); - rset_clear(allow, leftlo); - rightlo = asm_fuseload(as, (ir-1)->op2, allow); - } - - /* All register allocations must be performed _before_ this point. */ - l_around = emit_label(as); - as->invmcp = as->flagmcp = NULL; /* Cannot use these optimizations. */ - - /* Loword comparison and branch. 
*/ - asm_guardcc(as, cc >> 4); /* Always use unsigned compare for loword. */ - if (ra_noreg(rightlo)) { - int32_t imm = IR((ir-1)->op2)->i; - if (imm == 0 && ((cc >> 4) & 0xa) != 0x2 && leftlo != RID_MRM) - emit_rr(as, XO_TEST, leftlo, leftlo); - else - emit_gmrmi(as, XG_ARITHi(XOg_CMP), leftlo, imm); - } else { - emit_mrm(as, XO_CMP, leftlo, rightlo); - } - - /* Hiword comparison and branches. */ - if ((cc & 15) != CC_NE) - emit_sjcc(as, CC_NE, l_around); /* Hiword unequal: skip loword compare. */ - if ((cc & 15) != CC_E) - asm_guardcc(as, cc >> 8); /* Hiword compare without equality check. */ - as->mrm = mrm; /* Restore state. */ - if (ra_noreg(righthi)) { - int32_t imm = IR(ir->op2)->i; - if (imm == 0 && (cc & 0xa) != 0x2 && lefthi != RID_MRM) - emit_rr(as, XO_TEST, lefthi, lefthi); - else - emit_gmrmi(as, XG_ARITHi(XOg_CMP), lefthi, imm); - } else { - emit_mrm(as, XO_CMP, lefthi, righthi); - } -} -#endif /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ static void asm_hiop(ASMState *as, IRIns *ir) { -#if LJ_32 && LJ_HASFFI - /* HIOP is marked as a store because it needs its own DCE logic. */ - int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ - if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; - if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ - as->curins--; /* Always skip the CONV. */ - if (usehi || uselo) - asm_conv64(as, ir); - return; - } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ - asm_comp_int64(as, ir); - return; - } else if ((ir-1)->o == IR_XSTORE) { - if ((ir-1)->r != RID_SINK) - asm_fxstore(as, ir); - return; - } - if (!usehi) return; /* Skip unused hiword op for all remaining ops. 
*/ - switch ((ir-1)->o) { - case IR_ADD: - as->flagmcp = NULL; - as->curins--; - asm_intarith(as, ir, XOg_ADC); - asm_intarith(as, ir-1, XOg_ADD); - break; - case IR_SUB: - as->flagmcp = NULL; - as->curins--; - asm_intarith(as, ir, XOg_SBB); - asm_intarith(as, ir-1, XOg_SUB); - break; - case IR_NEG: { - Reg dest = ra_dest(as, ir, RSET_GPR); - emit_rr(as, XO_GROUP3, XOg_NEG, dest); - emit_i8(as, 0); - emit_rr(as, XO_ARITHi8, XOg_ADC, dest); - ra_left(as, dest, ir->op1); - as->curins--; - asm_neg_not(as, ir-1, XOg_NEG); - break; - } - case IR_CALLN: - case IR_CALLXS: - if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ - break; - case IR_CNEWI: - /* Nothing to do here. Handled by CNEWI itself. */ - break; - default: lua_assert(0); break; - } -#else UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on x64 or without FFI. */ -#endif -} - -/* -- Profiling ----------------------------------------------------------- */ - -static void asm_prof(ASMState *as, IRIns *ir) -{ - UNUSED(ir); - asm_guardcc(as, CC_NE); - emit_i8(as, HOOK_PROFILE); - emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask); } /* -- Stack handling ------------------------------------------------------ */ @@ -2690,13 +2021,8 @@ static void asm_stack_check(ASMState *as, BCReg topslot, if (ra_hasreg(pbase) && pbase != r) emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase); else -#if LJ_GC64 emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, &J2G(as->J)->jit_base)); -#else - emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, - ptr2addr(&J2G(as->J)->jit_base)); -#endif emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack)); emit_getgl(as, r, cur_L); if (allow == RSET_EMPTY) /* Spill temp. register. 
*/ @@ -2724,23 +2050,17 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) Reg src = ra_alloc1(as, ref, RSET_FPR); emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); } else { - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || - (LJ_DUALNUM && irt_isinteger(ir->t))); + lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); if (!irref_isk(ref)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); -#if LJ_GC64 if (irt_is64(ir->t)) { /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ emit_u32(as, irt_toitype(ir->t) << 15); emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4); - } else if (LJ_DUALNUM && irt_isinteger(ir->t)) { - emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15); } else { emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff); } -#endif emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); -#if LJ_GC64 } else { TValue k; lj_ir_kvalue(as->J->L, &k, ir); @@ -2751,21 +2071,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi); emit_movmroi(as, RID_BASE, ofs, k.u32.lo); } -#else - } else if (!irt_ispri(ir->t)) { - emit_movmroi(as, RID_BASE, ofs, ir->i); -#endif } if ((sn & (SNAP_CONT|SNAP_FRAME))) { -#if !LJ_FR2 - if (s != 0) /* Do not overwrite link to previous frame. */ - emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); -#endif -#if !LJ_GC64 - } else { - if (!(LJ_64 && irt_islightud(ir->t))) - emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); -#endif } } checkmclim(as); @@ -2791,11 +2098,7 @@ static void asm_gc_check(ASMState *as) args[1] = ASMREF_TMP2; /* MSize steps */ asm_gencall(as, ci, args); tmp = ra_releasetmp(as, ASMREF_TMP1); -#if LJ_GC64 emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G); -#else - emit_loada(as, tmp, J2G(as->J)); -#endif emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps); /* Jump around GC step if GC total < GC threshold. 
*/ emit_sjcc(as, CC_B, l_end); @@ -2893,14 +2196,14 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) /* -- Tail of trace ------------------------------------------------------- */ /* Fixup the tail code. */ -static void asm_tail_fixup(ASMState *as, TraceNo lnk) +static void asm_tail_fixup(ASMState *as, TraceNo lnk, int track) { /* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */ MCode *p = as->mctop; MCode *target, *q; int32_t spadj = as->T->spadjust; if (spadj == 0) { - p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0); + p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + 1; } else { MCode *p1; /* Patch stack adjustment. */ @@ -2913,22 +2216,20 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) *(int32_t *)p1 = spadj; } if ((as->flags & JIT_F_LEA_AGU)) { -#if LJ_64 p1[-4] = 0x48; -#endif p1[-3] = (MCode)XI_LEA; p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP); p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP); } else { -#if LJ_64 p1[-3] = 0x48; -#endif p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); } } /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; + target = (lnk ? traceref(as->J, lnk)->mcode : + (track ? (MCode *)lj_vm_exit_interp : + (MCode *)lj_vm_exit_interp_notrack)); *(int32_t *)(p-4) = jmprel(p, target); p[-5] = XI_JMP; /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ @@ -2956,7 +2257,7 @@ static void asm_tail_prep(ASMState *as) as->invmcp = as->mcp = p; } else { /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ - as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0)); + as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 
7 : 6) + 1); as->invmcp = NULL; } } @@ -2972,11 +2273,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) nslots = asm_count_call_slots(as, ci, args); if (nslots > as->evenspill) /* Leave room for args in stack slots. */ as->evenspill = nslots; -#if LJ_64 return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); -#else - return irt_isfp(ir->t) ? REGSP_INIT : REGSP_HINT(RID_RET); -#endif } /* Target-specific setup. */ @@ -2993,21 +2290,13 @@ static const uint8_t map_op1[256] = { 0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51, 0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, 0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, -#if LJ_64 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14, -#else -0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, -#endif 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, 0x51,0x51,0x92,0x92,0x10,0x10,0x12,0x11,0x45,0x86,0x52,0x93,0x51,0x51,0x51,0x51, 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, 0x93,0x86,0x93,0x93,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x47,0x51,0x51,0x51,0x51,0x51, -#if LJ_64 0x59,0x59,0x59,0x59,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, -#else -0x55,0x55,0x55,0x55,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, -#endif 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05, 0x93,0x93,0x53,0x51,0x70,0x71,0x93,0x86,0x54,0x51,0x53,0x51,0x51,0x52,0x51,0x51, 0x92,0x92,0x92,0x92,0x52,0x52,0x51,0x51,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, @@ -3052,10 +2341,6 @@ static uint32_t asm_x86_inslen(const uint8_t* p) else if ((prefixes & 2) && (x == 0x66)) x = 4; goto mrm; case 7: /* VEX c4/c5. 
*/ - if (LJ_32 && p[1] < 0xc0) { - x = 2; - goto mrm; - } if (x == 0x70) { x = *++p & 0x1f; result++; @@ -3096,17 +2381,13 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) MSize len = T->szmcode; MCode *px = exitstub_addr(J, exitno) - 6; MCode *pe = p+len-6; -#if LJ_GC64 uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch)); -#else - uint32_t statei = u32ptr(&J2G(J)->vmstate); -#endif if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) *(int32_t *)(p+len-4) = jmprel(p+len, target); /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ for (; p < pe; p += asm_x86_inslen(p)) { - intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64; - if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi) + intptr_t ofs = (p[0] & 0xf0) == 0x40; + if (*(uint32_t *)(p+2+ofs) == statei && p[ofs] == XI_MOVmi) break; } lua_assert(p < pe); diff --git a/src/lj_auditlog.c b/src/lj_auditlog.c new file mode 100644 index 0000000000..866a0346ec --- /dev/null +++ b/src/lj_auditlog.c @@ -0,0 +1,291 @@ +/* +** Audit log. Records JIT/runtime events for offline analysis. +*/ + +#define lj_auditlog_c + +#include <stdio.h> +#include <time.h> + +#include "lj_trace.h" +#include "lj_ctype.h" +#include "lj_auditlog.h" +#include "lj_debuginfo.h" + +/* Maximum data to buffer in memory before file is opened. */ +#define MAX_MEM_BUFFER (1024*1024) +/* State for initial in-memory stream. */ +static char *membuffer; +static size_t membuffersize; + +static FILE *fp; /* File where the audit log is written. */ +static int error; /* Have we been unable to initialize the log? */ +static int open; /* are we logging to a real file? */ +static size_t loggedbytes; /* Bytes already written to log. */ +static size_t sizelimit; /* File size when logging will stop. */ +#define DEFAULT_SIZE_LIMIT (100*1024*1024) /* Generous size limit. 
*/ + +/* -- byte counting file write wrappers ----------------------------------- */ + +static int cfputc(int c, FILE *f) { + loggedbytes++; + return fputc(c, f); +} + +static int cfwrite(const void *ptr, size_t size, size_t nmemb, FILE *f) { + loggedbytes += size * nmemb; + return fwrite(ptr, size, nmemb, f); +} + +/* -- msgpack writer - see http://msgpack.org/index.html ------------------ */ +/* XXX assumes little endian cpu. */ + +static void fixmap(int size) { + cfputc(0x80|size, fp); /* map header with size (fixmap: at most 15 entries) */ +} + +/* Write a msgpack "str 16". Its length field is only 16 bits wide, so longer +** strings are truncated to keep the header and the payload in agreement. */ +static void str_16(const char *s) { + size_t len = strlen(s); + uint16_t biglen; + if (len > 0xffff) len = 0xffff; + biglen = __builtin_bswap16((uint16_t)len); + cfputc(0xda, fp); /* str 16 header */ + cfwrite(&biglen, sizeof(biglen), 1, fp); /* string length (big endian) */ + cfwrite(s, 1, len, fp); /* string contents: exactly len bytes */ +} + +static void uint_64(uint64_t n) { + uint64_t big = __builtin_bswap64(n); + cfputc(0xcf, fp); /* uint 64 header */ + cfwrite(&big, sizeof(big), 1, fp); /* value */ +} + +static void bin_32(const void *ptr, int n) { + uint32_t biglen = __builtin_bswap32(n); + cfputc(0xc6, fp); /* bin 32 header */ + cfwrite(&biglen, sizeof(biglen), 1, fp); /* length */ + cfwrite(ptr, n, 1, fp); /* data */ +} + +/* -- low-level object logging API ---------------------------------------- */ + +/* Log a snapshot of an object in memory. 
*/ +static void log_mem(const char *type, void *ptr, unsigned int size) { + fixmap(4); + str_16("type"); /* = */ str_16("memory"); + str_16("hint"); /* = */ str_16(type); + str_16("address"); /* = */ uint_64((uint64_t)ptr); + str_16("data"); /* = */ bin_32(ptr, size); +} + +static void log_event(const char *type, int nattributes) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + lua_assert(nattributes >= 0 && nattributes <= 12); /* fixmap holds at most 15 entries; 3 are written here */ + fixmap(nattributes+3); + str_16("nanotime"); /* = */ uint_64(ts.tv_sec * 1000000000LL + ts.tv_nsec); + str_16("type"); /* = */ str_16("event"); + str_16("event"); /* = */ str_16(type); + /* Caller fills in the further nattributes... */ +} + +static void log_blob(const char *name, const char *ptr, int size) { + fixmap(3); + str_16("type"); /* = */ str_16("blob"); + str_16("name"); /* = */ str_16(name); + str_16("data"); /* = */ bin_32(ptr, size); +} + +/* Log objects that define the virtual machine. */ +static void lj_auditlog_vm_definitions(void) +{ + log_mem("lj_ir_mode", (void*)&lj_ir_mode, sizeof(lj_ir_mode)); + log_blob("lj_dwarf.dwo", &_binary_lj_dwarf_dwo_start, &_binary_lj_dwarf_dwo_end - &_binary_lj_dwarf_dwo_start); +} + +/* Check that the log is open before logging a message. */ +static int ensure_log_started(void) { + if (fp != NULL) { + if (loggedbytes < sizelimit) { + return 1; + } else { + /* Log has grown to size limit. */ + log_event("auditlog_size_limit_reached", 0); + fclose(fp); + fp = NULL; + error = 1; + return 0; + } + } + /* fp == NULL from here on: the log is either not started yet or was shut down. */ + if (error) return 0; /* Log has already errored? */ + /* Start logging into a memory buffer. The entries will be migrated + ** onto disk when (if) a file system path is provided. + ** (We want the log to be complete even if it is opened after some + ** JIT activity has occurred.) 
+ */ + if ((fp = open_memstream(&membuffer, &membuffersize)) != NULL) { + lj_auditlog_vm_definitions(); + sizelimit = MAX_MEM_BUFFER; + return 1; + } else { + error = 1; + return 0; + } +} + +/* Open the auditlog at a new path. +** Migrate in-memory log onto file. +** Can only open once. +** Return zero on failure. +*/ +int lj_auditlog_open(const char *path, size_t maxsize) +{ + FILE *newfp; + if (open || error) return 0; /* Sorry, too late... */ + sizelimit = maxsize ? maxsize : DEFAULT_SIZE_LIMIT; /* Lift the in-memory cap before the check below. */ + if (!ensure_log_started()) return 0; + sizelimit = maxsize ? maxsize : DEFAULT_SIZE_LIMIT; /* A freshly started log resets the limit: reapply it. */ + if ((newfp = fopen(path, "wb+")) == NULL) return 0; /* Cannot create the file: keep logging to memory. */ + fflush(fp); /* Migrate log entries from memory buffer: flushing brings membuffer/membuffersize up to date. */ + if (fwrite(membuffer, 1, membuffersize, newfp) != membuffersize) { fclose(newfp); return 0; } + fp = newfp; /* NOTE(review): the old memory stream and membuffer are not released here. */ + open = 1; + return 1; +} + +/* -- high-level LuaJIT object logging ------------------------------------ */ + +static void log_GCobj(GCobj *o); + +static void log_jit_State(jit_State *J) +{ + log_mem("BCRecLog[]", J->bclog, J->nbclog * sizeof(*J->bclog)); + log_mem("jit_State", J, sizeof(*J)); +} + +static void log_GCtrace(GCtrace *T) +{ + IRRef ref; + log_mem("MCode[]", T->mcode, T->szmcode); + log_mem("SnapShot[]", T->snap, T->nsnap * sizeof(*T->snap)); + log_mem("SnapEntry[]", T->snapmap, T->nsnapmap * sizeof(*T->snapmap)); + log_mem("IRIns[]", &T->ir[T->nk], (T->nins - T->nk + 1) * sizeof(IRIns)); + log_mem("uint16_t[]", T->szirmcode, T->nszirmcode * sizeof(uint16_t)); + for (ref = T->nk; ref < REF_TRUE; ref++) { + IRIns *ir = &T->ir[ref]; + if (ir->o == IR_KGC) { + GCobj *o = ir_kgc(ir); + /* Log referenced string constants. For e.g. HREFK table keys. */ + switch (o->gch.gct) { + case ~LJ_TSTR: + case ~LJ_TFUNC: + log_GCobj(o); + break; + } + } + if (irt_is64(ir->t) && ir->o != IR_KNULL) { + /* Skip over 64-bit inline operand for this instruction. 
*/ + ref++; + } + } + log_mem("GCtrace", T, sizeof(*T)); +} + +static void log_GCproto(GCproto *pt) +{ + log_GCobj(gcref(pt->chunkname)); /* log the chunk name object before the prototype */ + log_mem("GCproto", pt, pt->sizept); /* includes colocated arrays */ +} + +static void log_GCstr(GCstr *s) +{ + log_mem("GCstr", s, sizeof(*s) + s->len); /* header plus s->len bytes following it */ +} + +static void log_GCfunc(GCfunc *f) +{ + log_mem("GCfunc", f, sizeof(*f)); +} + +static void log_GCobj(GCobj *o) +{ + /* Log some kinds of objects (could be fancier...) */ + switch (o->gch.gct) { + case ~LJ_TPROTO: + log_GCproto((GCproto *)o); + break; + case ~LJ_TTRACE: + log_GCtrace((GCtrace *)o); + break; + case ~LJ_TSTR: + log_GCstr((GCstr *)o); + break; + case ~LJ_TFUNC: + log_GCfunc((GCfunc *)o); + } /* other object kinds are not logged */ +} + +/* API functions */ + +/* Log a trace that has just been compiled. */ +void lj_auditlog_trace_stop(jit_State *J, GCtrace *T) +{ + if (ensure_log_started()) { + log_GCtrace(T); + log_jit_State(J); + log_event("trace_stop", 2); + str_16("GCtrace"); /* = */ uint_64((uint64_t)T); + str_16("jit_State"); /* = */ uint_64((uint64_t)J); + } +} + +void lj_auditlog_trace_abort(jit_State *J, TraceError e) +{ + if (ensure_log_started()) { + log_jit_State(J); + log_event("trace_abort", 2); + str_16("TraceError"); /* = */ uint_64(e); + str_16("jit_State"); /* = */ uint_64((uint64_t)J); + } +} + +void lj_auditlog_lex(const char *chunkname, const char *s, int sz) +{ + if (ensure_log_started()) { + log_mem("char[]", (void*)s, sz); /* memory snapshot of the sz source bytes */ + log_event("lex", 2); + str_16("chunkname"); /* = */ str_16(chunkname); + str_16("source"); /* = */ bin_32((void*)s, sz); + } +} + +void lj_auditlog_new_prototype(GCproto *pt) +{ + if (ensure_log_started()) { + log_GCproto(pt); + log_event("new_prototype", 1); + str_16("GCproto"); /* = */ uint_64((uint64_t)pt); + } +} + +void lj_auditlog_trace_flushall(jit_State *J) +{ + if (ensure_log_started()) { + log_jit_State(J); + log_event("trace_flushall", 1); + str_16("jit_State"); /* = */ uint_64((uint64_t)J); + } +} + +void lj_auditlog_new_ctypeid(CTypeID id, 
const char *desc) +{ + if (ensure_log_started()) { + log_event("new_ctypeid", 2); + str_16("id"); /* = */ uint_64(id); + str_16("desc"); /* = */ str_16(desc); + } +} + diff --git a/src/lj_auditlog.h b/src/lj_auditlog.h new file mode 100644 index 0000000000..3948b1e5d1 --- /dev/null +++ b/src/lj_auditlog.h @@ -0,0 +1,21 @@ +/* +** Audit log. Records JIT/runtime events for offline analysis. +*/ + +#ifndef _LJ_AUDITLOG_H +#define _LJ_AUDITLOG_H + +#include "lj_jit.h" +#include "lj_trace.h" +#include "lj_ctype.h" + +int lj_auditlog_open(const char *path, size_t maxsize); + +void lj_auditlog_new_prototype(GCproto *pt); +void lj_auditlog_lex(const char *chunkname, const char *s, int sz); +void lj_auditlog_trace_flushall(jit_State *J); +void lj_auditlog_trace_stop(jit_State *J, GCtrace *T); +void lj_auditlog_trace_abort(jit_State *J, TraceError e); +void lj_auditlog_new_ctypeid(CTypeID id, const char *desc); + +#endif diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h index fdfc6ec0c6..97b39f190d 100644 --- a/src/lj_bcdump.h +++ b/src/lj_bcdump.h @@ -36,7 +36,7 @@ /* If you perform *any* kind of private modifications to the bytecode itself ** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher. */ -#define BCDUMP_VERSION 2 +#define BCDUMP_VERSION 3 /* Compatibility flags. */ #define BCDUMP_F_BE 0x01 diff --git a/src/lj_bcread.c b/src/lj_bcread.c index 48c5e7c7f5..b890016263 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c @@ -13,20 +13,19 @@ #include "lj_str.h" #include "lj_tab.h" #include "lj_bc.h" -#if LJ_HASFFI #include "lj_ctype.h" #include "lj_cdata.h" #include "lualib.h" -#endif #include "lj_lex.h" #include "lj_bcdump.h" #include "lj_state.h" #include "lj_strfmt.h" +#include "lj_auditlog.h" /* Reuse some lexer fields for our own purposes. 
*/ #define bcread_flags(ls) ls->level #define bcread_swap(ls) \ - ((bcread_flags(ls) & BCDUMP_F_BE) != LJ_BE*BCDUMP_F_BE) + ((bcread_flags(ls) & BCDUMP_F_BE) != 0) #define bcread_oldtop(L, ls) restorestack(L, ls->lastline) #define bcread_savetop(L, ls, top) \ ls->lastline = (BCLine)savestack(L, (top)) @@ -153,18 +152,12 @@ static uint32_t bcread_uleb128_33(LexState *ls) /* Read debug info of a prototype. */ static void bcread_dbg(LexState *ls, GCproto *pt, MSize sizedbg) { - void *lineinfo = (void *)proto_lineinfo(pt); - bcread_block(ls, lineinfo, sizedbg); + uint32_t *lineinfo = (uint32_t*)proto_lineinfo(pt); + bcread_block(ls, (void*)lineinfo, sizedbg); /* Swap lineinfo if the endianess differs. */ - if (bcread_swap(ls) && pt->numline >= 256) { - MSize i, n = pt->sizebc-1; - if (pt->numline < 65536) { - uint16_t *p = (uint16_t *)lineinfo; - for (i = 0; i < n; i++) p[i] = (uint16_t)((p[i] >> 8)|(p[i] << 8)); - } else { - uint32_t *p = (uint32_t *)lineinfo; - for (i = 0; i < n; i++) p[i] = lj_bswap(p[i]); - } + if (bcread_swap(ls)) { + int i; + for (i = 0; i < pt->sizebc-1; i++) lineinfo[i] = lj_bswap(lineinfo[i]); } } @@ -172,9 +165,7 @@ static void bcread_dbg(LexState *ls, GCproto *pt, MSize sizedbg) static const void *bcread_varinfo(GCproto *pt) { const uint8_t *p = proto_uvinfo(pt); - MSize n = pt->sizeuv; - if (n) while (*p++ || --n) ; - return p; + return p + pt->sizeuv; } /* Read a single constant key/value of a template table. */ @@ -233,7 +224,6 @@ static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc) setgcref(*kr, obj2gco(lj_str_new(ls->L, p, len))); } else if (tp == BCDUMP_KGC_TAB) { setgcref(*kr, obj2gco(bcread_ktab(ls))); -#if LJ_HASFFI } else if (tp != BCDUMP_KGC_CHILD) { CTypeID id = tp == BCDUMP_KGC_COMPLEX ? CTID_COMPLEX_DOUBLE : tp == BCDUMP_KGC_I64 ? 
CTID_INT64 : CTID_UINT64; @@ -247,7 +237,6 @@ static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc) p[1].u32.lo = bcread_uleb128(ls); p[1].u32.hi = bcread_uleb128(ls); } -#endif } else { lua_State *L = ls->L; lua_assert(tp == BCDUMP_KGC_CHILD); @@ -310,7 +299,7 @@ GCproto *lj_bcread_proto(LexState *ls) { GCproto *pt; MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; - MSize ofsk, ofsuv, ofsdbg; + MSize ofsk, ofsuv, ofsdbg, ofsdeclname = 0; MSize sizedbg = 0; BCLine firstline = 0, numline = 0; @@ -327,6 +316,7 @@ GCproto *lj_bcread_proto(LexState *ls) if (sizedbg) { firstline = bcread_uleb128(ls); numline = bcread_uleb128(ls); + ofsdeclname = bcread_uleb128(ls); } } @@ -371,16 +361,19 @@ GCproto *lj_bcread_proto(LexState *ls) pt->firstline = firstline; pt->numline = numline; if (sizedbg) { - MSize sizeli = (sizebc-1) << (numline < 256 ? 0 : numline < 65536 ? 1 : 2); + MSize sizeli = (sizebc-1) * sizeof(BCLine); setmref(pt->lineinfo, (char *)pt + ofsdbg); setmref(pt->uvinfo, (char *)pt + ofsdbg + sizeli); + setmref(pt->declname, (char *)pt + ofsdbg + ofsdeclname); bcread_dbg(ls, pt, sizedbg); setmref(pt->varinfo, bcread_varinfo(pt)); } else { setmref(pt->lineinfo, NULL); setmref(pt->uvinfo, NULL); setmref(pt->varinfo, NULL); + setmref(pt->declname, NULL); } + lj_auditlog_new_prototype(pt); return pt; } @@ -396,16 +389,12 @@ static int bcread_header(LexState *ls) if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0; if ((flags & BCDUMP_F_FFI)) { -#if LJ_HASFFI lua_State *L = ls->L; if (!ctype_ctsG(G(L))) { ptrdiff_t oldtop = savestack(L, L->top); luaopen_ffi(L); /* Load FFI library on-demand. 
*/ L->top = restorestack(L, oldtop); } -#else - return 0; -#endif } if ((flags & BCDUMP_F_STRIP)) { ls->chunkname = lj_str_newz(ls->L, ls->chunkarg); diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index 5e05caeaf5..92f3dd92a3 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c @@ -10,13 +10,9 @@ #include "lj_gc.h" #include "lj_buf.h" #include "lj_bc.h" -#if LJ_HASFFI #include "lj_ctype.h" -#endif -#if LJ_HASJIT #include "lj_dispatch.h" #include "lj_jit.h" -#endif #include "lj_strfmt.h" #include "lj_bcdump.h" #include "lj_vm.h" @@ -43,11 +39,8 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) p = lj_buf_more(&ctx->sb, 5+len); p = lj_strfmt_wuleb128(p, BCDUMP_KTAB_STR+len); p = lj_buf_wmem(p, strdata(str), len); - } else if (tvisint(o)) { - *p++ = BCDUMP_KTAB_INT; - p = lj_strfmt_wuleb128(p, intV(o)); } else if (tvisnum(o)) { - if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ + if (narrow) { /* Narrow number constants to integers. */ lua_Number num = numV(o); int32_t k = lj_num2int(num); if (num == (lua_Number)k) { /* -0 is never a constant. 
*/ @@ -123,7 +116,6 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) } else if (o->gch.gct == ~LJ_TPROTO) { lua_assert((pt->flags & PROTO_CHILD)); tp = BCDUMP_KGC_CHILD; -#if LJ_HASFFI } else if (o->gch.gct == ~LJ_TCDATA) { CTypeID id = gco2cd(o)->ctypeid; need = 1+4*5; @@ -135,7 +127,6 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) lua_assert(id == CTID_COMPLEX_DOUBLE); tp = BCDUMP_KGC_COMPLEX; } -#endif } else { lua_assert(o->gch.gct == ~LJ_TTAB); tp = BCDUMP_KGC_TAB; @@ -150,7 +141,6 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) } else if (tp == BCDUMP_KGC_TAB) { bcwrite_ktab(ctx, p, gco2tab(o)); continue; -#if LJ_HASFFI } else if (tp != BCDUMP_KGC_CHILD) { cTValue *q = (TValue *)cdataptr(gco2cd(o)); p = lj_strfmt_wuleb128(p, q[0].u32.lo); @@ -159,7 +149,6 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) p = lj_strfmt_wuleb128(p, q[1].u32.lo); p = lj_strfmt_wuleb128(p, q[1].u32.hi); } -#endif } setsbufP(&ctx->sb, p); } @@ -173,27 +162,19 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) char *p = lj_buf_more(&ctx->sb, 10*sizekn); for (i = 0; i < sizekn; i++, o++) { int32_t k; - if (tvisint(o)) { - k = intV(o); - goto save_int; - } else { - /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */ - if (!LJ_DUALNUM) { /* Narrow number constants to integers. */ - lua_Number num = numV(o); - k = lj_num2int(num); - if (num == (lua_Number)k) { /* -0 is never a constant. */ - save_int: - p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u)); - if (k < 0) - p[-1] = (p[-1] & 7) | ((k>>27) & 0x18); - continue; - } - } - p = lj_strfmt_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u))); - if (o->u32.lo >= 0x80000000u) - p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18); - p = lj_strfmt_wuleb128(p, o->u32.hi); + /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */ + lua_Number num = numV(o); + k = lj_num2int(num); + if (num == (lua_Number)k) { /* -0 is never a constant. 
*/ + p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u)); + if (k < 0) + p[-1] = (p[-1] & 7) | ((k>>27) & 0x18); + continue; } + p = lj_strfmt_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u))); + if (o->u32.lo >= 0x80000000u) + p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18); + p = lj_strfmt_wuleb128(p, o->u32.hi); } setsbufP(&ctx->sb, p); } @@ -202,12 +183,9 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt) { MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */ -#if LJ_HASJIT uint8_t *q = (uint8_t *)p; -#endif p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns)); UNUSED(ctx); -#if LJ_HASJIT /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */ if ((pt->flags & PROTO_ILOOP) || pt->trace) { jit_State *J = L2J(sbufL(&ctx->sb)); @@ -226,14 +204,13 @@ static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt) } } } -#endif return p; } /* Write prototype. */ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) { - MSize sizedbg = 0; + MSize sizedbg = 0, ofsdeclname = 0; char *p; /* Recursively write children of prototype. */ @@ -261,12 +238,15 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) p = lj_strfmt_wuleb128(p, pt->sizekn); p = lj_strfmt_wuleb128(p, pt->sizebc-1); if (!ctx->strip) { - if (proto_lineinfo(pt)) + if (proto_lineinfo(pt)) { sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); + ofsdeclname = (MSize)((char*)proto_declname(pt) - (char *)proto_lineinfo(pt)); + } p = lj_strfmt_wuleb128(p, sizedbg); if (sizedbg) { p = lj_strfmt_wuleb128(p, pt->firstline); p = lj_strfmt_wuleb128(p, pt->numline); + p = lj_strfmt_wuleb128(p, ofsdeclname); } } @@ -309,7 +289,6 @@ static void bcwrite_header(BCWriteCtx *ctx) *p++ = BCDUMP_HEAD3; *p++ = BCDUMP_VERSION; *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) + - LJ_BE*BCDUMP_F_BE + ((ctx->pt->flags & PROTO_FFI) ? 
BCDUMP_F_FFI : 0) + LJ_FR2*BCDUMP_F_FR2; if (!ctx->strip) { diff --git a/src/lj_buf.c b/src/lj_buf.c index 0dfe7f9807..4ecf6fc620 100644 --- a/src/lj_buf.c +++ b/src/lj_buf.c @@ -28,7 +28,7 @@ static void buf_grow(SBuf *sb, MSize sz) setmref(sb->e, b + nsz); } -LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz) +LJ_NOINLINE char *lj_buf_need2(SBuf *sb, MSize sz) { lua_assert(sz > sbufsz(sb)); if (LJ_UNLIKELY(sz > LJ_MAX_BUF)) @@ -37,7 +37,7 @@ LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz) return sbufB(sb); } -LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz) +LJ_NOINLINE char *lj_buf_more2(SBuf *sb, MSize sz) { MSize len = sbuflen(sb); lua_assert(sz > sbufleft(sb)); @@ -47,7 +47,7 @@ LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz) return sbufP(sb); } -void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb) +void lj_buf_shrink(lua_State *L, SBuf *sb) { char *b = sbufB(sb); MSize osz = (MSize)(sbufE(sb) - b); @@ -60,7 +60,7 @@ void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb) } } -char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz) +char * lj_buf_tmp(lua_State *L, MSize sz) { SBuf *sb = &G(L)->tmpbuf; setsbufL(sb, L); @@ -77,7 +77,7 @@ SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len) return sb; } -SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c) +SBuf * lj_buf_putchar(SBuf *sb, int c) { char *p = lj_buf_more(sb, 1); *p++ = (char)c; @@ -85,7 +85,7 @@ SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c) return sb; } -SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s) +SBuf * lj_buf_putstr(SBuf *sb, GCstr *s) { MSize len = s->len; char *p = lj_buf_more(sb, len); @@ -96,7 +96,7 @@ SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s) /* -- High-level buffer put operations ------------------------------------ */ -SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s) +SBuf * lj_buf_putstr_reverse(SBuf *sb, GCstr *s) { MSize len = s->len; char *p = lj_buf_more(sb, len), *e = p+len; @@ -107,37 
+107,29 @@ SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s) return sb; } -SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s) +SBuf * lj_buf_putstr_lower(SBuf *sb, GCstr *s) { MSize len = s->len; char *p = lj_buf_more(sb, len), *e = p+len; const char *q = strdata(s); for (; p < e; p++, q++) { uint32_t c = *(unsigned char *)q; -#if LJ_TARGET_PPC - *p = c + ((c >= 'A' && c <= 'Z') << 5); -#else if (c >= 'A' && c <= 'Z') c += 0x20; *p = c; -#endif } setsbufP(sb, p); return sb; } -SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s) +SBuf * lj_buf_putstr_upper(SBuf *sb, GCstr *s) { MSize len = s->len; char *p = lj_buf_more(sb, len), *e = p+len; const char *q = strdata(s); for (; p < e; p++, q++) { uint32_t c = *(unsigned char *)q; -#if LJ_TARGET_PPC - *p = c - ((c >= 'a' && c <= 'z') << 5); -#else if (c >= 'a' && c <= 'z') c -= 0x20; *p = c; -#endif } setsbufP(sb, p); return sb; @@ -181,8 +173,6 @@ SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e) } else if (tvisstr(o)) { MSize len = strV(o)->len; p = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len); - } else if (tvisint(o)) { - p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o)); } else if (tvisnum(o)) { p = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen); } else { @@ -201,7 +191,7 @@ SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e) /* -- Miscellaneous buffer operations ------------------------------------- */ -GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb) +GCstr * lj_buf_tostr(SBuf *sb) { return lj_str_new(sbufL(sb), sbufB(sb), sbuflen(sb)); } @@ -217,7 +207,7 @@ GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2) } /* Read ULEB128 from buffer. 
*/ -uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp) +uint32_t lj_buf_ruleb128(const char **pp) { const uint8_t *p = (const uint8_t *)*pp; uint32_t v = *p++; diff --git a/src/lj_buf.h b/src/lj_buf.h index a405169444..ab858f240d 100644 --- a/src/lj_buf.h +++ b/src/lj_buf.h @@ -22,10 +22,10 @@ #define setsbufL(sb, l) (setmref((sb)->L, (l))) /* Buffer management */ -LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz); -LJ_FUNC char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz); -LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb); -LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz); +LJ_FUNC char *lj_buf_need2(SBuf *sb, MSize sz); +LJ_FUNC char *lj_buf_more2(SBuf *sb, MSize sz); +LJ_FUNC void lj_buf_shrink(lua_State *L, SBuf *sb); +LJ_FUNC char * lj_buf_tmp(lua_State *L, MSize sz); static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb) { @@ -67,8 +67,8 @@ static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz) /* Low-level buffer put operations */ LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len); -LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c); -LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s); +LJ_FUNC SBuf * lj_buf_putchar(SBuf *sb, int c); +LJ_FUNC SBuf * lj_buf_putstr(SBuf *sb, GCstr *s); static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len) { @@ -83,17 +83,17 @@ static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c) } /* High-level buffer put operations */ -LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s); -LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s); -LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s); +LJ_FUNCA SBuf * lj_buf_putstr_reverse(SBuf *sb, GCstr *s); +LJ_FUNCA SBuf * lj_buf_putstr_lower(SBuf *sb, GCstr *s); +LJ_FUNCA SBuf * lj_buf_putstr_upper(SBuf *sb, GCstr *s); LJ_FUNC SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep); LJ_FUNC SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, 
int32_t e); /* Miscellaneous buffer operations */ -LJ_FUNCA GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb); +LJ_FUNCA GCstr * lj_buf_tostr(SBuf *sb); LJ_FUNC GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2); -LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp); +LJ_FUNC uint32_t lj_buf_ruleb128(const char **pp); static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb) { diff --git a/src/lj_carith.c b/src/lj_carith.c index 218abd260f..91f3d5a415 100644 --- a/src/lj_carith.c +++ b/src/lj_carith.c @@ -5,7 +5,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_gc.h" #include "lj_err.h" @@ -51,9 +50,6 @@ static int carith_checkarg(lua_State *L, CTState *cts, CDArith *ca) if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct); ca->ct[i] = ct; ca->p[i] = p; - } else if (tvisint(o)) { - ca->ct[i] = ctype_get(cts, CTID_INT32); - ca->p[i] = (uint8_t *)&o->i; } else if (tvisnum(o)) { ca->ct[i] = ctype_get(cts, CTID_DOUBLE); ca->p[i] = (uint8_t *)&o->n; @@ -276,14 +272,8 @@ int lj_carith_op(lua_State *L, MMS mm) /* -- 64 bit bit operations helpers --------------------------------------- */ -#if LJ_64 #define B64DEF(name) \ static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh) -#else -/* Not inlined on 32 bit archs, since some of these are quite lengthy. */ -#define B64DEF(name) \ - uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh) -#endif B64DEF(shl64) { return x << (sh&63); } B64DEF(shr64) { return x >> (sh&63); } @@ -338,25 +328,13 @@ uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id) } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) { goto err; } - if (LJ_LIKELY(tvisint(o))) { - return (uint32_t)intV(o); - } else { - int32_t i = lj_num2bit(numV(o)); - if (LJ_DUALNUM) setintV(o, i); - return (uint32_t)i; - } + int32_t i = lj_num2bit(numV(o)); + return (uint32_t)i; } /* -- 64 bit integer arithmetic helpers ----------------------------------- */ -#if LJ_32 && LJ_HASJIT -/* Signed/unsigned 64 bit multiplication. 
*/ -int64_t lj_carith_mul64(int64_t a, int64_t b) -{ - return a * b; -} -#endif /* Unsigned 64 bit division. */ uint64_t lj_carith_divu64(uint64_t a, uint64_t b) @@ -426,4 +404,3 @@ int64_t lj_carith_powi64(int64_t x, int64_t k) return (int64_t)lj_carith_powu64((uint64_t)x, (uint64_t)k); } -#endif diff --git a/src/lj_carith.h b/src/lj_carith.h index 67d976bf0c..41c73ffe65 100644 --- a/src/lj_carith.h +++ b/src/lj_carith.h @@ -8,23 +8,12 @@ #include "lj_obj.h" -#if LJ_HASFFI LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); -#if LJ_32 -LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh); -LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh); -LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh); -LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh); -LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh); -#endif LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op); LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id); -#if LJ_32 && LJ_HASJIT -LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k); -#endif LJ_FUNC uint64_t lj_carith_divu64(uint64_t a, uint64_t b); LJ_FUNC int64_t lj_carith_divi64(int64_t a, int64_t b); LJ_FUNC uint64_t lj_carith_modu64(uint64_t a, uint64_t b); @@ -32,6 +21,5 @@ LJ_FUNC int64_t lj_carith_modi64(int64_t a, int64_t b); LJ_FUNC uint64_t lj_carith_powu64(uint64_t x, uint64_t k); LJ_FUNC int64_t lj_carith_powi64(int64_t x, int64_t k); -#endif #endif diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 2b7ca36456..948a1f27bd 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -5,7 +5,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_gc.h" #include "lj_err.h" @@ -17,119 +16,6 @@ #include "lj_trace.h" /* Target-specific handling of register arguments. */ -#if LJ_TARGET_X86 -/* -- x86 calling conventions --------------------------------------------- */ - -#if LJ_ABI_WIN - -#define CCALL_HANDLE_STRUCTRET \ - /* Return structs bigger than 8 by reference (on stack only). 
*/ \ - cc->retref = (sz > 8); \ - if (cc->retref) cc->stack[nsp++] = (GPRArg)dp; - -#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET - -#else - -#if LJ_TARGET_OSX - -#define CCALL_HANDLE_STRUCTRET \ - /* Return structs of size 1, 2, 4 or 8 in registers. */ \ - cc->retref = !(sz == 1 || sz == 2 || sz == 4 || sz == 8); \ - if (cc->retref) { \ - if (ngpr < maxgpr) \ - cc->gpr[ngpr++] = (GPRArg)dp; \ - else \ - cc->stack[nsp++] = (GPRArg)dp; \ - } else { /* Struct with single FP field ends up in FPR. */ \ - cc->resx87 = ccall_classify_struct(cts, ctr); \ - } - -#define CCALL_HANDLE_STRUCTRET2 \ - if (cc->resx87) sp = (uint8_t *)&cc->fpr[0]; \ - memcpy(dp, sp, ctr->size); - -#else - -#define CCALL_HANDLE_STRUCTRET \ - cc->retref = 1; /* Return all structs by reference (in reg or on stack). */ \ - if (ngpr < maxgpr) \ - cc->gpr[ngpr++] = (GPRArg)dp; \ - else \ - cc->stack[nsp++] = (GPRArg)dp; - -#endif - -#define CCALL_HANDLE_COMPLEXRET \ - /* Return complex float in GPRs and complex double by reference. */ \ - cc->retref = (sz > 8); \ - if (cc->retref) { \ - if (ngpr < maxgpr) \ - cc->gpr[ngpr++] = (GPRArg)dp; \ - else \ - cc->stack[nsp++] = (GPRArg)dp; \ - } - -#endif - -#define CCALL_HANDLE_COMPLEXRET2 \ - if (!cc->retref) \ - *(int64_t *)dp = *(int64_t *)sp; /* Copy complex float from GPRs. */ - -#define CCALL_HANDLE_STRUCTARG \ - ngpr = maxgpr; /* Pass all structs by value on the stack. */ - -#define CCALL_HANDLE_COMPLEXARG \ - isfp = 1; /* Pass complex by value on stack. */ - -#define CCALL_HANDLE_REGARG \ - if (!isfp) { /* Only non-FP values may be passed in registers. */ \ - if (n > 1) { /* Anything > 32 bit is passed on the stack. */ \ - if (!LJ_ABI_WIN) ngpr = maxgpr; /* Prevent reordering. 
*/ \ - } else if (ngpr + 1 <= maxgpr) { \ - dp = &cc->gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } \ - } - -#elif LJ_TARGET_X64 && LJ_ABI_WIN -/* -- Windows/x64 calling conventions ------------------------------------- */ - -#define CCALL_HANDLE_STRUCTRET \ - /* Return structs of size 1, 2, 4 or 8 in a GPR. */ \ - cc->retref = !(sz == 1 || sz == 2 || sz == 4 || sz == 8); \ - if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; - -#define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET - -#define CCALL_HANDLE_COMPLEXRET2 \ - if (!cc->retref) \ - *(int64_t *)dp = *(int64_t *)sp; /* Copy complex float from GPRs. */ - -#define CCALL_HANDLE_STRUCTARG \ - /* Pass structs of size 1, 2, 4 or 8 in a GPR by value. */ \ - if (!(sz == 1 || sz == 2 || sz == 4 || sz == 8)) { \ - rp = cdataptr(lj_cdata_new(cts, did, sz)); \ - sz = CTSIZE_PTR; /* Pass all other structs by reference. */ \ - } - -#define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex float in a GPR and complex double by reference. */ \ - if (sz != 2*sizeof(float)) { \ - rp = cdataptr(lj_cdata_new(cts, did, sz)); \ - sz = CTSIZE_PTR; \ - } - -/* Windows/x64 argument registers are strictly positional (use ngpr). */ -#define CCALL_HANDLE_REGARG \ - if (isfp) { \ - if (ngpr < maxgpr) { dp = &cc->fpr[ngpr++]; nfpr = ngpr; goto done; } \ - } else { \ - if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \ - } - -#elif LJ_TARGET_X64 /* -- POSIX/x64 calling conventions --------------------------------------- */ #define CCALL_HANDLE_STRUCTRET \ @@ -187,377 +73,6 @@ } \ } -#elif LJ_TARGET_ARM -/* -- ARM calling conventions --------------------------------------------- */ - -#if LJ_ABI_SOFTFP - -#define CCALL_HANDLE_STRUCTRET \ - /* Return structs of size <= 4 in a GPR. */ \ - cc->retref = !(sz <= 4); \ - if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; - -#define CCALL_HANDLE_COMPLEXRET \ - cc->retref = 1; /* Return all complex values by reference. 
*/ \ - cc->gpr[ngpr++] = (GPRArg)dp; - -#define CCALL_HANDLE_COMPLEXRET2 \ - UNUSED(dp); /* Nothing to do. */ - -#define CCALL_HANDLE_STRUCTARG \ - /* Pass all structs by value in registers and/or on the stack. */ - -#define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex by value in 2 or 4 GPRs. */ - -#define CCALL_HANDLE_REGARG_FP1 -#define CCALL_HANDLE_REGARG_FP2 - -#else - -#define CCALL_HANDLE_STRUCTRET \ - cc->retref = !ccall_classify_struct(cts, ctr, ct); \ - if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; - -#define CCALL_HANDLE_STRUCTRET2 \ - if (ccall_classify_struct(cts, ctr, ct) > 1) sp = (uint8_t *)&cc->fpr[0]; \ - memcpy(dp, sp, ctr->size); - -#define CCALL_HANDLE_COMPLEXRET \ - if (!(ct->info & CTF_VARARG)) cc->retref = 0; /* Return complex in FPRs. */ - -#define CCALL_HANDLE_COMPLEXRET2 \ - if (!(ct->info & CTF_VARARG)) memcpy(dp, &cc->fpr[0], ctr->size); - -#define CCALL_HANDLE_STRUCTARG \ - isfp = (ccall_classify_struct(cts, d, ct) > 1); - /* Pass all structs by value in registers and/or on the stack. */ - -#define CCALL_HANDLE_COMPLEXARG \ - isfp = 1; /* Pass complex by value in FPRs or on stack. */ - -#define CCALL_HANDLE_REGARG_FP1 \ - if (isfp && !(ct->info & CTF_VARARG)) { \ - if ((d->info & CTF_ALIGN) > CTALIGN_PTR) { \ - if (nfpr + (n >> 1) <= CCALL_NARG_FPR) { \ - dp = &cc->fpr[nfpr]; \ - nfpr += (n >> 1); \ - goto done; \ - } \ - } else { \ - if (sz > 1 && fprodd != nfpr) fprodd = 0; \ - if (fprodd) { \ - if (2*nfpr+n <= 2*CCALL_NARG_FPR+1) { \ - dp = (void *)&cc->fpr[fprodd-1].f[1]; \ - nfpr += (n >> 1); \ - if ((n & 1)) fprodd = 0; else fprodd = nfpr-1; \ - goto done; \ - } \ - } else { \ - if (2*nfpr+n <= 2*CCALL_NARG_FPR) { \ - dp = (void *)&cc->fpr[nfpr]; \ - nfpr += (n >> 1); \ - if ((n & 1)) fprodd = ++nfpr; else fprodd = 0; \ - goto done; \ - } \ - } \ - } \ - fprodd = 0; /* No reordering after the first FP value is on stack. 
*/ \ - } else { - -#define CCALL_HANDLE_REGARG_FP2 } - -#endif - -#define CCALL_HANDLE_REGARG \ - CCALL_HANDLE_REGARG_FP1 \ - if ((d->info & CTF_ALIGN) > CTALIGN_PTR) { \ - if (ngpr < maxgpr) \ - ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ - } \ - if (ngpr < maxgpr) { \ - dp = &cc->gpr[ngpr]; \ - if (ngpr + n > maxgpr) { \ - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ - ngpr = maxgpr; \ - } else { \ - ngpr += n; \ - } \ - goto done; \ - } CCALL_HANDLE_REGARG_FP2 - -#define CCALL_HANDLE_RET \ - if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0]; - -#elif LJ_TARGET_ARM64 -/* -- ARM64 calling conventions ------------------------------------------- */ - -#define CCALL_HANDLE_STRUCTRET \ - cc->retref = !ccall_classify_struct(cts, ctr); \ - if (cc->retref) cc->retp = dp; - -#define CCALL_HANDLE_STRUCTRET2 \ - unsigned int cl = ccall_classify_struct(cts, ctr); \ - if ((cl & 4)) { /* Combine float HFA from separate registers. */ \ - CTSize i = (cl >> 8) - 1; \ - do { ((uint32_t *)dp)[i] = cc->fpr[i].u32; } while (i--); \ - } else { \ - if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \ - memcpy(dp, sp, ctr->size); \ - } - -#define CCALL_HANDLE_COMPLEXRET \ - /* Complex values are returned in one or two FPRs. */ \ - cc->retref = 0; - -#define CCALL_HANDLE_COMPLEXRET2 \ - if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ - ((float *)dp)[0] = cc->fpr[0].f; \ - ((float *)dp)[1] = cc->fpr[1].f; \ - } else { /* Copy complex double from FPRs. */ \ - ((double *)dp)[0] = cc->fpr[0].d; \ - ((double *)dp)[1] = cc->fpr[1].d; \ - } - -#define CCALL_HANDLE_STRUCTARG \ - unsigned int cl = ccall_classify_struct(cts, d); \ - if (cl == 0) { /* Pass struct by reference. */ \ - rp = cdataptr(lj_cdata_new(cts, did, sz)); \ - sz = CTSIZE_PTR; \ - } else if (cl > 1) { /* Pass struct in FPRs or on stack. */ \ - isfp = (cl & 4) ? 
2 : 1; \ - } /* else: Pass struct in GPRs or on stack. */ - -#define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex by value in separate (!) FPRs or on stack. */ \ - isfp = sz == 2*sizeof(float) ? 2 : 1; - -#define CCALL_HANDLE_REGARG \ - if (LJ_TARGET_IOS && isva) { \ - /* IOS: All variadic arguments are on the stack. */ \ - } else if (isfp) { /* Try to pass argument in FPRs. */ \ - int n2 = ctype_isvector(d->info) ? 1 : n*isfp; \ - if (nfpr + n2 <= CCALL_NARG_FPR) { \ - dp = &cc->fpr[nfpr]; \ - nfpr += n2; \ - goto done; \ - } else { \ - nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ - if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \ - } \ - } else { /* Try to pass argument in GPRs. */ \ - if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \ - ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ - if (ngpr + n <= maxgpr) { \ - dp = &cc->gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } else { \ - ngpr = maxgpr; /* Prevent reordering. */ \ - if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \ - } \ - } - -#elif LJ_TARGET_PPC -/* -- PPC calling conventions --------------------------------------------- */ - -#define CCALL_HANDLE_STRUCTRET \ - cc->retref = 1; /* Return all structs by reference. */ \ - cc->gpr[ngpr++] = (GPRArg)dp; - -#define CCALL_HANDLE_COMPLEXRET \ - /* Complex values are returned in 2 or 4 GPRs. */ \ - cc->retref = 0; - -#define CCALL_HANDLE_COMPLEXRET2 \ - memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ - -#define CCALL_HANDLE_STRUCTARG \ - rp = cdataptr(lj_cdata_new(cts, did, sz)); \ - sz = CTSIZE_PTR; /* Pass all structs by reference. */ - -#define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex by value in 2 or 4 GPRs. */ - -#define CCALL_HANDLE_REGARG \ - if (isfp) { /* Try to pass argument in FPRs. */ \ - if (nfpr + 1 <= CCALL_NARG_FPR) { \ - dp = &cc->fpr[nfpr]; \ - nfpr += 1; \ - d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ - goto done; \ - } \ - } else { /* Try to pass argument in GPRs. 
*/ \ - if (n > 1) { \ - lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ - if (ctype_isinteger(d->info)) \ - ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ - else if (ngpr + n > maxgpr) \ - ngpr = maxgpr; /* Prevent reordering. */ \ - } \ - if (ngpr + n <= maxgpr) { \ - dp = &cc->gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } \ - } - -#define CCALL_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ - -#elif LJ_TARGET_MIPS32 -/* -- MIPS o32 calling conventions ---------------------------------------- */ - -#define CCALL_HANDLE_STRUCTRET \ - cc->retref = 1; /* Return all structs by reference. */ \ - cc->gpr[ngpr++] = (GPRArg)dp; - -#define CCALL_HANDLE_COMPLEXRET \ - /* Complex values are returned in 1 or 2 FPRs. */ \ - cc->retref = 0; - -#if LJ_ABI_SOFTFP -#define CCALL_HANDLE_COMPLEXRET2 \ - if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \ - ((intptr_t *)dp)[0] = cc->gpr[0]; \ - ((intptr_t *)dp)[1] = cc->gpr[1]; \ - } else { /* Copy complex double from GPRs. */ \ - ((intptr_t *)dp)[0] = cc->gpr[0]; \ - ((intptr_t *)dp)[1] = cc->gpr[1]; \ - ((intptr_t *)dp)[2] = cc->gpr[2]; \ - ((intptr_t *)dp)[3] = cc->gpr[3]; \ - } -#else -#define CCALL_HANDLE_COMPLEXRET2 \ - if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ - ((float *)dp)[0] = cc->fpr[0].f; \ - ((float *)dp)[1] = cc->fpr[1].f; \ - } else { /* Copy complex double from FPRs. */ \ - ((double *)dp)[0] = cc->fpr[0].d; \ - ((double *)dp)[1] = cc->fpr[1].d; \ - } -#endif - -#define CCALL_HANDLE_STRUCTARG \ - /* Pass all structs by value in registers and/or on the stack. */ - -#define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex by value in 2 or 4 GPRs. */ - -#define CCALL_HANDLE_GPR \ - if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \ - ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. 
*/ \ - if (ngpr < maxgpr) { \ - dp = &cc->gpr[ngpr]; \ - if (ngpr + n > maxgpr) { \ - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ - ngpr = maxgpr; \ - } else { \ - ngpr += n; \ - } \ - goto done; \ - } - -#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */ -#define CCALL_HANDLE_REGARG \ - if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \ - /* Try to pass argument in FPRs. */ \ - dp = n == 1 ? (void *)&cc->fpr[nfpr].f : (void *)&cc->fpr[nfpr].d; \ - nfpr++; ngpr += n; \ - goto done; \ - } else { /* Try to pass argument in GPRs. */ \ - nfpr = CCALL_NARG_FPR; \ - CCALL_HANDLE_GPR \ - } -#else /* MIPS32 soft-float */ -#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR -#endif - -#if !LJ_ABI_SOFTFP -/* On MIPS64 soft-float, position of float return values is endian-dependant. */ -#define CCALL_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - sp = (uint8_t *)&cc->fpr[0].f; -#endif - -#elif LJ_TARGET_MIPS64 -/* -- MIPS n64 calling conventions ---------------------------------------- */ - -#define CCALL_HANDLE_STRUCTRET \ - cc->retref = !(sz <= 16); \ - if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; - -#define CCALL_HANDLE_STRUCTRET2 \ - ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct)); - -#define CCALL_HANDLE_COMPLEXRET \ - /* Complex values are returned in 1 or 2 FPRs. */ \ - cc->retref = 0; - -#if LJ_ABI_SOFTFP /* MIPS64 soft-float */ - -#define CCALL_HANDLE_COMPLEXRET2 \ - if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \ - ((intptr_t *)dp)[0] = cc->gpr[0]; \ - } else { /* Copy complex double from GPRs. */ \ - ((intptr_t *)dp)[0] = cc->gpr[0]; \ - ((intptr_t *)dp)[1] = cc->gpr[1]; \ - } - -#define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex by value in 2 or 4 GPRs. */ - -/* Position of soft-float 'float' return value depends on endianess. 
*/ -#define CCALL_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4); - -#else /* MIPS64 hard-float */ - -#define CCALL_HANDLE_COMPLEXRET2 \ - if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ - ((float *)dp)[0] = cc->fpr[0].f; \ - ((float *)dp)[1] = cc->fpr[1].f; \ - } else { /* Copy complex double from FPRs. */ \ - ((double *)dp)[0] = cc->fpr[0].d; \ - ((double *)dp)[1] = cc->fpr[1].d; \ - } - -#define CCALL_HANDLE_COMPLEXARG \ - if (sz == 2*sizeof(float)) { \ - isfp = 2; \ - if (ngpr < maxgpr) \ - sz *= 2; \ - } - -#define CCALL_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - sp = (uint8_t *)&cc->fpr[0].f; - -#endif - -#define CCALL_HANDLE_STRUCTARG \ - /* Pass all structs by value in registers and/or on the stack. */ - -#define CCALL_HANDLE_REGARG \ - if (ngpr < maxgpr) { \ - dp = &cc->gpr[ngpr]; \ - if (ngpr + n > maxgpr) { \ - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ - ngpr = maxgpr; \ - } else { \ - ngpr += n; \ - } \ - goto done; \ - } - -#else -#error "Missing calling convention definitions for this architecture" -#endif #ifndef CCALL_HANDLE_STRUCTRET2 #define CCALL_HANDLE_STRUCTRET2 \ @@ -566,43 +81,9 @@ /* -- x86 OSX ABI struct classification ----------------------------------- */ -#if LJ_TARGET_X86 && LJ_TARGET_OSX - -/* Check for struct with single FP field. */ -static int ccall_classify_struct(CTState *cts, CType *ct) -{ - CTSize sz = ct->size; - if (!(sz == sizeof(float) || sz == sizeof(double))) return 0; - if ((ct->info & CTF_UNION)) return 0; - while (ct->sib) { - ct = ctype_get(cts, ct->sib); - if (ctype_isfield(ct->info)) { - CType *sct = ctype_rawchild(cts, ct); - if (ctype_isfp(sct->info)) { - if (sct->size == sz) - return (sz >> 2); /* Return 1 for float or 2 for double. 
*/ - } else if (ctype_isstruct(sct->info)) { - if (sct->size) - return ccall_classify_struct(cts, sct); - } else { - break; - } - } else if (ctype_isbitfield(ct->info)) { - break; - } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { - CType *sct = ctype_rawchild(cts, ct); - if (sct->size) - return ccall_classify_struct(cts, sct); - } - } - return 0; -} - -#endif /* -- x64 struct classification ------------------------------------------- */ -#if LJ_TARGET_X64 && !LJ_ABI_WIN /* Register classes for x64 struct classification. */ #define CCALL_RCL_INT 1 @@ -699,172 +180,15 @@ static void ccall_struct_ret(CCallState *cc, int *rcl, uint8_t *dp, CTSize sz) } memcpy(dp, sp, sz); } -#endif /* -- ARM hard-float ABI struct classification ---------------------------- */ -#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP - -/* Classify a struct based on its fields. */ -static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) -{ - CTSize sz = ct->size; - unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); - if ((ctf->info & CTF_VARARG)) goto noth; - while (ct->sib) { - CType *sct; - ct = ctype_get(cts, ct->sib); - if (ctype_isfield(ct->info)) { - sct = ctype_rawchild(cts, ct); - if (ctype_isfp(sct->info)) { - r |= sct->size; - if (!isu) n++; else if (n == 0) n = 1; - } else if (ctype_iscomplex(sct->info)) { - r |= (sct->size >> 1); - if (!isu) n += 2; else if (n < 2) n = 2; - } else if (ctype_isstruct(sct->info)) { - goto substruct; - } else { - goto noth; - } - } else if (ctype_isbitfield(ct->info)) { - goto noth; - } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { - sct = ctype_rawchild(cts, ct); - substruct: - if (sct->size > 0) { - unsigned int s = ccall_classify_struct(cts, sct, ctf); - if (s <= 1) goto noth; - r |= (s & 255); - if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); - } - } - } - if ((r == 4 || r == 8) && n <= 4) - return r + (n << 8); -noth: /* Not a homogeneous float/double aggregate. 
*/ - return (sz <= 4); /* Return structs of size <= 4 in a GPR. */ -} - -#endif /* -- ARM64 ABI struct classification ------------------------------------- */ -#if LJ_TARGET_ARM64 - -/* Classify a struct based on its fields. */ -static unsigned int ccall_classify_struct(CTState *cts, CType *ct) -{ - CTSize sz = ct->size; - unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); - while (ct->sib) { - CType *sct; - ct = ctype_get(cts, ct->sib); - if (ctype_isfield(ct->info)) { - sct = ctype_rawchild(cts, ct); - if (ctype_isfp(sct->info)) { - r |= sct->size; - if (!isu) n++; else if (n == 0) n = 1; - } else if (ctype_iscomplex(sct->info)) { - r |= (sct->size >> 1); - if (!isu) n += 2; else if (n < 2) n = 2; - } else if (ctype_isstruct(sct->info)) { - goto substruct; - } else { - goto noth; - } - } else if (ctype_isbitfield(ct->info)) { - goto noth; - } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { - sct = ctype_rawchild(cts, ct); - substruct: - if (sct->size > 0) { - unsigned int s = ccall_classify_struct(cts, sct); - if (s <= 1) goto noth; - r |= (s & 255); - if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); - } - } - } - if ((r == 4 || r == 8) && n <= 4) - return r + (n << 8); -noth: /* Not a homogeneous float/double aggregate. */ - return (sz <= 16); /* Return structs of size <= 16 in GPRs. */ -} - -#endif /* -- MIPS64 ABI struct classification ---------------------------- */ -#if LJ_TARGET_MIPS64 - -#define FTYPE_FLOAT 1 -#define FTYPE_DOUBLE 2 - -/* Classify FP fields (max. 2) and their types. */ -static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) -{ - int n = 0, ft = 0; - if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION)) - goto noth; - while (ct->sib) { - CType *sct; - ct = ctype_get(cts, ct->sib); - if (n == 2) { - goto noth; - } else if (ctype_isfield(ct->info)) { - sct = ctype_rawchild(cts, ct); - if (ctype_isfp(sct->info)) { - ft |= (sct->size == 4 ? 
FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n; - n++; - } else { - goto noth; - } - } else if (ctype_isbitfield(ct->info) || - ctype_isxattrib(ct->info, CTA_SUBTYPE)) { - goto noth; - } - } - if (n <= 2) - return ft; -noth: /* Not a homogeneous float/double aggregate. */ - return 0; /* Struct is in GPRs. */ -} - -void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, int ft) -{ - if (LJ_ABI_SOFTFP ? ft : - ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) { - int i, ofs = 0; - for (i = 0; ft != 0; i++, ft >>= 2) { - if ((ft & 3) == FTYPE_FLOAT) { -#if LJ_ABI_SOFTFP - /* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */ - memcpy((uint8_t *)dp + ofs, - (uint8_t *)&cc->gpr[2*i] + LJ_ENDIAN_SELECT(0, 4), 4); -#else - *(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f; -#endif - ofs += 4; - } else { - ofs = (ofs + 7) & ~7; /* 64 bit alignment. */ -#if LJ_ABI_SOFTFP - *(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i]; -#else - *(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d; -#endif - ofs += 8; - } - } - } else { -#if !LJ_ABI_SOFTFP - if (ft) sp = (uint8_t *)&cc->fpr[0]; -#endif - memcpy(dp, sp, ctr->size); - } -} - -#endif /* -- Common C call handling ---------------------------------------------- */ @@ -907,9 +231,6 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, MSize maxgpr, ngpr = 0, nsp = 0, narg; #if CCALL_NARG_FPR MSize nfpr = 0; -#if LJ_TARGET_ARM - MSize fprodd = 0; -#endif #endif /* Clear unused regs to get some determinism in case of misdeclaration. */ @@ -918,17 +239,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, memset(cc->fpr, 0, sizeof(cc->fpr)); #endif -#if LJ_TARGET_X86 - /* x86 has several different calling conventions. */ - cc->resx87 = 0; - switch (ctype_cconv(ct->info)) { - case CTCC_FASTCALL: maxgpr = 2; break; - case CTCC_THISCALL: maxgpr = 1; break; - default: maxgpr = 0; break; - } -#else maxgpr = CCALL_NARG_GPR; -#endif /* Perform required setup for some result types. 
*/ ctr = ctype_rawchild(cts, ct); @@ -946,10 +257,6 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, } else { CCALL_HANDLE_COMPLEXRET } -#if LJ_TARGET_X86 - } else if (ctype_isfp(ctr->info)) { - cc->resx87 = ctr->size == sizeof(float) ? 1 : 2; -#endif } /* Skip initial attributes. */ @@ -1033,42 +340,15 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } -#if LJ_TARGET_MIPS64 - if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) || - (isfp && nsp == 0)) && d->size <= 4) { - *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ - } -#endif -#if LJ_TARGET_X64 && LJ_ABI_WIN - if (isva) { /* Windows/x64 mirrors varargs in both register sets. */ - if (nfpr == ngpr) - cc->gpr[ngpr-1] = cc->fpr[ngpr-1].l[0]; - else - cc->fpr[ngpr-1].l[0] = cc->gpr[ngpr-1]; - } -#else UNUSED(isva); -#endif -#if LJ_TARGET_X64 && !LJ_ABI_WIN if (isfp == 2 && n == 2 && (uint8_t *)dp == (uint8_t *)&cc->fpr[nfpr-2]) { cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ cc->fpr[nfpr-2].d[1] = 0; } -#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP) - if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) { - /* Split float HFA or complex float into separate registers. */ - CTSize i = (sz >> 2) - 1; - do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); - } -#else - UNUSED(isfp); -#endif } if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ -#if LJ_TARGET_X64 || LJ_TARGET_PPC cc->nfpr = nfpr; /* Required for vararg functions. */ -#endif cc->nsp = nsp; cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR; if (nsp > CCALL_SPS_FREE) @@ -1101,9 +381,6 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct, CCALL_HANDLE_COMPLEXRET2 return 1; /* One GC step. 
*/ } - if (LJ_BE && ctr->size < CTSIZE_PTR && - (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) - sp += (CTSIZE_PTR - ctr->size); #if CCALL_NUM_FPR if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info)) sp = (uint8_t *)&cc->fpr[0]; @@ -1141,13 +418,6 @@ int lj_ccall_func(lua_State *L, GCcdata *cd) } ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */ gcsteps += ccall_get_results(L, cts, ct, &cc, &ret); -#if LJ_TARGET_X86 && LJ_ABI_WIN - /* Automatically detect __stdcall and fix up C function declaration. */ - if (cc.spadj && ctype_cconv(ct->info) == CTCC_CDECL) { - CTF_INSERT(ct->info, CCONV, CTCC_STDCALL); - lj_trace_abort(G(L)); - } -#endif while (gcsteps-- > 0) lj_gc_check(L); return ret; @@ -1155,4 +425,3 @@ int lj_ccall_func(lua_State *L, GCcdata *cd) return -1; /* Not a function. */ } -#endif diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 34e800cc03..c621f2b4d5 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -9,31 +9,15 @@ #include "lj_obj.h" #include "lj_ctype.h" -#if LJ_HASFFI /* -- C calling conventions ----------------------------------------------- */ -#if LJ_TARGET_X86ORX64 -#if LJ_TARGET_X86 -#define CCALL_NARG_GPR 2 /* For fastcall arguments. */ -#define CCALL_NARG_FPR 0 -#define CCALL_NRET_GPR 2 -#define CCALL_NRET_FPR 1 /* For FP results on x87 stack. */ -#define CCALL_ALIGN_STACKARG 0 /* Don't align argument on stack. */ -#elif LJ_ABI_WIN -#define CCALL_NARG_GPR 4 -#define CCALL_NARG_FPR 4 -#define CCALL_NRET_GPR 1 -#define CCALL_NRET_FPR 1 -#define CCALL_SPS_EXTRA 4 -#else #define CCALL_NARG_GPR 6 #define CCALL_NARG_FPR 8 #define CCALL_NRET_GPR 2 #define CCALL_NRET_FPR 2 #define CCALL_VECTOR_REG 1 /* Pass vectors in registers. */ -#endif #define CCALL_SPS_FREE 1 #define CCALL_ALIGN_CALLSTATE 16 @@ -49,86 +33,6 @@ typedef LJ_ALIGN(16) union FPRArg { typedef intptr_t GPRArg; -#elif LJ_TARGET_ARM - -#define CCALL_NARG_GPR 4 -#define CCALL_NRET_GPR 2 /* For softfp double. 
*/ -#if LJ_ABI_SOFTFP -#define CCALL_NARG_FPR 0 -#define CCALL_NRET_FPR 0 -#else -#define CCALL_NARG_FPR 8 -#define CCALL_NRET_FPR 4 -#endif -#define CCALL_SPS_FREE 0 - -typedef intptr_t GPRArg; -typedef union FPRArg { - double d; - float f[2]; -} FPRArg; - -#elif LJ_TARGET_ARM64 - -#define CCALL_NARG_GPR 8 -#define CCALL_NRET_GPR 2 -#define CCALL_NARG_FPR 8 -#define CCALL_NRET_FPR 4 -#define CCALL_SPS_FREE 0 - -typedef intptr_t GPRArg; -typedef union FPRArg { - double d; - float f; - uint32_t u32; -} FPRArg; - -#elif LJ_TARGET_PPC - -#define CCALL_NARG_GPR 8 -#define CCALL_NARG_FPR 8 -#define CCALL_NRET_GPR 4 /* For complex double. */ -#define CCALL_NRET_FPR 1 -#define CCALL_SPS_EXTRA 4 -#define CCALL_SPS_FREE 0 - -typedef intptr_t GPRArg; -typedef double FPRArg; - -#elif LJ_TARGET_MIPS32 - -#define CCALL_NARG_GPR 4 -#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2) -#define CCALL_NRET_GPR (LJ_ABI_SOFTFP ? 4 : 2) -#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) -#define CCALL_SPS_EXTRA 7 -#define CCALL_SPS_FREE 1 - -typedef intptr_t GPRArg; -typedef union FPRArg { - double d; - struct { LJ_ENDIAN_LOHI(float f; , float g;) }; -} FPRArg; - -#elif LJ_TARGET_MIPS64 - -/* FP args are positional and overlay the GPR array. */ -#define CCALL_NARG_GPR 8 -#define CCALL_NARG_FPR 0 -#define CCALL_NRET_GPR 2 -#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) -#define CCALL_SPS_EXTRA 3 -#define CCALL_SPS_FREE 1 - -typedef intptr_t GPRArg; -typedef union FPRArg { - double d; - struct { LJ_ENDIAN_LOHI(float f; , float g;) }; -} FPRArg; - -#else -#error "Missing calling convention definitions for this architecture" -#endif #ifndef CCALL_SPS_EXTRA #define CCALL_SPS_EXTRA 0 @@ -161,19 +65,8 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { uint32_t spadj; /* Stack pointer adjustment. */ uint8_t nsp; /* Number of stack slots. */ uint8_t retref; /* Return value by reference. */ -#if LJ_TARGET_X64 uint8_t ngpr; /* Number of arguments in GPRs. 
*/ uint8_t nfpr; /* Number of arguments in FPRs. */ -#elif LJ_TARGET_X86 - uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ -#elif LJ_TARGET_ARM64 - void *retp; /* Aggregate return pointer in x8. */ -#elif LJ_TARGET_PPC - uint8_t nfpr; /* Number of arguments in FPRs. */ -#endif -#if LJ_32 - int32_t align1; -#endif #if CCALL_NUM_FPR FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */ #endif @@ -184,11 +77,10 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { /* -- C call handling ----------------------------------------------------- */ /* Really belongs to lj_vm.h. */ -LJ_ASMF void LJ_FASTCALL lj_vm_ffi_call(CCallState *cc); +LJ_ASMF void lj_vm_ffi_call(CCallState *cc); LJ_FUNC CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o); LJ_FUNC int lj_ccall_func(lua_State *L, GCcdata *cd); -#endif #endif diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index fce6a3ed46..6ffdefa8f0 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -5,7 +5,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_gc.h" #include "lj_err.h" @@ -25,17 +24,9 @@ #define CALLBACK_MCODE_SIZE (LJ_PAGESIZE * LJ_NUM_CBPAGE) -#if LJ_OS_NOJIT -/* Callbacks disabled. */ -#define CALLBACK_SLOT2OFS(slot) (0*(slot)) -#define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) -#define CALLBACK_MAX_SLOT 0 - -#elif LJ_TARGET_X86ORX64 - -#define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0) -#define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 
6 : 5)) +#define CALLBACK_MCODE_HEAD 8 +#define CALLBACK_MCODE_GROUP (-2+1+2+10+6) #define CALLBACK_SLOT2OFS(slot) \ (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot)) @@ -51,34 +42,6 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #define CALLBACK_MAX_SLOT \ (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+4*32))*32) -#elif LJ_TARGET_ARM - -#define CALLBACK_MCODE_HEAD 32 - -#elif LJ_TARGET_ARM64 - -#define CALLBACK_MCODE_HEAD 32 - -#elif LJ_TARGET_PPC - -#define CALLBACK_MCODE_HEAD 24 - -#elif LJ_TARGET_MIPS32 - -#define CALLBACK_MCODE_HEAD 20 - -#elif LJ_TARGET_MIPS64 - -#define CALLBACK_MCODE_HEAD 52 - -#else - -/* Missing support for this architecture. */ -#define CALLBACK_SLOT2OFS(slot) (0*(slot)) -#define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) -#define CALLBACK_MAX_SLOT 0 - -#endif #ifndef CALLBACK_SLOT2OFS #define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) @@ -105,18 +68,12 @@ MSize lj_ccallback_ptr2slot(CTState *cts, void *p) } /* Initialize machine code for callback function pointers. */ -#if LJ_OS_NOJIT -/* Disabled callback support. */ -#define callback_mcode_init(g, p) UNUSED(p) -#elif LJ_TARGET_X86ORX64 static void callback_mcode_init(global_State *g, uint8_t *page) { uint8_t *p = page; uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback; MSize slot; -#if LJ_64 *(void **)p = target; p += 8; -#endif for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { /* mov al, slot; jmp group */ *p++ = XI_MOVrib | RID_EAX; *p++ = (uint8_t)slot; @@ -124,140 +81,26 @@ static void callback_mcode_init(global_State *g, uint8_t *page) /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */ *p++ = XI_PUSH + RID_EBP; *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8); -#if LJ_GC64 *p++ = 0x48; *p++ = XI_MOVri | RID_EBP; *(uint64_t *)p = (uint64_t)(g); p += 8; -#else - *p++ = XI_MOVri | RID_EBP; - *(int32_t *)p = i32ptr(g); p += 4; -#endif -#if LJ_64 /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. 
*/ *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; *(int32_t *)p = (int32_t)(page-(p+4)); p += 4; -#else - /* jmp lj_vm_ffi_callback. */ - *p++ = XI_JMP; *(int32_t *)p = target-(p+4); p += 4; -#endif } else { *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2); } } lua_assert(p - page <= CALLBACK_MCODE_SIZE); } -#elif LJ_TARGET_ARM -static void callback_mcode_init(global_State *g, uint32_t *page) -{ - uint32_t *p = page; - void *target = (void *)lj_vm_ffi_callback; - MSize slot; - /* This must match with the saveregs macro in buildvm_arm.dasc. */ - *p++ = ARMI_SUB|ARMF_D(RID_R12)|ARMF_N(RID_R12)|ARMF_M(RID_PC); - *p++ = ARMI_PUSH|ARMF_N(RID_SP)|RSET_RANGE(RID_R4,RID_R11+1)|RID2RSET(RID_LR); - *p++ = ARMI_SUB|ARMI_K12|ARMF_D(RID_R12)|ARMF_N(RID_R12)|CALLBACK_MCODE_HEAD; - *p++ = ARMI_STR|ARMI_LS_P|ARMI_LS_W|ARMF_D(RID_R12)|ARMF_N(RID_SP)|(CFRAME_SIZE-4*9); - *p++ = ARMI_LDR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_R12)|ARMF_N(RID_PC); - *p++ = ARMI_LDR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_PC)|ARMF_N(RID_PC); - *p++ = u32ptr(g); - *p++ = u32ptr(target); - for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { - *p++ = ARMI_MOV|ARMF_D(RID_R12)|ARMF_M(RID_PC); - *p = ARMI_B | ((page-p-2) & 0x00ffffffu); - p++; - } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); -} -#elif LJ_TARGET_ARM64 -static void callback_mcode_init(global_State *g, uint32_t *page) -{ - uint32_t *p = page; - void *target = (void *)lj_vm_ffi_callback; - MSize slot; - *p++ = A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4); - *p++ = A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5); - *p++ = A64I_BR | A64F_N(RID_X11); - *p++ = A64I_NOP; - ((void **)p)[0] = target; - ((void **)p)[1] = g; - p += 4; - for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { - *p++ = A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot); - *p = A64I_B | A64F_S26((page-p) & 0x03ffffffu); - p++; - } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); -} -#elif LJ_TARGET_PPC -static void callback_mcode_init(global_State *g, uint32_t *page) -{ - uint32_t *p = 
page; - void *target = (void *)lj_vm_ffi_callback; - MSize slot; - *p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16); - *p++ = PPCI_LIS | PPCF_T(RID_R12) | (u32ptr(g) >> 16); - *p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 0xffff); - *p++ = PPCI_ORI | PPCF_A(RID_R12)|PPCF_T(RID_R12) | (u32ptr(g) & 0xffff); - *p++ = PPCI_MTCTR | PPCF_T(RID_TMP); - *p++ = PPCI_BCTR; - for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { - *p++ = PPCI_LI | PPCF_T(RID_R11) | slot; - *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2); - p++; - } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); -} -#elif LJ_TARGET_MIPS -static void callback_mcode_init(global_State *g, uint32_t *page) -{ - uint32_t *p = page; - uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; - uintptr_t ug = (uintptr_t)(void *)g; - MSize slot; -#if LJ_TARGET_MIPS32 - *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16); - *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16); -#else - *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 48); - *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 48); - *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff); - *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff); - *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16); - *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16); - *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff); - *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff); - *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16); - *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16); -#endif - *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff); - *p++ = MIPSI_JR | MIPSF_S(RID_R3); - *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff); - for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { - *p = MIPSI_B | ((page-p-1) & 0x0000ffffu); - p++; - *p++ = MIPSI_LI | MIPSF_T(RID_R1) 
| slot; - } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); -} -#else -/* Missing support for this architecture. */ -#define callback_mcode_init(g, p) UNUSED(p) -#endif /* -- Machine code management --------------------------------------------- */ -#if LJ_TARGET_WINDOWS - -#define WIN32_LEAN_AND_MEAN -#include - -#elif LJ_TARGET_POSIX #include #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif -#endif /* Allocate and initialize area for callback function pointers. */ static void callback_mcode_new(CTState *cts) @@ -266,30 +109,14 @@ static void callback_mcode_new(CTState *cts) void *p; if (CALLBACK_MAX_SLOT == 0) lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); -#if LJ_TARGET_WINDOWS - p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); - if (!p) - lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); -#elif LJ_TARGET_POSIX p = mmap(NULL, sz, (PROT_READ|PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (p == MAP_FAILED) lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); -#else - /* Fallback allocator. Fails if memory is not executable by default. */ - p = lj_mem_new(cts->L, sz); -#endif cts->cb.mcode = p; callback_mcode_init(cts->g, p); lj_mcode_sync(p, (char *)p + sz); -#if LJ_TARGET_WINDOWS - { - DWORD oprot; - VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot); - } -#elif LJ_TARGET_POSIX mprotect(p, sz, (PROT_READ|PROT_EXEC)); -#endif } /* Free area for callback function pointers. */ @@ -298,103 +125,12 @@ void lj_ccallback_mcode_free(CTState *cts) size_t sz = (size_t)CALLBACK_MCODE_SIZE; void *p = cts->cb.mcode; if (p == NULL) return; -#if LJ_TARGET_WINDOWS - VirtualFree(p, 0, MEM_RELEASE); - UNUSED(sz); -#elif LJ_TARGET_POSIX munmap(p, sz); -#else - lj_mem_free(cts->g, p, sz); -#endif } /* -- C callback entry ---------------------------------------------------- */ /* Target-specific handling of register arguments. Similar to lj_ccall.c. */ -#if LJ_TARGET_X86 - -#define CALLBACK_HANDLE_REGARG \ - if (!isfp) { /* Only non-FP values may be passed in registers. 
*/ \ - if (n > 1) { /* Anything > 32 bit is passed on the stack. */ \ - if (!LJ_ABI_WIN) ngpr = maxgpr; /* Prevent reordering. */ \ - } else if (ngpr + 1 <= maxgpr) { \ - sp = &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } \ - } - -#elif LJ_TARGET_X64 && LJ_ABI_WIN - -/* Windows/x64 argument registers are strictly positional (use ngpr). */ -#define CALLBACK_HANDLE_REGARG \ - if (isfp) { \ - if (ngpr < maxgpr) { sp = &cts->cb.fpr[ngpr++]; UNUSED(nfpr); goto done; } \ - } else { \ - if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \ - } - -#elif LJ_TARGET_X64 - -#define CALLBACK_HANDLE_REGARG \ - if (isfp) { \ - if (nfpr + n <= CCALL_NARG_FPR) { \ - sp = &cts->cb.fpr[nfpr]; \ - nfpr += n; \ - goto done; \ - } \ - } else { \ - if (ngpr + n <= maxgpr) { \ - sp = &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } \ - } - -#elif LJ_TARGET_ARM - -#if LJ_ABI_SOFTFP - -#define CALLBACK_HANDLE_REGARG_FP1 UNUSED(isfp); -#define CALLBACK_HANDLE_REGARG_FP2 - -#else - -#define CALLBACK_HANDLE_REGARG_FP1 \ - if (isfp) { \ - if (n == 1) { \ - if (fprodd) { \ - sp = &cts->cb.fpr[fprodd-1]; \ - fprodd = 0; \ - goto done; \ - } else if (nfpr + 1 <= CCALL_NARG_FPR) { \ - sp = &cts->cb.fpr[nfpr++]; \ - fprodd = nfpr; \ - goto done; \ - } \ - } else { \ - if (nfpr + 1 <= CCALL_NARG_FPR) { \ - sp = &cts->cb.fpr[nfpr++]; \ - goto done; \ - } \ - } \ - fprodd = 0; /* No reordering after the first FP value is on stack. */ \ - } else { - -#define CALLBACK_HANDLE_REGARG_FP2 } - -#endif - -#define CALLBACK_HANDLE_REGARG \ - CALLBACK_HANDLE_REGARG_FP1 \ - if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. 
*/ \ - if (ngpr + n <= maxgpr) { \ - sp = &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } CALLBACK_HANDLE_REGARG_FP2 - -#elif LJ_TARGET_ARM64 #define CALLBACK_HANDLE_REGARG \ if (isfp) { \ @@ -402,35 +138,8 @@ void lj_ccallback_mcode_free(CTState *cts) sp = &cts->cb.fpr[nfpr]; \ nfpr += n; \ goto done; \ - } else { \ - nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ } \ } else { \ - if (!LJ_TARGET_IOS && n > 1) \ - ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ - if (ngpr + n <= maxgpr) { \ - sp = &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } else { \ - ngpr = CCALL_NARG_GPR; /* Prevent reordering. */ \ - } \ - } - -#elif LJ_TARGET_PPC - -#define CALLBACK_HANDLE_REGARG \ - if (isfp) { \ - if (nfpr + 1 <= CCALL_NARG_FPR) { \ - sp = &cts->cb.fpr[nfpr++]; \ - cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ - goto done; \ - } \ - } else { /* Try to pass argument in GPRs. */ \ - if (n > 1) { \ - lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \ - ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ - } \ if (ngpr + n <= maxgpr) { \ sp = &cts->cb.gpr[ngpr]; \ ngpr += n; \ @@ -438,66 +147,6 @@ void lj_ccallback_mcode_free(CTState *cts) } \ } -#define CALLBACK_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ - -#elif LJ_TARGET_MIPS32 - -#define CALLBACK_HANDLE_GPR \ - if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ - if (ngpr + n <= maxgpr) { \ - sp = &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } - -#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */ -#define CALLBACK_HANDLE_REGARG \ - if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \ - sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \ - nfpr++; ngpr += n; \ - goto done; \ - } else { /* Try to pass argument in GPRs. 
*/ \ - nfpr = CCALL_NARG_FPR; \ - CALLBACK_HANDLE_GPR \ - } -#else /* MIPS32 soft-float */ -#define CALLBACK_HANDLE_REGARG \ - CALLBACK_HANDLE_GPR \ - UNUSED(isfp); -#endif - -#define CALLBACK_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - ((float *)dp)[1] = *(float *)dp; - -#elif LJ_TARGET_MIPS64 - -#if !LJ_ABI_SOFTFP /* MIPS64 hard-float */ -#define CALLBACK_HANDLE_REGARG \ - if (ngpr + n <= maxgpr) { \ - sp = isfp ? (void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } -#else /* MIPS64 soft-float */ -#define CALLBACK_HANDLE_REGARG \ - if (ngpr + n <= maxgpr) { \ - UNUSED(isfp); \ - sp = (void*) &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } -#endif - -#define CALLBACK_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - ((float *)dp)[1] = *(float *)dp; - -#else -#error "Missing calling convention definitions for this architecture" -#endif /* Convert and push callback arguments to Lua stack. */ static void callback_conv_args(CTState *cts, lua_State *L) @@ -513,9 +162,6 @@ static void callback_conv_args(CTState *cts, lua_State *L) MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR; #if CCALL_NARG_FPR MSize nfpr = 0; -#if LJ_TARGET_ARM - MSize fprodd = 0; -#endif #endif if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) { @@ -549,14 +195,6 @@ static void callback_conv_args(CTState *cts, lua_State *L) lj_state_checkstack(L, LUA_MINSTACK); /* May throw. */ o = L->base; /* Might have been reallocated. */ -#if LJ_TARGET_X86 - /* x86 has several different calling conventions. */ - switch (ctype_cconv(ct->info)) { - case CTCC_FASTCALL: maxgpr = 2; break; - case CTCC_THISCALL: maxgpr = 1; break; - default: maxgpr = 0; break; - } -#endif fid = ct->sib; while (fid) { @@ -576,33 +214,15 @@ static void callback_conv_args(CTState *cts, lua_State *L) CALLBACK_HANDLE_REGARG /* Handle register arguments. */ /* Otherwise pass argument on stack. 
*/ - if (CCALL_ALIGN_STACKARG && LJ_32 && sz == 8) - nsp = (nsp + 1) & ~1u; /* Align 64 bit argument on stack. */ sp = &stack[nsp]; nsp += n; done: - if (LJ_BE && cta->size < CTSIZE_PTR -#if LJ_TARGET_MIPS64 - && !(isfp && nsp) -#endif - ) - sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size); gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp); } fid = ctf->sib; } L->top = o; -#if LJ_TARGET_X86 - /* Store stack adjustment for returns from non-cdecl callbacks. */ - if (ctype_cconv(ct->info) != CTCC_CDECL) { -#if LJ_FR2 - (L->base-3)->u64 |= (nsp << (16+2)); -#else - (L->base-2)->u32.hi |= (nsp << (16+2)); -#endif - } -#endif while (gcsteps-- > 0) lj_gc_check(L); } @@ -610,14 +230,7 @@ static void callback_conv_args(CTState *cts, lua_State *L) /* Convert Lua object to callback result. */ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) { -#if LJ_FR2 CType *ctr = ctype_raw(cts, (uint16_t)(L->base-3)->u64); -#else - CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi); -#endif -#if LJ_TARGET_X86 - cts->cb.gpr[2] = 0; -#endif if (!ctype_isvoid(ctr->info)) { uint8_t *dp = (uint8_t *)&cts->cb.gpr[0]; #if CCALL_NUM_FPR @@ -637,21 +250,11 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } -#if LJ_TARGET_MIPS64 - /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ - if (ctr->size <= 4 && - (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) - *(int64_t *)dp = (int64_t)*(int32_t *)dp; -#endif -#if LJ_TARGET_X86 - if (ctype_isfp(ctr->info)) - cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2; -#endif } } /* Enter callback. */ -lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf) +lua_State * lj_ccallback_enter(CTState *cts, void *cf) { lua_State *L = cts->L; global_State *g = cts->g; @@ -673,7 +276,7 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf) } /* Leave callback. 
*/ -void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o) +void lj_ccallback_leave(CTState *cts, TValue *o) { lua_State *L = cts->L; GCfunc *fn; @@ -725,7 +328,7 @@ static MSize callback_slot_new(CTState *cts, CType *ct) static CType *callback_checkfunc(CTState *cts, CType *ct) { int narg = 0; - if (!ctype_isptr(ct->info) || (LJ_64 && ct->size != CTSIZE_PTR)) + if (!ctype_isptr(ct->info) || ct->size != CTSIZE_PTR) return NULL; ct = ctype_rawchild(cts, ct); if (ctype_isfunc(ct->info)) { @@ -768,4 +371,3 @@ void *lj_ccallback_new(CTState *cts, CType *ct, GCfunc *fn) return NULL; /* Bad conversion. */ } -#endif diff --git a/src/lj_ccallback.h b/src/lj_ccallback.h index a8cdad3863..b5a3731f27 100644 --- a/src/lj_ccallback.h +++ b/src/lj_ccallback.h @@ -9,17 +9,15 @@ #include "lj_obj.h" #include "lj_ctype.h" -#if LJ_HASFFI /* Really belongs to lj_vm.h. */ LJ_ASMF void lj_vm_ffi_callback(void); LJ_FUNC MSize lj_ccallback_ptr2slot(CTState *cts, void *p); -LJ_FUNCA lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf); -LJ_FUNCA void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o); +LJ_FUNCA lua_State * lj_ccallback_enter(CTState *cts, void *cf); +LJ_FUNCA void lj_ccallback_leave(CTState *cts, TValue *o); LJ_FUNC void *lj_ccallback_new(CTState *cts, CType *ct, GCfunc *fn); LJ_FUNC void lj_ccallback_mcode_free(CTState *cts); -#endif #endif diff --git a/src/lj_cconv.c b/src/lj_cconv.c index ab398adcdf..a4e7e50827 100644 --- a/src/lj_cconv.c +++ b/src/lj_cconv.c @@ -5,7 +5,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_err.h" #include "lj_tab.h" @@ -164,21 +163,11 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, case CCX(I, I): conv_I_I: if (dsize > ssize) { /* Zero-extend or sign-extend LSB. */ -#if LJ_LE uint8_t fill = (!(sinfo & CTF_UNSIGNED) && (sp[ssize-1]&0x80)) ? 0xff : 0; memcpy(dp, sp, ssize); memset(dp + ssize, fill, dsize-ssize); -#else - uint8_t fill = (!(sinfo & CTF_UNSIGNED) && (sp[0]&0x80)) ? 
0xff : 0; - memset(dp, fill, dsize-ssize); - memcpy(dp + (dsize-ssize), sp, ssize); -#endif } else { /* Copy LSB. */ -#if LJ_LE memcpy(dp, sp, dsize); -#else - memcpy(dp, sp + (ssize-dsize), dsize); -#endif } break; case CCX(I, F): { @@ -331,7 +320,7 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, case CCX(P, F): if (!(flags & CCF_CAST) || !(flags & CCF_FROMTV)) goto err_conv; /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ - dinfo = CTINFO(CT_NUM, (LJ_64 && dsize == 8) ? 0 : CTF_UNSIGNED); + dinfo = CTINFO(CT_NUM, (dsize == 8) ? 0 : CTF_UNSIGNED); goto conv_I_F; case CCX(P, P): @@ -377,20 +366,10 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid, if (ctype_isnum(sinfo)) { if (!ctype_isbool(sinfo)) { if (ctype_isinteger(sinfo) && s->size > 4) goto copyval; - if (LJ_DUALNUM && ctype_isinteger(sinfo)) { - int32_t i; - lj_cconv_ct_ct(cts, ctype_get(cts, CTID_INT32), s, - (uint8_t *)&i, sp, 0); - if ((sinfo & CTF_UNSIGNED) && i < 0) - setnumV(o, (lua_Number)(uint32_t)i); - else - setintV(o, i); - } else { - lj_cconv_ct_ct(cts, ctype_get(cts, CTID_DOUBLE), s, - (uint8_t *)&o->n, sp, 0); - /* Numbers are NOT canonicalized here! Beware of uninitialized data. */ - lua_assert(tvisnum(o)); - } + lj_cconv_ct_ct(cts, ctype_get(cts, CTID_DOUBLE), s, + (uint8_t *)&o->n, sp, 0); + /* Numbers are NOT canonicalized here! Beware of uninitialized data. */ + lua_assert(tvisnum(o)); } else { uint32_t b = s->size == 1 ? 
(*sp != 0) : (*(int *)sp != 0); setboolV(o, b); @@ -442,14 +421,16 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp) setintV(o, (int32_t)(val << (shift-pos)) >> shift); } else { val = (val << (shift-pos)) >> shift; - if (!LJ_DUALNUM || (int32_t)val < 0) + if ((int32_t)val < 0) setnumV(o, (lua_Number)(uint32_t)val); else setintV(o, (int32_t)val); } } else { + uint32_t b = (val >> pos) & 1; lua_assert(bsz == 1); - setboolV(o, (val >> pos) & 1); + setboolV(o, b); + setboolV(&cts->g->tmptv2, b); /* Remember for trace recorder. */ } return 0; /* No GC step needed. */ } @@ -538,11 +519,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, CType *s; void *tmpptr; uint8_t tmpbool, *sp = (uint8_t *)&tmpptr; - if (LJ_LIKELY(tvisint(o))) { - sp = (uint8_t *)&o->i; - sid = CTID_INT32; - flags |= CCF_FROMTV; - } else if (LJ_LIKELY(tvisnum(o))) { + if (LJ_LIKELY(tvisnum(o))) { sp = (uint8_t *)&o->n; sid = CTID_DOUBLE; flags |= CCF_FROMTV; @@ -749,4 +726,3 @@ void lj_cconv_ct_init(CTState *cts, CType *d, CTSize sz, cconv_err_initov(cts, d); } -#endif diff --git a/src/lj_cconv.h b/src/lj_cconv.h index 0a0b66c909..161dbf2739 100644 --- a/src/lj_cconv.h +++ b/src/lj_cconv.h @@ -9,7 +9,6 @@ #include "lj_obj.h" #include "lj_ctype.h" -#if LJ_HASFFI /* Compressed C type index. ORDER CCX. */ enum { @@ -28,11 +27,7 @@ static LJ_AINLINE uint32_t cconv_idx(CTInfo info) { uint32_t idx = ((info >> 26) & 15u); /* Dispatch bits. */ lua_assert(ctype_type(info) <= CT_MAYCONVERT); -#if LJ_64 idx = ((uint32_t)(U64x(f436fff5,fff7f021) >> 4*idx) & 15u); -#else - idx = (((idx < 8 ? 
0xfff7f021u : 0xf436fff5) >> 4*(idx & 7u)) & 15u); -#endif lua_assert(idx < 8); return idx; } @@ -65,6 +60,5 @@ LJ_FUNC int lj_cconv_multi_init(CTState *cts, CType *d, TValue *o); LJ_FUNC void lj_cconv_ct_init(CTState *cts, CType *d, CTSize sz, uint8_t *dp, TValue *o, MSize len); -#endif #endif diff --git a/src/lj_cdata.c b/src/lj_cdata.c index 68e16d76fc..6cca751c49 100644 --- a/src/lj_cdata.c +++ b/src/lj_cdata.c @@ -5,7 +5,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_gc.h" #include "lj_err.h" @@ -59,7 +58,7 @@ GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, CTInfo info) } /* Free a C data object. */ -void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) +void lj_cdata_free(global_State *g, GCcdata *cd) { if (LJ_UNLIKELY(cd->marked & LJ_GC_CDATA_FIN)) { GCobj *root; @@ -128,16 +127,9 @@ CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp, } lua_assert(!ctype_isref(ct->info)); /* Interning rejects refs to refs. */ - if (tvisint(key)) { - idx = (ptrdiff_t)intV(key); - goto integer_key; - } else if (tvisnum(key)) { /* Numeric key. */ -#ifdef _MSC_VER - /* Workaround for MSVC bug. */ - volatile -#endif + if (tvisnum(key)) { /* Numeric key. */ lua_Number n = numV(key); - idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n); + idx = (ptrdiff_t)n; integer_key: if (ctype_ispointer(ct->info)) { CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ @@ -296,4 +288,3 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual) lj_cconv_ct_tv(cts, d, dp, o, 0); } -#endif diff --git a/src/lj_cdata.h b/src/lj_cdata.h index 5bb0f5dca2..dd74b2c741 100644 --- a/src/lj_cdata.h +++ b/src/lj_cdata.h @@ -10,12 +10,11 @@ #include "lj_gc.h" #include "lj_ctype.h" -#if LJ_HASFFI /* Get C data pointer. */ static LJ_AINLINE void *cdata_getptr(void *p, CTSize sz) { - if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ + if (sz == 4) { /* Support 32 bit pointers on 64 bit targets. 
*/ return ((void *)(uintptr_t)*(uint32_t *)p); } else { lua_assert(sz == CTSIZE_PTR); @@ -26,7 +25,7 @@ static LJ_AINLINE void *cdata_getptr(void *p, CTSize sz) /* Set C data pointer. */ static LJ_AINLINE void cdata_setptr(void *p, CTSize sz, const void *v) { - if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ + if (sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ *(uint32_t *)p = (uint32_t)(uintptr_t)v; } else { lua_assert(sz == CTSIZE_PTR); @@ -63,7 +62,7 @@ LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, LJ_FUNC GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, CTInfo info); -LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd); +LJ_FUNC void lj_cdata_free(global_State *g, GCcdata *cd); LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it); @@ -73,6 +72,5 @@ LJ_FUNC int lj_cdata_get(CTState *cts, CType *s, TValue *o, uint8_t *sp); LJ_FUNC void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual); -#endif #endif diff --git a/src/lj_clib.c b/src/lj_clib.c index 614265903a..a3f2791f88 100644 --- a/src/lj_clib.c +++ b/src/lj_clib.c @@ -5,7 +5,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_gc.h" #include "lj_err.h" @@ -20,7 +19,6 @@ /* -- OS-specific functions ----------------------------------------------- */ -#if LJ_TARGET_DLOPEN #include #include @@ -40,16 +38,10 @@ LJ_NORET LJ_NOINLINE static void clib_error_(lua_State *L) #define clib_error(L, fmt, name) clib_error_(L) -#if LJ_TARGET_CYGWIN -#define CLIB_SOPREFIX "cyg" -#else #define CLIB_SOPREFIX "lib" -#endif #if LJ_TARGET_OSX #define CLIB_SOEXT "%s.dylib" -#elif LJ_TARGET_CYGWIN -#define CLIB_SOEXT "%s.dll" #else #define CLIB_SOEXT "%s.so" #endif @@ -57,17 +49,10 @@ LJ_NORET LJ_NOINLINE static void clib_error_(lua_State *L) static const char *clib_extname(lua_State *L, const char *name) { if (!strchr(name, '/') -#if LJ_TARGET_CYGWIN - && !strchr(name, '\\') -#endif ) { if 
(!strchr(name, '.')) { name = lj_strfmt_pushf(L, CLIB_SOEXT, name); L->top--; -#if LJ_TARGET_CYGWIN - } else { - return name; -#endif } if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] && name[2] == CLIB_SOPREFIX[2])) { @@ -142,177 +127,9 @@ static void *clib_getsym(CLibrary *cl, const char *name) return p; } -#elif LJ_TARGET_WINDOWS - -#define WIN32_LEAN_AND_MEAN -#include - -#ifndef GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS -#define GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS 4 -#define GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT 2 -BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); -#endif - -#define CLIB_DEFHANDLE ((void *)-1) - -/* Default libraries. */ -enum { - CLIB_HANDLE_EXE, - CLIB_HANDLE_DLL, - CLIB_HANDLE_CRT, - CLIB_HANDLE_KERNEL32, - CLIB_HANDLE_USER32, - CLIB_HANDLE_GDI32, - CLIB_HANDLE_MAX -}; - -static void *clib_def_handle[CLIB_HANDLE_MAX]; - -LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, - const char *name) -{ - DWORD err = GetLastError(); -#if LJ_TARGET_XBOXONE - wchar_t wbuf[128]; - char buf[128*2]; - if (!FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, - NULL, err, 0, wbuf, sizeof(wbuf)/sizeof(wchar_t), NULL) || - !WideCharToMultiByte(CP_ACP, 0, wbuf, 128, buf, 128*2, NULL, NULL)) -#else - char buf[128]; - if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, - NULL, err, 0, buf, sizeof(buf), NULL)) -#endif - buf[0] = '\0'; - lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, buf)); -} - -static int clib_needext(const char *s) -{ - while (*s) { - if (*s == '/' || *s == '\\' || *s == '.') return 0; - s++; - } - return 1; -} - -static const char *clib_extname(lua_State *L, const char *name) -{ - if (clib_needext(name)) { - name = lj_strfmt_pushf(L, "%s.dll", name); - L->top--; - } - return name; -} - -static void *clib_loadlib(lua_State *L, const char *name, int global) -{ - DWORD oldwerr = GetLastError(); - void *h = (void 
*)LoadLibraryExA(clib_extname(L, name), NULL, 0); - if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name); - SetLastError(oldwerr); - UNUSED(global); - return h; -} - -static void clib_unloadlib(CLibrary *cl) -{ - if (cl->handle == CLIB_DEFHANDLE) { - MSize i; - for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) { - void *h = clib_def_handle[i]; - if (h) { - clib_def_handle[i] = NULL; - FreeLibrary((HINSTANCE)h); - } - } - } else if (cl->handle) { - FreeLibrary((HINSTANCE)cl->handle); - } -} - -static void *clib_getsym(CLibrary *cl, const char *name) -{ - void *p = NULL; - if (cl->handle == CLIB_DEFHANDLE) { /* Search default libraries. */ - MSize i; - for (i = 0; i < CLIB_HANDLE_MAX; i++) { - HINSTANCE h = (HINSTANCE)clib_def_handle[i]; - if (!(void *)h) { /* Resolve default library handles (once). */ - switch (i) { - case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break; - case CLIB_HANDLE_DLL: - GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - (const char *)clib_def_handle, &h); - break; - case CLIB_HANDLE_CRT: - GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - (const char *)&_fmode, &h); - break; - case CLIB_HANDLE_KERNEL32: h = LoadLibraryExA("kernel32.dll", NULL, 0); break; - case CLIB_HANDLE_USER32: h = LoadLibraryExA("user32.dll", NULL, 0); break; - case CLIB_HANDLE_GDI32: h = LoadLibraryExA("gdi32.dll", NULL, 0); break; - } - if (!h) continue; - clib_def_handle[i] = (void *)h; - } - p = (void *)GetProcAddress(h, name); - if (p) break; - } - } else { - p = (void *)GetProcAddress((HINSTANCE)cl->handle, name); - } - return p; -} - -#else - -#define CLIB_DEFHANDLE NULL - -LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, - const char *name) -{ - lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, "no support for this OS")); -} - -static void 
*clib_loadlib(lua_State *L, const char *name, int global) -{ - lj_err_callermsg(L, "no support for loading dynamic libraries for this OS"); - UNUSED(name); UNUSED(global); - return NULL; -} - -static void clib_unloadlib(CLibrary *cl) -{ - UNUSED(cl); -} - -static void *clib_getsym(CLibrary *cl, const char *name) -{ - UNUSED(cl); UNUSED(name); - return NULL; -} - -#endif /* -- C library indexing -------------------------------------------------- */ -#if LJ_TARGET_X86 && LJ_ABI_WIN -/* Compute argument size for fastcall/stdcall functions. */ -static CTSize clib_func_argsize(CTState *cts, CType *ct) -{ - CTSize n = 0; - while (ct->sib) { - CType *d; - ct = ctype_get(cts, ct->sib); - if (ctype_isfield(ct->info)) { - d = ctype_rawchild(cts, ct); - n += ((d->size + 3) & ~3); - } - } - return n; -} -#endif /* Get redirected or mangled external symbol. */ static const char *clib_extsym(CTState *cts, CType *ct, GCstr *name) @@ -344,31 +161,11 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) setintV(tv, (int32_t)ct->size); } else { const char *sym = clib_extsym(cts, ct, name); -#if LJ_TARGET_WINDOWS - DWORD oldwerr = GetLastError(); -#endif void *p = clib_getsym(cl, sym); GCcdata *cd; lua_assert(ctype_isfunc(ct->info) || ctype_isextern(ct->info)); -#if LJ_TARGET_X86 && LJ_ABI_WIN - /* Retry with decorated name for fastcall/stdcall functions. */ - if (!p && ctype_isfunc(ct->info)) { - CTInfo cconv = ctype_cconv(ct->info); - if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) { - CTSize sz = clib_func_argsize(cts, ct); - const char *symd = lj_strfmt_pushf(L, - cconv == CTCC_FASTCALL ? 
"@%s@%d" : "_%s@%d", - sym, sz); - L->top--; - p = clib_getsym(cl, symd); - } - } -#endif if (!p) clib_error(L, "cannot resolve symbol " LUA_QS ": %s", sym); -#if LJ_TARGET_WINDOWS - SetLastError(oldwerr); -#endif cd = lj_cdata_new(cts, id, CTSIZE_PTR); *(void **)cdataptr(cd) = p; setcdataV(L, tv, cd); @@ -415,4 +212,3 @@ void lj_clib_default(lua_State *L, GCtab *mt) cl->handle = CLIB_DEFHANDLE; } -#endif diff --git a/src/lj_clib.h b/src/lj_clib.h index fcc9dac592..0cf636ca7e 100644 --- a/src/lj_clib.h +++ b/src/lj_clib.h @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASFFI /* Namespace for C library indexing. */ #define CLNS_INDEX ((1u<stack[decl->pos]; if (ctype_isfunc(ct->info)) { /* Ok to modify in-place. */ -#if LJ_TARGET_X86 - if ((decl->fattr & CTFP_CCONV)) - ct->info = (ct->info & (CTMASK_NUM|CTF_VARARG|CTMASK_CID)) + - (decl->fattr & ~CTMASK_CID); -#endif } else { if ((decl->attr & CTFP_ALIGNED) && !(decl->mode & CPARSE_MODE_FIELD)) cp_push(decl, CTINFO(CT_ATTRIB, CTATTRIB(CTA_ALIGN)), @@ -1075,32 +1069,6 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl) if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize)); } break; -#if LJ_TARGET_X86 - case H_(5ad22db8,c689b848): case H_(439150fa,65ea78cb): /* regparm */ - CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp)); - decl->fattr |= CTFP_CCONV; - break; - case H_(18fc0b98,7ff4c074): case H_(4e62abed,0a747424): /* cdecl */ - CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL); - decl->fattr |= CTFP_CCONV; - break; - case H_(72b2e41b,494c5a44): case H_(f2356d59,f25fc9bd): /* thiscall */ - CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL); - decl->fattr |= CTFP_CCONV; - break; - case H_(0d0ffc42,ab746f88): case H_(21c54ba1,7f0ca7e3): /* fastcall */ - CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL); - decl->fattr |= CTFP_CCONV; - break; - case H_(ef76b040,9412e06a): case H_(de56697b,c750e6e1): /* stdcall */ - CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL); - decl->fattr |= CTFP_CCONV; - break; - case 
H_(ea78b622,f234bd8e): case H_(252ffb06,8d50f34b): /* sseregparm */ - decl->fattr |= CTF_SSEREGPARM; - decl->fattr |= CTFP_CCONV; - break; -#endif default: /* Skip all other attributes. */ goto skip_attr; } @@ -1156,15 +1124,9 @@ static void cp_decl_attributes(CPState *cp, CPDecl *decl) case CTOK_ASM: cp_decl_asm(cp, decl); continue; case CTOK_DECLSPEC: cp_decl_msvcattribute(cp, decl); continue; case CTOK_CCDECL: -#if LJ_TARGET_X86 - CTF_INSERT(decl->fattr, CCONV, cp->ct->size); - decl->fattr |= CTFP_CCONV; -#endif break; case CTOK_PTRSZ: -#if LJ_64 CTF_INSERT(decl->attr, MSIZEP, cp->ct->size); -#endif break; default: return; } @@ -1216,19 +1178,6 @@ static CTSize cp_field_align(CPState *cp, CType *ct, CTInfo info) { CTSize align = ctype_align(info); UNUSED(cp); UNUSED(ct); -#if (LJ_TARGET_X86 && !LJ_ABI_WIN) || (LJ_TARGET_ARM && __APPLE__) - /* The SYSV i386 and iOS ABIs limit alignment of non-vector fields to 2^2. */ - if (align > 2 && !(info & CTFP_ALIGNED)) { - if (ctype_isarray(info) && !(info & CTF_VECTOR)) { - do { - ct = ctype_rawchild(cp->cts, ct); - info = ct->info; - } while (ctype_isarray(info) && !(info & CTF_VECTOR)); - } - if (ctype_isnum(info) || ctype_isenum(info)) - align = 2; - } -#endif return align; } @@ -1286,11 +1235,7 @@ static void cp_struct_layout(CPState *cp, CTypeID sid, CTInfo sattr) ct->info = CTINFO(CT_BITFIELD, (info & (CTF_QUAL|CTF_UNSIGNED|CTF_BOOL)) + (csz << (CTSHIFT_BITCSZ-3)) + (bsz << CTSHIFT_BITBSZ)); -#if LJ_BE - ct->info += ((csz - (bofs & (csz-1)) - bsz) << CTSHIFT_BITPOS); -#else ct->info += ((bofs & (csz-1)) << CTSHIFT_BITPOS); -#endif ct->size = ((bofs & ~(csz-1)) >> 3); /* Store container offset. 
*/ } } @@ -1646,12 +1591,10 @@ static void cp_declarator(CPState *cp, CPDecl *decl) cp_decl_attributes(cp, decl); sz = CTSIZE_PTR; info = CTINFO(CT_PTR, CTALIGN_PTR); -#if LJ_64 if (ctype_msizeP(decl->attr) == 4) { sz = 4; info = CTINFO(CT_PTR, CTALIGN(2)); } -#endif info += (decl->attr & (CTF_QUAL|CTF_REF)); decl->attr &= ~(CTF_QUAL|(CTMASK_MSIZEP<info & CTF_UNSIGNED) ? 1 : 0); } } else if (ctype_isptr(ct->info)) { - return (LJ_64 && ct->size == 8) ? IRT_P64 : IRT_P32; + return (ct->size == 8) ? IRT_P64 : IRT_P32; } else if (ctype_iscomplex(ct->info)) { if (ct->size == 2*sizeof(double)) return IRT_NUM; @@ -128,13 +127,7 @@ static IRType crec_ct2irt(CTState *cts, CType *ct) #define CREC_FILL_MAXUNROLL 16 /* Number of windowed registers used for optimized memory copy. */ -#if LJ_TARGET_X86 -#define CREC_COPY_REGWIN 2 -#elif LJ_TARGET_PPC || LJ_TARGET_MIPS -#define CREC_COPY_REGWIN 8 -#else #define CREC_COPY_REGWIN 4 -#endif /* List of memory offsets for copy/fill. */ typedef struct CRecMemList { @@ -425,7 +418,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, conv_I_I: if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; /* Extend 32 to 64 bit integer. */ - if (dsize == 8 && ssize < 8 && !(LJ_64 && (sinfo & CTF_UNSIGNED))) + if (dsize == 8 && ssize < 8 && !(sinfo & CTF_UNSIGNED)) sp = emitconv(sp, dt, ssize < 4 ? IRT_INT : st, (sinfo & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); else if (dsize < 8 && ssize == 8) /* Truncate from 64 bit integer. */ @@ -515,13 +508,11 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, goto xstore; case CCX(P, I): if (st == IRT_CDATA) goto err_nyi; - if (!LJ_64 && ssize == 8) /* Truncate from 64 bit integer. */ - sp = emitconv(sp, IRT_U32, st, 0); goto xstore; case CCX(P, F): if (st == IRT_CDATA) goto err_nyi; /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ - sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, + sp = emitconv(sp, (dsize == 8) ? 
IRT_I64 : IRT_U32, st, IRCONV_ANY); goto xstore; @@ -603,10 +594,10 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) CType *s; if (LJ_LIKELY(tref_isinteger(sp))) { sid = CTID_INT32; - svisnz = (void *)(intptr_t)(tvisint(sval)?(intV(sval)!=0):!tviszero(sval)); + svisnz = (void *)(intptr_t)!tviszero(sval); } else if (tref_isnum(sp)) { sid = CTID_DOUBLE; - svisnz = (void *)(intptr_t)(tvisint(sval)?(intV(sval)!=0):!tviszero(sval)); + svisnz = (void *)(intptr_t)!tviszero(sval); } else if (tref_isbool(sp)) { sp = lj_ir_kint(J, tref_istrue(sp) ? 1 : 0); sid = CTID_BOOL; @@ -642,10 +633,8 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) sid = CTID_A_CCHAR; } } else if (tref_islightud(sp)) { -#if LJ_64 sp = emitir(IRT(IR_BAND, IRT_P64), sp, lj_ir_kint64(J, U64x(00007fff,ffffffff))); -#endif } else { /* NYI: tref_istab(sp). */ IRType t; sid = argv2cdata(J, sp, sval)->ctypeid; @@ -702,7 +691,7 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) (ir->o == IR_ADD || ir->o == IR_ADDOV || ir->o == IR_SUBOV)) { IRIns *irk = IR(ir->op2); ptrdiff_t k; - if (LJ_64 && irk->o == IR_KINT64) + if (irk->o == IR_KINT64) k = (ptrdiff_t)ir_kint64(irk)->u64 * sz; else k = (ptrdiff_t)irk->i * sz; @@ -716,12 +705,8 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv) { TRef kfunc = lj_ir_kfunc(J, funcV(tv)); -#if LJ_FR2 J->base[-2] = kfunc; J->base[-1] = TREF_FRAME; -#else - J->base[-1] = kfunc | TREF_FRAME; -#endif rd->nres = -1; /* Pending tailcall. */ } @@ -751,7 +736,49 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, } } -void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) +/* Record bitfield load/store. 
*/ +static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info) +{ + IRType t = IRT_I8 + 2*lj_fls(ctype_bitcsz(info)) + ((info&CTF_UNSIGNED)?1:0); + TRef tr = emitir(IRT(IR_XLOAD, t), ptr, 0); + CTSize pos = ctype_bitpos(info), bsz = ctype_bitbsz(info), shift = 32 - bsz; + lua_assert(t <= IRT_U32); /* NYI: 64 bit bitfields. */ + if (rd->data == 0) { /* __index metamethod. */ + if ((info & CTF_BOOL)) { + tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << pos)))); + /* Assume not equal to zero. Fixup and emit pending guard later. */ + lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0)); + J->postproc = LJ_POST_FIXGUARD; + tr = TREF_TRUE; + } else if (!(info & CTF_UNSIGNED)) { + tr = emitir(IRTI(IR_BSHL), tr, lj_ir_kint(J, shift - pos)); + tr = emitir(IRTI(IR_BSAR), tr, lj_ir_kint(J, shift)); + } else { + lua_assert(bsz < 32); /* Full-size fields cannot end up here. */ + tr = emitir(IRTI(IR_BSHR), tr, lj_ir_kint(J, pos)); + tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << bsz)-1))); + /* We can omit the U32 to NUM conversion, since bsz < 32. */ + } + J->base[0] = tr; + } else { /* __newindex metamethod. */ + CTState *cts = ctype_ctsG(J2G(J)); + CType *ct = ctype_get(cts, + (info & CTF_BOOL) ? CTID_BOOL : + (info & CTF_UNSIGNED) ? CTID_UINT32 : CTID_INT32); + int32_t mask = (int32_t)(((1u << bsz)-1) << pos); + TRef sp = crec_ct_tv(J, ct, 0, J->base[2], &rd->argv[2]); + sp = emitir(IRTI(IR_BSHL), sp, lj_ir_kint(J, pos)); + /* Use of the target type avoids forwarding conversions. 
*/ + sp = emitir(IRT(IR_BAND, t), sp, lj_ir_kint(J, mask)); + tr = emitir(IRT(IR_BAND, t), tr, lj_ir_kint(J, (int32_t)~mask)); + tr = emitir(IRT(IR_BOR, t), tr, sp); + emitir(IRT(IR_XSTORE, t), ptr, tr); + rd->nres = 0; + J->needsnap = 1; + } +} + +void recff_cdata_index(jit_State *J, RecordFFData *rd) { TRef idx, ptr = J->base[0]; ptrdiff_t ofs = sizeof(GCcdata); @@ -762,7 +789,7 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) /* Resolve pointer or reference for cdata object. */ if (ctype_isptr(ct->info)) { - IRType t = (LJ_64 && ct->size == 8) ? IRT_P64 : IRT_P32; + IRType t = (ct->size == 8) ? IRT_P64 : IRT_P32; if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct); ptr = emitir(IRT(IR_FLOAD, t), ptr, IRFL_CDATA_PTR); ofs = 0; @@ -780,17 +807,6 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) idx = emitir(IRT(IR_BAND, IRT_INTP), idx, lj_ir_kintp(J, 1)); sz = lj_ctype_size(cts, (sid = ctype_cid(ct->info))); idx = crec_reassoc_ofs(J, idx, &ofs, sz); -#if LJ_TARGET_ARM || LJ_TARGET_PPC - /* Hoist base add to allow fusion of index/shift into operands. 
*/ - if (LJ_LIKELY(J->flags & JIT_F_OPT_LOOP) && ofs -#if LJ_TARGET_ARM - && (sz == 1 || sz == 4) -#endif - ) { - ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs)); - ofs = 0; - } -#endif idx = emitir(IRT(IR_MUL, IRT_INTP), idx, lj_ir_kintp(J, sz)); ptr = emitir(IRT(IR_ADD, IRT_PTR), idx, ptr); } @@ -808,12 +824,8 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) lj_ir_kintp(J, sizeof(GCcdata))); idx = emitir(IRT(IR_XLOAD, t), idx, 0); } - if (LJ_64 && ctk->size < sizeof(intptr_t) && !(ctk->info & CTF_UNSIGNED)) + if (ctk->size < sizeof(intptr_t) && !(ctk->info & CTF_UNSIGNED)) idx = emitconv(idx, IRT_INTP, IRT_INT, IRCONV_SEXT); - if (!LJ_64 && ctk->size > sizeof(intptr_t)) { - idx = emitconv(idx, IRT_INTP, t, 0); - lj_needsplit(J); - } goto integer_key; } } else if (tref_isstr(idx)) { @@ -825,6 +837,7 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) CType *fct; fct = lj_ctype_getfield(cts, ct, name, &fofs); if (fct) { + ofs += (ptrdiff_t)fofs; /* Always specialize to the field name. */ emitir(IRTG(IR_EQ, IRT_STR), idx, lj_ir_kstr(J, name)); if (ctype_isconstval(fct->info)) { @@ -836,12 +849,14 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) J->base[0] = lj_ir_kint(J, (int32_t)fct->size); return; /* Interpreter will throw for newindex. 
*/ } else if (ctype_isbitfield(fct->info)) { - lj_trace_err(J, LJ_TRERR_NYICONV); + if (ofs) + ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs)); + crec_index_bf(J, rd, ptr, fct->info); + return; } else { lua_assert(ctype_isfield(fct->info)); sid = ctype_cid(fct->info); } - ofs += (ptrdiff_t)fofs; } } else if (ctype_iscomplex(ct->info)) { if (name->len == 2 && @@ -1037,16 +1052,6 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, MSize i, n; TRef tr, *base; cTValue *o; -#if LJ_TARGET_X86 -#if LJ_ABI_WIN - TRef *arg0 = NULL, *arg1 = NULL; -#endif - int ngpr = 0; - if (ctype_cconv(ct->info) == CTCC_THISCALL) - ngpr = 1; - else if (ctype_cconv(ct->info) == CTCC_FASTCALL) - ngpr = 2; -#endif /* Skip initial attributes. */ fid = ct->sib; @@ -1088,35 +1093,6 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4) { lj_needsplit(J); } -#if LJ_TARGET_X86 - /* 64 bit args must not end up in registers for fastcall/thiscall. */ -#if LJ_ABI_WIN - if (!ctype_isfp(d->info)) { - /* Sigh, the Windows/x86 ABI allows reordering across 64 bit args. */ - if (tref_typerange(tr, IRT_I64, IRT_U64)) { - if (ngpr) { - arg0 = &args[n]; args[n++] = TREF_NIL; ngpr--; - if (ngpr) { - arg1 = &args[n]; args[n++] = TREF_NIL; ngpr--; - } - } - } else { - if (arg0) { *arg0 = tr; arg0 = NULL; n--; continue; } - if (arg1) { *arg1 = tr; arg1 = NULL; n--; continue; } - if (ngpr) ngpr--; - } - } -#else - if (!ctype_isfp(d->info) && ngpr) { - if (tref_typerange(tr, IRT_I64, IRT_U64)) { - /* No reordering for other x86 ABIs. Simply add alignment args. */ - do { args[n++] = TREF_NIL; } while (--ngpr); - } else { - ngpr--; - } - } -#endif -#endif args[n] = tr; } tr = args[0]; @@ -1154,7 +1130,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) CType *ct = ctype_raw(cts, cd->ctypeid); IRType tp = IRT_PTR; if (ctype_isptr(ct->info)) { - tp = (LJ_64 && ct->size == 8) ? 
IRT_P64 : IRT_P32; + tp = (ct->size == 8) ? IRT_P64 : IRT_P32; ct = ctype_rawchild(cts, ct); } if (ctype_isfunc(ct->info)) { @@ -1165,7 +1141,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) TValue tv; /* Check for blacklisted C functions that might call a callback. */ setlightudV(&tv, - cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4)); + cdata_getptr(cdataptr(cd), (tp == IRT_P64) ? 8 : 4)); if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv))) lj_trace_err(J, LJ_TRERR_BLACKL); if (ctype_isvoid(ctr->info)) { @@ -1176,9 +1152,6 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) lj_trace_err(J, LJ_TRERR_NYICALL); } if ((ct->info & CTF_VARARG) -#if LJ_TARGET_X86 - || ctype_cconv(ct->info) != CTCC_CDECL -#endif ) func = emitir(IRT(IR_CARG, IRT_NIL), func, lj_ir_kint(J, ctype_typeid(cts, ct))); @@ -1189,16 +1162,12 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) tr = TREF_NIL; } else { crec_snap_caller(J); -#if LJ_TARGET_X86ORX64 /* Note: only the x86/x64 backend supports U8 and only for EQ(tr, 0). */ lj_ir_set(J, IRTG(IR_NE, IRT_U8), tr, lj_ir_kint(J, 0)); -#else - lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0)); -#endif J->postproc = LJ_POST_FIXGUARDSNAP; tr = TREF_TRUE; } - } else if (t == IRT_PTR || (LJ_64 && t == IRT_P32) || + } else if (t == IRT_PTR || t == IRT_P32 || t == IRT_I64 || t == IRT_U64 || ctype_isenum(ctr->info)) { TRef trid = lj_ir_kint(J, ctype_cid(ct->info)); tr = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, tr); @@ -1217,7 +1186,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) return 0; } -void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd) +void recff_cdata_call(jit_State *J, RecordFFData *rd) { CTState *cts = ctype_ctsG(J2G(J)); GCcdata *cd = argv2cdata(J, J->base[0], &rd->argv[0]); @@ -1314,14 +1283,17 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm) if (mm == MM_sub) { /* Pointer difference. 
*/ TRef tr; CTSize sz = lj_ctype_size(cts, ctype_cid(ctp->info)); - if (sz == 0 || (sz & (sz-1)) != 0) - return 0; /* NYI: integer division. */ - tr = emitir(IRT(IR_SUB, IRT_INTP), sp[0], sp[1]); - tr = emitir(IRT(IR_BSAR, IRT_INTP), tr, lj_ir_kint(J, lj_fls(sz))); -#if LJ_64 - tr = emitconv(tr, IRT_NUM, IRT_INTP, 0); -#endif - return tr; + if (sz == 0) { + return 0; + } + tr = emitir(IRT(IR_SUB, IRT_INTP), sp[0], sp[1]); + if ((sz & (sz-1)) == 0) { /* special case: divide using bit-shift */ + tr = emitir(IRT(IR_BSAR, IRT_INTP), tr, lj_ir_kint(J, lj_fls(sz))); + } else { /* general case: divide using division */ + tr = emitir(IRT(IR_DIV, IRT_INTP), tr, lj_ir_kint(J, sz)); + } + tr = emitconv(tr, IRT_NUM, IRT_INTP, 0); + return tr; } else { /* Pointer comparison (unsigned). */ /* Assume true comparison. Fixup and emit pending guard later. */ IROp op = mm == MM_eq ? IR_EQ : mm == MM_lt ? IR_ULT : IR_ULE; @@ -1344,18 +1316,11 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm) IRType t = tref_type(tr); CTSize sz = lj_ctype_size(cts, ctype_cid(ctp->info)); CTypeID id; -#if LJ_64 if (t == IRT_NUM || t == IRT_FLOAT) tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY); else if (!(t == IRT_I64 || t == IRT_U64)) tr = emitconv(tr, IRT_INTP, IRT_INT, ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT); -#else - if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) { - tr = emitconv(tr, IRT_INTP, t, - (t == IRT_NUM || t == IRT_FLOAT) ? 
IRCONV_ANY : 0); - } -#endif tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz)); tr = emitir(IRT(mm+(int)IR_ADD-(int)MM_add, IRT_PTR), sp[0], tr); id = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|ctype_cid(ctp->info)), @@ -1402,7 +1367,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts, return 0; } -void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd) +void recff_cdata_arith(jit_State *J, RecordFFData *rd) { CTState *cts = ctype_ctsG(J2G(J)); TRef sp[2]; @@ -1504,7 +1469,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd) /* -- C library namespace metamethods ------------------------------------- */ -void LJ_FASTCALL recff_clib_index(jit_State *J, RecordFFData *rd) +void recff_clib_index(jit_State *J, RecordFFData *rd) { CTState *cts = ctype_ctsG(J2G(J)); if (tref_isudata(J->base[0]) && tref_isstr(J->base[1]) && @@ -1529,7 +1494,7 @@ void LJ_FASTCALL recff_clib_index(jit_State *J, RecordFFData *rd) void *sp = *(void **)cdataptr(cdataV(tv)); TRef ptr; ct = ctype_raw(cts, sid); - if (LJ_64 && !checkptr32(sp)) + if (!checkptr32(sp)) ptr = lj_ir_kintp(J, (uintptr_t)sp); else ptr = lj_ir_kptr(J, sp); @@ -1555,12 +1520,12 @@ static TRef crec_toint(jit_State *J, CTState *cts, TRef sp, TValue *sval) return crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, sp, sval); } -void LJ_FASTCALL recff_ffi_new(jit_State *J, RecordFFData *rd) +void recff_ffi_new(jit_State *J, RecordFFData *rd) { crec_alloc(J, rd, argv2ctype(J, J->base[0], &rd->argv[0])); } -void LJ_FASTCALL recff_ffi_errno(jit_State *J, RecordFFData *rd) +void recff_ffi_errno(jit_State *J, RecordFFData *rd) { UNUSED(rd); if (J->base[0]) @@ -1568,7 +1533,7 @@ void LJ_FASTCALL recff_ffi_errno(jit_State *J, RecordFFData *rd) J->base[0] = lj_ir_call(J, IRCALL_lj_vm_errno); } -void LJ_FASTCALL recff_ffi_string(jit_State *J, RecordFFData *rd) +void recff_ffi_string(jit_State *J, RecordFFData *rd) { CTState *cts = ctype_ctsG(J2G(J)); TRef tr = J->base[0]; 
@@ -1585,7 +1550,7 @@ void LJ_FASTCALL recff_ffi_string(jit_State *J, RecordFFData *rd) } /* else: interpreter will throw. */ } -void LJ_FASTCALL recff_ffi_copy(jit_State *J, RecordFFData *rd) +void recff_ffi_copy(jit_State *J, RecordFFData *rd) { CTState *cts = ctype_ctsG(J2G(J)); TRef trdst = J->base[0], trsrc = J->base[1], trlen = J->base[2]; @@ -1603,7 +1568,7 @@ void LJ_FASTCALL recff_ffi_copy(jit_State *J, RecordFFData *rd) } /* else: interpreter will throw. */ } -void LJ_FASTCALL recff_ffi_fill(jit_State *J, RecordFFData *rd) +void recff_ffi_fill(jit_State *J, RecordFFData *rd) { CTState *cts = ctype_ctsG(J2G(J)); TRef trdst = J->base[0], trlen = J->base[1], trfill = J->base[2]; @@ -1627,7 +1592,7 @@ void LJ_FASTCALL recff_ffi_fill(jit_State *J, RecordFFData *rd) } /* else: interpreter will throw. */ } -void LJ_FASTCALL recff_ffi_typeof(jit_State *J, RecordFFData *rd) +void recff_ffi_typeof(jit_State *J, RecordFFData *rd) { if (tref_iscdata(J->base[0])) { TRef trid = lj_ir_kint(J, argv2ctype(J, J->base[0], &rd->argv[0])); @@ -1639,7 +1604,7 @@ void LJ_FASTCALL recff_ffi_typeof(jit_State *J, RecordFFData *rd) } } -void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd) +void recff_ffi_istype(jit_State *J, RecordFFData *rd) { argv2ctype(J, J->base[0], &rd->argv[0]); if (tref_iscdata(J->base[1])) { @@ -1651,7 +1616,7 @@ void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd) } } -void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd) +void recff_ffi_abi(jit_State *J, RecordFFData *rd) { if (tref_isstr(J->base[0])) { /* Specialize to the ABI string to make the boolean result a constant. */ @@ -1664,7 +1629,7 @@ void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd) } /* Record ffi.sizeof(), ffi.alignof(), ffi.offsetof(). 
*/ -void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd) +void recff_ffi_xof(jit_State *J, RecordFFData *rd) { CTypeID id = argv2ctype(J, J->base[0], &rd->argv[0]); if (rd->data == FF_ffi_sizeof) { @@ -1681,7 +1646,7 @@ void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd) J->base[0] = J->base[1] = J->base[2] = TREF_NIL; } -void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd) +void recff_ffi_gc(jit_State *J, RecordFFData *rd) { argv2cdata(J, J->base[0], &rd->argv[0]); if (!J->base[1]) @@ -1705,7 +1670,7 @@ static CTypeID crec_bit64_type(CTState *cts, cTValue *tv) return 0; /* Use regular 32 bit ops. */ } -void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd) +void recff_bit64_tobit(jit_State *J, RecordFFData *rd) { CTState *cts = ctype_ctsG(J2G(J)); TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0, @@ -1715,7 +1680,7 @@ void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd) J->base[0] = tr; } -int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd) +int recff_bit64_unary(jit_State *J, RecordFFData *rd) { CTState *cts = ctype_ctsG(J2G(J)); CTypeID id = crec_bit64_type(cts, &rd->argv[0]); @@ -1728,7 +1693,7 @@ int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd) return 0; } -int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd) +int recff_bit64_nary(jit_State *J, RecordFFData *rd) { CTState *cts = ctype_ctsG(J2G(J)); CTypeID id = 0; @@ -1751,7 +1716,7 @@ int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd) return 0; } -int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd) +int recff_bit64_shift(jit_State *J, RecordFFData *rd) { CTState *cts = ctype_ctsG(J2G(J)); CTypeID id; @@ -1821,7 +1786,7 @@ TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr) /* -- Miscellaneous library functions ------------------------------------- */ -void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd) +void lj_crecord_tonumber(jit_State *J, 
RecordFFData *rd) { CTState *cts = ctype_ctsG(J2G(J)); CType *d, *ct = lj_ctype_rawref(cts, cdataV(&rd->argv[0])->ctypeid); @@ -1842,4 +1807,3 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd) #undef emitir #undef emitconv -#endif diff --git a/src/lj_crecord.h b/src/lj_crecord.h index c165def475..3aac7af82c 100644 --- a/src/lj_crecord.h +++ b/src/lj_crecord.h @@ -10,29 +10,27 @@ #include "lj_jit.h" #include "lj_ffrecord.h" -#if LJ_HASJIT && LJ_HASFFI -LJ_FUNC void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd); -LJ_FUNC void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd); -LJ_FUNC void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd); -LJ_FUNC void LJ_FASTCALL recff_clib_index(jit_State *J, RecordFFData *rd); -LJ_FUNC void LJ_FASTCALL recff_ffi_new(jit_State *J, RecordFFData *rd); -LJ_FUNC void LJ_FASTCALL recff_ffi_errno(jit_State *J, RecordFFData *rd); -LJ_FUNC void LJ_FASTCALL recff_ffi_string(jit_State *J, RecordFFData *rd); -LJ_FUNC void LJ_FASTCALL recff_ffi_copy(jit_State *J, RecordFFData *rd); -LJ_FUNC void LJ_FASTCALL recff_ffi_fill(jit_State *J, RecordFFData *rd); -LJ_FUNC void LJ_FASTCALL recff_ffi_typeof(jit_State *J, RecordFFData *rd); -LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd); -LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd); -LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd); -LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd); +LJ_FUNC void recff_cdata_index(jit_State *J, RecordFFData *rd); +LJ_FUNC void recff_cdata_call(jit_State *J, RecordFFData *rd); +LJ_FUNC void recff_cdata_arith(jit_State *J, RecordFFData *rd); +LJ_FUNC void recff_clib_index(jit_State *J, RecordFFData *rd); +LJ_FUNC void recff_ffi_new(jit_State *J, RecordFFData *rd); +LJ_FUNC void recff_ffi_errno(jit_State *J, RecordFFData *rd); +LJ_FUNC void recff_ffi_string(jit_State *J, RecordFFData *rd); +LJ_FUNC void 
recff_ffi_copy(jit_State *J, RecordFFData *rd); +LJ_FUNC void recff_ffi_fill(jit_State *J, RecordFFData *rd); +LJ_FUNC void recff_ffi_typeof(jit_State *J, RecordFFData *rd); +LJ_FUNC void recff_ffi_istype(jit_State *J, RecordFFData *rd); +LJ_FUNC void recff_ffi_abi(jit_State *J, RecordFFData *rd); +LJ_FUNC void recff_ffi_xof(jit_State *J, RecordFFData *rd); +LJ_FUNC void recff_ffi_gc(jit_State *J, RecordFFData *rd); -LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd); -LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd); -LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd); -LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd); +LJ_FUNC void recff_bit64_tobit(jit_State *J, RecordFFData *rd); +LJ_FUNC int recff_bit64_unary(jit_State *J, RecordFFData *rd); +LJ_FUNC int recff_bit64_nary(jit_State *J, RecordFFData *rd); +LJ_FUNC int recff_bit64_shift(jit_State *J, RecordFFData *rd); LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr); -LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); -#endif +LJ_FUNC void lj_crecord_tonumber(jit_State *J, RecordFFData *rd); #endif diff --git a/src/lj_ctype.c b/src/lj_ctype.c index 0ea89c7486..411c3f0bd1 100644 --- a/src/lj_ctype.c +++ b/src/lj_ctype.c @@ -5,7 +5,6 @@ #include "lj_obj.h" -#if LJ_HASFFI #include "lj_gc.h" #include "lj_err.h" @@ -15,6 +14,7 @@ #include "lj_ctype.h" #include "lj_ccallback.h" #include "lj_buf.h" +#include "lj_auditlog.h" /* -- C type definitions -------------------------------------------------- */ @@ -218,6 +218,7 @@ void lj_ctype_addname(CTState *cts, CType *ct, CTypeID id) uint32_t h = ct_hashname(gcref(ct->name)); ct->next = cts->hash[h]; cts->hash[h] = (CTypeID1)id; + lj_auditlog_new_ctypeid(id, strdata(gco2str(gcref(ct->name)))); } /* Get a C type by name, matching the type mask. 
*/ @@ -446,6 +447,7 @@ static void ctype_repr(CTRepr *ctr, CTypeID id) for (;;) { CTInfo info = ct->info; CTSize size = ct->size; + CType *newct; switch (ctype_type(info)) { case CT_NUM: if ((info & CTF_BOOL)) { @@ -492,7 +494,7 @@ static void ctype_repr(CTRepr *ctr, CTypeID id) ctype_prepc(ctr, '&'); } else { ctype_prepqual(ctr, (qual|info)); - if (LJ_64 && size == 4) ctype_preplit(ctr, "__ptr32"); + if (size == 4) ctype_preplit(ctr, "__ptr32"); ctype_prepc(ctr, '*'); } qual = 0; @@ -528,10 +530,20 @@ static void ctype_repr(CTRepr *ctr, CTypeID id) ctype_appc(ctr, ')'); break; default: - lua_assert(0); - break; + ctr->ok = 0; + return; + } + if (ctype_cid(info) == 0) { + ctr->ok = 0; + return; } - ct = ctype_get(ctr->cts, ctype_cid(info)); + newct = ctype_get(ctr->cts, ctype_cid(info)); + /* Detect ctypes that are not OK due to looping. */ + if (newct == ct) { + ctr->ok = 0; + return; + } + ct = newct; } } @@ -599,6 +611,7 @@ CTState *lj_ctype_init(lua_State *L) cts->tab = ct; cts->sizetab = CTTYPETAB_MIN; cts->top = CTTYPEINFO_NUM; + cts->log = cts->top; cts->L = NULL; cts->g = G(L); for (id = 0; id < CTTYPEINFO_NUM; id++, ct++) { @@ -622,6 +635,18 @@ CTState *lj_ctype_init(lua_State *L) return cts; } +/* Log all new ctypes. */ +void lj_ctype_log(lua_State *L) +{ + global_State *g = G(L); + CTState *cts = ctype_ctsG(g); + while (cts && cts->log < cts->top) { + int id = cts->log++; + GCstr *name = lj_ctype_repr(L, id, NULL); + lj_auditlog_new_ctypeid(id, strdata(name)); + } +} + /* Free C type table and state. 
*/ void lj_ctype_freestate(global_State *g) { @@ -634,4 +659,3 @@ void lj_ctype_freestate(global_State *g) } } -#endif diff --git a/src/lj_ctype.h b/src/lj_ctype.h index 0c220a8886..307d9d9996 100644 --- a/src/lj_ctype.h +++ b/src/lj_ctype.h @@ -9,7 +9,6 @@ #include "lj_obj.h" #include "lj_gc.h" -#if LJ_HASFFI /* -- C type definitions -------------------------------------------------- */ @@ -174,6 +173,7 @@ typedef LJ_ALIGN(8) struct CCallback { typedef struct CTState { CType *tab; /* C type table. */ CTypeID top; /* Current top of C type table. */ + CTypeID log; /* Current top of logged C types table. */ MSize sizetab; /* Size of C type table. */ lua_State *L; /* Lua state (needed for errors and allocations). */ global_State *g; /* Global state. */ @@ -247,13 +247,8 @@ typedef struct CTState { CTINFO(CT_ATTRIB, CTATTRIB(at))) /* Target-dependent sizes and alignments. */ -#if LJ_64 #define CTSIZE_PTR 8 #define CTALIGN_PTR CTALIGN(3) -#else -#define CTSIZE_PTR 4 -#define CTALIGN_PTR CTALIGN(2) -#endif #define CTINFO_REF(ref) \ CTINFO(CT_PTR, (CTF_CONST|CTF_REF|CTALIGN_PTR) + (ref)) @@ -263,12 +258,7 @@ typedef struct CTState { /* -- Predefined types ---------------------------------------------------- */ /* Target-dependent types. */ -#if LJ_TARGET_PPC -#define CTTYDEFP(_) \ - _(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2)) -#else #define CTTYDEFP(_) -#endif /* Common types. */ #define CTTYDEF(_) \ @@ -307,21 +297,10 @@ CTTYDEF(CTTYIDDEF) }; /* Target-dependent type IDs. 
*/ -#if LJ_64 #define CTID_INT_PSZ CTID_INT64 #define CTID_UINT_PSZ CTID_UINT64 -#else -#define CTID_INT_PSZ CTID_INT32 -#define CTID_UINT_PSZ CTID_UINT32 -#endif -#if LJ_ABI_WIN -#define CTID_WCHAR CTID_UINT16 -#elif LJ_TARGET_PPC -#define CTID_WCHAR CTID_LINT32 -#else #define CTID_WCHAR CTID_INT32 -#endif /* -- C tokens and keywords ----------------------------------------------- */ @@ -454,8 +433,8 @@ LJ_FUNC GCstr *lj_ctype_repr(lua_State *L, CTypeID id, GCstr *name); LJ_FUNC GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned); LJ_FUNC GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size); LJ_FUNC CTState *lj_ctype_init(lua_State *L); +LJ_FUNC void lj_ctype_log(lua_State *L); LJ_FUNC void lj_ctype_freestate(global_State *g); -#endif #endif diff --git a/src/lj_debug.c b/src/lj_debug.c index 959dc289c7..939ec9c325 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c @@ -15,9 +15,7 @@ #include "lj_frame.h" #include "lj_bc.h" #include "lj_strfmt.h" -#if LJ_HASJIT #include "lj_jit.h" -#endif /* -- Frames -------------------------------------------------------------- */ @@ -97,32 +95,22 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) } pt = funcproto(fn); pos = proto_bcpos(pt, ins) - 1; -#if LJ_HASJIT if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. */ GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins)); lua_assert(bc_isret(bc_op(ins[-1]))); pos = proto_bcpos(pt, mref(T->startpc, const BCIns)); } -#endif return pos; } /* -- Line numbers -------------------------------------------------------- */ /* Get line number for a bytecode position. 
*/ -BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc) +BCLine lj_debug_line(GCproto *pt, BCPos pc) { const void *lineinfo = proto_lineinfo(pt); if (pc <= pt->sizebc && lineinfo) { - BCLine first = pt->firstline; - if (pc == pt->sizebc) return first + pt->numline; - if (pc-- == 0) return first; - if (pt->numline < 256) - return first + (BCLine)((const uint8_t *)lineinfo)[pc]; - else if (pt->numline < 65536) - return first + (BCLine)((const uint16_t *)lineinfo)[pc]; - else - return first + (BCLine)((const uint32_t *)lineinfo)[pc]; + return mref(lineinfo, uint32_t)[pc]; } return 0; } @@ -501,16 +489,12 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext) if (isluafunc(fn)) { GCtab *t = lj_tab_new(L, 0, 0); GCproto *pt = funcproto(fn); - const void *lineinfo = proto_lineinfo(pt); + const uint32_t *lineinfo = proto_lineinfo(pt); if (lineinfo) { BCLine first = pt->firstline; - int sz = pt->numline < 256 ? 1 : pt->numline < 65536 ? 2 : 4; MSize i, szl = pt->sizebc-1; for (i = 0; i < szl; i++) { - BCLine line = first + - (sz == 1 ? (BCLine)((const uint8_t *)lineinfo)[i] : - sz == 2 ? (BCLine)((const uint16_t *)lineinfo)[i] : - (BCLine)((const uint32_t *)lineinfo)[i]); + BCLine line = first + lineinfo[i]; setboolV(lj_tab_setint(L, t, line), 1); } } @@ -541,110 +525,6 @@ LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar) } } -#if LJ_HASPROFILE -/* Put the chunkname into a buffer. 
*/ -static int debug_putchunkname(SBuf *sb, GCproto *pt, int pathstrip) -{ - GCstr *name = proto_chunkname(pt); - const char *p = strdata(name); - if (pt->firstline == ~(BCLine)0) { - lj_buf_putmem(sb, "[builtin:", 9); - lj_buf_putstr(sb, name); - lj_buf_putb(sb, ']'); - return 0; - } - if (*p == '=' || *p == '@') { - MSize len = name->len-1; - p++; - if (pathstrip) { - int i; - for (i = len-1; i >= 0; i--) - if (p[i] == '/' || p[i] == '\\') { - len -= i+1; - p = p+i+1; - break; - } - } - lj_buf_putmem(sb, p, len); - } else { - lj_buf_putmem(sb, "[string]", 8); - } - return 1; -} - -/* Put a compact stack dump into a buffer. */ -void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth) -{ - int level = 0, dir = 1, pathstrip = 1; - MSize lastlen = 0; - if (depth < 0) { level = ~depth; depth = dir = -1; } /* Reverse frames. */ - while (level != depth) { /* Loop through all frame. */ - int size; - cTValue *frame = lj_debug_frame(L, level, &size); - if (frame) { - cTValue *nextframe = size ? frame+size : NULL; - GCfunc *fn = frame_func(frame); - const uint8_t *p = (const uint8_t *)fmt; - int c; - while ((c = *p++)) { - switch (c) { - case 'p': /* Preserve full path. */ - pathstrip = 0; - break; - case 'F': case 'f': { /* Dump function name. */ - const char *name; - const char *what = lj_debug_funcname(L, frame, &name); - if (what) { - if (c == 'F' && isluafunc(fn)) { /* Dump module:name for 'F'. */ - GCproto *pt = funcproto(fn); - if (pt->firstline != ~(BCLine)0) { /* Not a bytecode builtin. */ - debug_putchunkname(sb, pt, pathstrip); - lj_buf_putb(sb, ':'); - } - } - lj_buf_putmem(sb, name, (MSize)strlen(name)); - break; - } /* else: can't derive a name, dump module:line. */ - } - /* fallthrough */ - case 'l': /* Dump module:line. */ - if (isluafunc(fn)) { - GCproto *pt = funcproto(fn); - if (debug_putchunkname(sb, pt, pathstrip)) { - /* Regular Lua function. */ - BCLine line = c == 'l' ? 
debug_frameline(L, fn, nextframe) : - pt->firstline; - lj_buf_putb(sb, ':'); - lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline); - } - } else if (isffunc(fn)) { /* Dump numbered builtins. */ - lj_buf_putmem(sb, "[builtin#", 9); - lj_strfmt_putint(sb, fn->c.ffid); - lj_buf_putb(sb, ']'); - } else { /* Dump C function address. */ - lj_buf_putb(sb, '@'); - lj_strfmt_putptr(sb, fn->c.f); - } - break; - case 'Z': /* Zap trailing separator. */ - lastlen = sbuflen(sb); - break; - default: - lj_buf_putb(sb, c); - break; - } - } - } else if (dir == 1) { - break; - } else { - level -= size; /* Reverse frame order: quickly skip missing level. */ - } - level += dir; - } - if (lastlen) - setsbufP(sb, sbufB(sb) + lastlen); /* Zap trailing separator. */ -} -#endif /* Number of frames for the leading and trailing part of a traceback. */ #define TRACEBACK_LEVELS1 12 diff --git a/src/lj_debug.h b/src/lj_debug.h index 5917c00bc6..df69c15a5b 100644 --- a/src/lj_debug.h +++ b/src/lj_debug.h @@ -27,7 +27,7 @@ typedef struct lj_Debug { } lj_Debug; LJ_FUNC cTValue *lj_debug_frame(lua_State *L, int level, int *size); -LJ_FUNC BCLine LJ_FASTCALL lj_debug_line(GCproto *pt, BCPos pc); +LJ_FUNC BCLine lj_debug_line(GCproto *pt, BCPos pc); LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx); LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp); LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, @@ -40,10 +40,6 @@ LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext); -#if LJ_HASPROFILE -LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, - int depth); -#endif /* Fixed internal variable names. 
*/ #define VARNAMEDEF(_) \ diff --git a/src/lj_debuginfo.h b/src/lj_debuginfo.h new file mode 100644 index 0000000000..49c209dd17 --- /dev/null +++ b/src/lj_debuginfo.h @@ -0,0 +1,6 @@ +/* +** RaptorJIT access to embedded debug information objects. +**/ + +extern const char _binary_lj_dwarf_dwo_start, _binary_lj_dwarf_dwo_end; + diff --git a/src/lj_def.h b/src/lj_def.h index 2d8fff66f3..2dd7b99a7c 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -8,38 +8,7 @@ #include "lua.h" -#if defined(_MSC_VER) -/* MSVC is stuck in the last century and doesn't have C99's stdint.h. */ -typedef __int8 int8_t; -typedef __int16 int16_t; -typedef __int32 int32_t; -typedef __int64 int64_t; -typedef unsigned __int8 uint8_t; -typedef unsigned __int16 uint16_t; -typedef unsigned __int32 uint32_t; -typedef unsigned __int64 uint64_t; -#ifdef _WIN64 -typedef __int64 intptr_t; -typedef unsigned __int64 uintptr_t; -#else -typedef __int32 intptr_t; -typedef unsigned __int32 uintptr_t; -#endif -#elif defined(__symbian__) -/* Cough. */ -typedef signed char int8_t; -typedef short int int16_t; -typedef int int32_t; -typedef long long int64_t; -typedef unsigned char uint8_t; -typedef unsigned short int uint16_t; -typedef unsigned int uint32_t; -typedef unsigned long long uint64_t; -typedef int intptr_t; -typedef unsigned int uintptr_t; -#else #include -#endif /* Needed everywhere. */ #include @@ -49,7 +18,7 @@ typedef unsigned int uintptr_t; #define LJ_MAX_MEM32 0x7fffff00 /* Max. 32 bit memory allocation. */ #define LJ_MAX_MEM64 ((uint64_t)1<<47) /* Max. 64 bit memory allocation. */ /* Max. total memory allocation. */ -#define LJ_MAX_MEM (LJ_GC64 ? LJ_MAX_MEM64 : LJ_MAX_MEM32) +#define LJ_MAX_MEM LJ_MAX_MEM64 #define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */ #define LJ_MAX_STR LJ_MAX_MEM32 /* Max. string length. */ #define LJ_MAX_BUF LJ_MAX_MEM32 /* Max. buffer length. */ @@ -80,7 +49,6 @@ typedef unsigned int uintptr_t; #define LJ_MIN_SBUF 32 /* Min. string buffer length. 
*/ #define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ #define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ -#define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */ /* JIT compiler limits. */ #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ @@ -97,7 +65,7 @@ typedef unsigned int uintptr_t; #define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) #define i64ptr(p) ((int64_t)(intptr_t)(void *)(p)) #define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p)) -#define igcptr(p) (LJ_GC64 ? i64ptr(p) : i32ptr(p)) +#define igcptr(p) i64ptr(p) #define checki8(x) ((x) == (int32_t)(int8_t)(x)) #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) @@ -107,7 +75,7 @@ typedef unsigned int uintptr_t; #define checku32(x) ((x) == (uint32_t)(x)) #define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) #define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0) -#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr32((x)) :1) +#define checkptrGC(x) checkptr47((x)) /* Every half-decent C compiler transforms this into a rotate instruction. */ #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) @@ -129,18 +97,13 @@ typedef uintptr_t BloomFilter; #define LJ_NOINLINE __attribute__((noinline)) #if defined(__ELF__) || defined(__MACH__) || defined(__psp2__) -#if !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__)) #define LJ_NOAPI extern __attribute__((visibility("hidden"))) #endif -#endif /* Note: it's only beneficial to use fastcall on x86 and then only for up to ** two non-FP args. The amalgamated compile covers all LJ_FUNC cases. Only ** indirect calls and related tail-called C functions are marked as fastcall. 
*/ -#if defined(__i386__) -#define LJ_FASTCALL __attribute__((fastcall)) -#endif #define LJ_LIKELY(x) __builtin_expect(!!(x), 1) #define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0) @@ -156,33 +119,7 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x) #define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31)) #endif -#if defined(__arm__) -static LJ_AINLINE uint32_t lj_bswap(uint32_t x) -{ -#if defined(__psp2__) - return __builtin_rev(x); -#else - uint32_t r; -#if __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6T2__ || __ARM_ARCH_6Z__ ||\ - __ARM_ARCH_6ZK__ || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ - __asm__("rev %0, %1" : "=r" (r) : "r" (x)); - return r; -#else -#ifdef __thumb__ - r = x ^ lj_ror(x, 16); -#else - __asm__("eor %0, %1, %1, ror #16" : "=r" (r) : "r" (x)); -#endif - return ((r & 0xff00ffffu) >> 8) ^ lj_ror(x, 8); -#endif -#endif -} - -static LJ_AINLINE uint64_t lj_bswap64(uint64_t x) -{ - return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32)); -} -#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) +#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) static LJ_AINLINE uint32_t lj_bswap(uint32_t x) { return (uint32_t)__builtin_bswap32((int32_t)x); @@ -198,17 +135,10 @@ static LJ_AINLINE uint32_t lj_bswap(uint32_t x) uint32_t r; __asm__("bswap %0" : "=r" (r) : "0" (x)); return r; } -#if defined(__i386__) -static LJ_AINLINE uint64_t lj_bswap64(uint64_t x) -{ - return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32)); -} -#else static LJ_AINLINE uint64_t lj_bswap64(uint64_t x) { uint64_t r; __asm__("bswap %0" : "=r" (r) : "0" (x)); return r; } -#endif #else static LJ_AINLINE uint32_t lj_bswap(uint32_t x) { @@ -244,75 +174,11 @@ static LJ_AINLINE uint32_t lj_getu32(const void *p) return ((const Unaligned32 *)p)->u; } -#elif defined(_MSC_VER) - -#define LJ_NORET __declspec(noreturn) -#define LJ_ALIGN(n) __declspec(align(n)) -#define LJ_INLINE __inline -#define LJ_AINLINE __forceinline -#define 
LJ_NOINLINE __declspec(noinline) -#if defined(_M_IX86) -#define LJ_FASTCALL __fastcall -#endif - -#ifdef _M_PPC -unsigned int _CountLeadingZeros(long); -#pragma intrinsic(_CountLeadingZeros) -static LJ_AINLINE uint32_t lj_fls(uint32_t x) -{ - return _CountLeadingZeros(x) ^ 31; -} -#else -unsigned char _BitScanForward(uint32_t *, unsigned long); -unsigned char _BitScanReverse(uint32_t *, unsigned long); -#pragma intrinsic(_BitScanForward) -#pragma intrinsic(_BitScanReverse) - -static LJ_AINLINE uint32_t lj_ffs(uint32_t x) -{ - uint32_t r; _BitScanForward(&r, x); return r; -} - -static LJ_AINLINE uint32_t lj_fls(uint32_t x) -{ - uint32_t r; _BitScanReverse(&r, x); return r; -} -#endif - -unsigned long _byteswap_ulong(unsigned long); -uint64_t _byteswap_uint64(uint64_t); -#define lj_bswap(x) (_byteswap_ulong((x))) -#define lj_bswap64(x) (_byteswap_uint64((x))) - -#if defined(_M_PPC) && defined(LUAJIT_NO_UNALIGNED) -/* -** Replacement for unaligned loads on Xbox 360. Disabled by default since it's -** usually more costly than the occasional stall when crossing a cache-line. -*/ -static LJ_AINLINE uint16_t lj_getu16(const void *v) -{ - const uint8_t *p = (const uint8_t *)v; - return (uint16_t)((p[0]<<8) | p[1]); -} -static LJ_AINLINE uint32_t lj_getu32(const void *v) -{ - const uint8_t *p = (const uint8_t *)v; - return (uint32_t)((p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3]); -} -#else -/* Unaligned loads are generally ok on x86/x64. */ -#define lj_getu16(p) (*(uint16_t *)(p)) -#define lj_getu32(p) (*(uint32_t *)(p)) -#endif - #else #error "missing defines for your compiler" #endif /* Optional defines. 
*/ -#ifndef LJ_FASTCALL -#define LJ_FASTCALL -#endif #ifndef LJ_NORET #define LJ_NORET #endif @@ -329,11 +195,7 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v) #define LJ_DATADEF #define LJ_ASMF LJ_NOAPI #define LJ_FUNCA LJ_NOAPI -#if defined(ljamalg_c) -#define LJ_FUNC static -#else #define LJ_FUNC LJ_NOAPI -#endif #define LJ_FUNC_NORET LJ_FUNC LJ_NORET #define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET #define LJ_ASMF_NORET LJ_ASMF LJ_NORET diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index 5d6795f88e..4e0a06fb11 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -19,17 +19,10 @@ #include "lj_bc.h" #include "lj_ff.h" #include "lj_strfmt.h" -#if LJ_HASJIT #include "lj_jit.h" -#endif -#if LJ_HASFFI #include "lj_ccallback.h" -#endif #include "lj_trace.h" #include "lj_dispatch.h" -#if LJ_HASPROFILE -#include "lj_profile.h" -#endif #include "lj_vm.h" #include "luajit.h" @@ -38,23 +31,6 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC); /* -- Dispatch table management ------------------------------------------- */ -#if LJ_TARGET_MIPS -#include -LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, - lua_State *co); -#if !LJ_HASJIT -#define lj_dispatch_stitch lj_dispatch_ins -#endif -#if !LJ_HASPROFILE -#define lj_dispatch_profile lj_dispatch_ins -#endif - -#define GOTFUNC(name) (ASMFunction)name, -static const ASMFunction dispatch_got[] = { - GOTDEF(GOTFUNC) -}; -#undef GOTFUNC -#endif /* Initialize instruction dispatch table and hot counters. */ void lj_dispatch_init(GG_State *GG) @@ -74,12 +50,8 @@ void lj_dispatch_init(GG_State *GG) GG->g.bc_cfunc_ext = GG->g.bc_cfunc_int = BCINS_AD(BC_FUNCC, LUA_MINSTACK, 0); for (i = 0; i < GG_NUM_ASMFF; i++) GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); -#if LJ_TARGET_MIPS - memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *)); -#endif } -#if LJ_HASJIT /* Initialize hotcount table. 
*/ void lj_dispatch_init_hotcount(global_State *g) { @@ -90,7 +62,6 @@ void lj_dispatch_init_hotcount(global_State *g) for (i = 0; i < HOTCOUNT_SIZE; i++) hotcount[i] = start; } -#endif /* Internal dispatch mode bits. */ #define DISPMODE_CALL 0x01 /* Override call dispatch. */ @@ -98,21 +69,15 @@ void lj_dispatch_init_hotcount(global_State *g) #define DISPMODE_INS 0x04 /* Override instruction dispatch. */ #define DISPMODE_JIT 0x10 /* JIT compiler on. */ #define DISPMODE_REC 0x20 /* Recording active. */ -#define DISPMODE_PROF 0x40 /* Profiling active. */ /* Update dispatch table depending on various flags. */ void lj_dispatch_update(global_State *g) { uint8_t oldmode = g->dispatchmode; uint8_t mode = 0; -#if LJ_HASJIT mode |= (G2J(g)->flags & JIT_F_ON) ? DISPMODE_JIT : 0; mode |= G2J(g)->state != LJ_TRACE_IDLE ? (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0; -#endif -#if LJ_HASPROFILE - mode |= (g->hookmask & HOOK_PROFILE) ? (DISPMODE_PROF|DISPMODE_INS) : 0; -#endif mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0; mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0; mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; @@ -141,7 +106,7 @@ void lj_dispatch_update(global_State *g) disp[GG_LEN_DDISP+BC_LOOP] = f_loop; /* Set dynamic instruction dispatch. */ - if ((oldmode ^ mode) & (DISPMODE_PROF|DISPMODE_REC|DISPMODE_INS)) { + if ((oldmode ^ mode) & (DISPMODE_REC|DISPMODE_INS)) { /* Need to update the whole table. */ if (!(mode & DISPMODE_INS)) { /* No ins dispatch? */ /* Copy static dispatch table to dynamic dispatch table. */ @@ -155,8 +120,7 @@ void lj_dispatch_update(global_State *g) } } else { /* The recording dispatch also checks for hooks. */ - ASMFunction f = (mode & DISPMODE_PROF) ? lj_vm_profhook : - (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook; + ASMFunction f = (mode & DISPMODE_REC) ? 
lj_vm_record : lj_vm_inshook; uint32_t i; for (i = 0; i < GG_LEN_SDISP; i++) disp[i] = f; @@ -196,17 +160,14 @@ void lj_dispatch_update(global_State *g) disp[BC_FUNCV] = f_funcv; } -#if LJ_HASJIT /* Reset hotcounts for JIT off to on transition. */ if ((mode & DISPMODE_JIT) && !(oldmode & DISPMODE_JIT)) lj_dispatch_init_hotcount(g); -#endif } } /* -- JIT mode setting ---------------------------------------------------- */ -#if LJ_HASJIT /* Set JIT mode for a single prototype. */ static void setptmode(global_State *g, GCproto *pt, int mode) { @@ -233,7 +194,6 @@ static void setptmode_all(global_State *g, GCproto *pt, int mode) } } } -#endif /* Public API function: control the JIT engine. */ int luaJIT_setmode(lua_State *L, int idx, int mode) @@ -245,22 +205,16 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) if ((g->hookmask & HOOK_GC)) lj_err_caller(L, LJ_ERR_NOGCMM); switch (mm) { -#if LJ_HASJIT case LUAJIT_MODE_ENGINE: if ((mode & LUAJIT_MODE_FLUSH)) { lj_trace_flushall(L); } else { if (!(mode & LUAJIT_MODE_ON)) G2J(g)->flags &= ~(uint32_t)JIT_F_ON; -#if LJ_TARGET_X86ORX64 else if ((G2J(g)->flags & JIT_F_SSE2)) G2J(g)->flags |= (uint32_t)JIT_F_ON; else return 0; /* Don't turn on JIT compiler without SSE2 support. */ -#else - else - G2J(g)->flags |= (uint32_t)JIT_F_ON; -#endif lj_dispatch_update(g); } break; @@ -287,16 +241,6 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) return 0; /* Failed. */ lj_trace_flush(G2J(g), idx); break; -#else - case LUAJIT_MODE_ENGINE: - case LUAJIT_MODE_FUNC: - case LUAJIT_MODE_ALLFUNC: - case LUAJIT_MODE_ALLSUBFUNC: - UNUSED(idx); - if ((mode & LUAJIT_MODE_ON)) - return 0; /* Failed. */ - break; -#endif case LUAJIT_MODE_WRAPCFUNC: if ((mode & LUAJIT_MODE_ON)) { if (idx != 0) { @@ -368,19 +312,11 @@ static void callhook(lua_State *L, int event, BCLine line) /* Top frame, nextframe = NULL. 
*/ ar.i_ci = (int)((L->base-1) - tvref(L->stack)); lj_state_checkstack(L, 1+LUA_MINSTACK); -#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF - lj_profile_hook_enter(g); -#else hook_enter(g); -#endif hookf(L, &ar); lua_assert(hook_active(g)); setgcref(g->cur_L, obj2gco(L)); -#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF - lj_profile_hook_leave(g); -#else hook_leave(g); -#endif } } @@ -401,7 +337,7 @@ static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres) } /* Instruction dispatch. Used by instr/line/return hooks or when recording. */ -void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc) +void lj_dispatch_ins(lua_State *L, const BCIns *pc) { ERRNO_SAVE GCfunc *fn = curr_func(L); @@ -413,7 +349,6 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc) setcframe_pc(cf, pc); slots = cur_topslot(pt, pc, cframe_multres_n(cf)); L->top = L->base + slots; /* Fix top. */ -#if LJ_HASJIT { jit_State *J = G2J(g); if (J->state != LJ_TRACE_IDLE) { @@ -425,7 +360,6 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc) lua_assert(L->top - L->base == delta); } } -#endif if ((g->hookmask & LUA_MASKCOUNT) && g->hookcount == 0) { g->hookcount = g->hookcstart; callhook(L, LUA_HOOKCOUNT, -1); @@ -464,17 +398,14 @@ static int call_init(lua_State *L, GCfunc *fn) } /* Call dispatch. Used by call hooks, hot calls or when recording. */ -ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) +ASMFunction lj_dispatch_call(lua_State *L, const BCIns *pc) { ERRNO_SAVE GCfunc *fn = curr_func(L); BCOp op; global_State *g = G(L); -#if LJ_HASJIT jit_State *J = G2J(g); -#endif int missing = call_init(L, fn); -#if LJ_HASJIT J->L = L; if ((uintptr_t)pc & 1) { /* Marker for hot call. 
*/ #ifdef LUA_USE_ASSERT @@ -485,7 +416,7 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) lua_assert(L->top - L->base == delta); goto out; } else if (J->state != LJ_TRACE_IDLE && - !(g->hookmask & (HOOK_GC|HOOK_VMEVENT))) { + !(g->hookmask & HOOK_GC)) { #ifdef LUA_USE_ASSERT ptrdiff_t delta = L->top - L->base; #endif @@ -493,7 +424,6 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ lua_assert(L->top - L->base == delta); } -#endif if ((g->hookmask & LUA_MASKCALL)) { int i; for (i = 0; i < missing; i++) /* Add missing parameters. */ @@ -503,23 +433,18 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) while (missing-- > 0 && tvisnil(L->top - 1)) L->top--; } -#if LJ_HASJIT out: -#endif op = bc_op(pc[-1]); /* Get FUNC* op. */ -#if LJ_HASJIT /* Use the non-hotcounting variants if JIT is off or while recording. */ if ((!(J->flags & JIT_F_ON) || J->state != LJ_TRACE_IDLE) && (op == BC_FUNCF || op == BC_FUNCV)) op = (BCOp)((int)op+(int)BC_IFUNCF-(int)BC_FUNCF); -#endif ERRNO_RESTORE return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */ } -#if LJ_HASJIT /* Stitch a new trace. */ -void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc) +void lj_dispatch_stitch(jit_State *J, const BCIns *pc) { ERRNO_SAVE lua_State *L = J->L; @@ -532,26 +457,5 @@ void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc) setcframe_pc(cf, oldpc); ERRNO_RESTORE } -#endif -#if LJ_HASPROFILE -/* Profile dispatch. 
*/ -void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc) -{ - ERRNO_SAVE - GCfunc *fn = curr_func(L); - GCproto *pt = funcproto(fn); - void *cf = cframe_raw(L->cframe); - const BCIns *oldpc = cframe_pc(cf); - global_State *g; - setcframe_pc(cf, pc); - L->top = L->base + cur_topslot(pt, pc, cframe_multres_n(cf)); - lj_profile_interpreter(L); - setcframe_pc(cf, oldpc); - g = G(L); - setgcref(g->cur_L, obj2gco(L)); - setvmstate(g, INTERP); - ERRNO_RESTORE -} -#endif diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index 5bda51a213..1e48a1b172 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h @@ -8,63 +8,8 @@ #include "lj_obj.h" #include "lj_bc.h" -#if LJ_HASJIT #include "lj_jit.h" -#endif - -#if LJ_TARGET_MIPS -/* Need our own global offset table for the dreaded MIPS calling conventions. */ - -#ifndef _LJ_VM_H -LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b); -#endif - -#if LJ_SOFTFP -#ifndef _LJ_IRCALL_H -extern double __adddf3(double a, double b); -extern double __subdf3(double a, double b); -extern double __muldf3(double a, double b); -extern double __divdf3(double a, double b); -#endif -#define SFGOTDEF(_) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3) -#else -#define SFGOTDEF(_) -#endif -#if LJ_HASJIT -#define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot) -#else -#define JITGOTDEF(_) -#endif -#if LJ_HASFFI -#define FFIGOTDEF(_) \ - _(lj_meta_equal_cd) _(lj_ccallback_enter) _(lj_ccallback_leave) -#else -#define FFIGOTDEF(_) -#endif -#define GOTDEF(_) \ - _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ - _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ - _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \ - _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \ - _(lj_dispatch_profile) _(lj_err_throw) \ - _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ - _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ - _(lj_meta_call) 
_(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ - _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \ - _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_number) \ - _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \ - _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ - _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \ - _(lj_buf_putstr_upper) _(lj_buf_tostr) \ - JITGOTDEF(_) FFIGOTDEF(_) SFGOTDEF(_) -enum { -#define GOTENUM(name) LJ_GOT_##name, -GOTDEF(GOTENUM) -#undef GOTENUM - LJ_GOT__MAX -}; -#endif /* Type of hot counter. Must match the code in the assembler VM. */ /* 16 bits are sufficient. Only 0.0015% overhead with maximum slot penalty. */ @@ -89,13 +34,8 @@ typedef uint16_t HotCount; typedef struct GG_State { lua_State L; /* Main thread. */ global_State g; /* Global state. */ -#if LJ_TARGET_MIPS - ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */ -#endif -#if LJ_HASJIT jit_State J; /* JIT state. */ HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */ -#endif ASMFunction dispatch[GG_LEN_DISP]; /* Instruction dispatch tables. */ BCIns bcff[GG_NUM_ASMFF]; /* Bytecode for ASM fast functions. */ } GG_State; @@ -121,33 +61,19 @@ typedef struct GG_State { /* Dispatch table management. */ LJ_FUNC void lj_dispatch_init(GG_State *GG); -#if LJ_HASJIT LJ_FUNC void lj_dispatch_init_hotcount(global_State *g); -#endif LJ_FUNC void lj_dispatch_update(global_State *g); /* Instruction dispatch callback for hooks or when recording. 
*/ -LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc); -LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc); -#if LJ_HASJIT -LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc); -#endif -#if LJ_HASPROFILE -LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc); -#endif +LJ_FUNCA void lj_dispatch_ins(lua_State *L, const BCIns *pc); +LJ_FUNCA ASMFunction lj_dispatch_call(lua_State *L, const BCIns*pc); +LJ_FUNCA void lj_dispatch_stitch(jit_State *J, const BCIns *pc); -#if LJ_HASFFI && !defined(_BUILDVM_H) +#if !defined(_BUILDVM_H) /* Save/restore errno and GetLastError() around hooks, exits and recording. */ #include -#if LJ_TARGET_WINDOWS -#define WIN32_LEAN_AND_MEAN -#include -#define ERRNO_SAVE int olderr = errno; DWORD oldwerr = GetLastError(); -#define ERRNO_RESTORE errno = olderr; SetLastError(oldwerr); -#else #define ERRNO_SAVE int olderr = errno; #define ERRNO_RESTORE errno = olderr; -#endif #else #define ERRNO_SAVE #define ERRNO_RESTORE diff --git a/src/lj_dwarf.c b/src/lj_dwarf.c new file mode 100644 index 0000000000..c98b13780d --- /dev/null +++ b/src/lj_dwarf.c @@ -0,0 +1,27 @@ +/* +** Compilation unit for DWARF debug information. +*/ + +#include "lj_obj.h" +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_debug.h" +#include "lj_str.h" +#include "lj_frame.h" +#include "lj_state.h" +#include "lj_bc.h" +#include "lj_ir.h" +#include "lj_ircall.h" +#include "lj_jit.h" +#include "lj_iropt.h" +#include "lj_mcode.h" +#include "lj_trace.h" +#include "lj_snap.h" +#include "lj_gdbjit.h" +#include "lj_record.h" +#include "lj_asm.h" +#include "lj_dispatch.h" +#include "lj_vm.h" +#include "lj_target.h" +#include "lj_ff.h" + diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h deleted file mode 100644 index dee8bdccd1..0000000000 --- a/src/lj_emit_arm.h +++ /dev/null @@ -1,357 +0,0 @@ -/* -** ARM instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -*/ - -/* -- Constant encoding --------------------------------------------------- */ - -static uint8_t emit_invai[16] = { - /* AND */ (ARMI_AND^ARMI_BIC) >> 21, - /* EOR */ 0, - /* SUB */ (ARMI_SUB^ARMI_ADD) >> 21, - /* RSB */ 0, - /* ADD */ (ARMI_ADD^ARMI_SUB) >> 21, - /* ADC */ (ARMI_ADC^ARMI_SBC) >> 21, - /* SBC */ (ARMI_SBC^ARMI_ADC) >> 21, - /* RSC */ 0, - /* TST */ 0, - /* TEQ */ 0, - /* CMP */ (ARMI_CMP^ARMI_CMN) >> 21, - /* CMN */ (ARMI_CMN^ARMI_CMP) >> 21, - /* ORR */ 0, - /* MOV */ (ARMI_MOV^ARMI_MVN) >> 21, - /* BIC */ (ARMI_BIC^ARMI_AND) >> 21, - /* MVN */ (ARMI_MVN^ARMI_MOV) >> 21 -}; - -/* Encode constant in K12 format for data processing instructions. */ -static uint32_t emit_isk12(ARMIns ai, int32_t n) -{ - uint32_t invai, i, m = (uint32_t)n; - /* K12: unsigned 8 bit value, rotated in steps of two bits. */ - for (i = 0; i < 4096; i += 256, m = lj_rol(m, 2)) - if (m <= 255) return ARMI_K12|m|i; - /* Otherwise try negation/complement with the inverse instruction. */ - invai = emit_invai[((ai >> 21) & 15)]; - if (!invai) return 0; /* Failed. No inverse instruction. */ - m = ~(uint32_t)n; - if (invai == ((ARMI_SUB^ARMI_ADD) >> 21) || - invai == (ARMI_CMP^ARMI_CMN) >> 21) m++; - for (i = 0; i < 4096; i += 256, m = lj_rol(m, 2)) - if (m <= 255) return ARMI_K12|(invai<<21)|m|i; - return 0; /* Failed. 
*/ -} - -/* -- Emit basic instructions --------------------------------------------- */ - -static void emit_dnm(ASMState *as, ARMIns ai, Reg rd, Reg rn, Reg rm) -{ - *--as->mcp = ai | ARMF_D(rd) | ARMF_N(rn) | ARMF_M(rm); -} - -static void emit_dm(ASMState *as, ARMIns ai, Reg rd, Reg rm) -{ - *--as->mcp = ai | ARMF_D(rd) | ARMF_M(rm); -} - -static void emit_dn(ASMState *as, ARMIns ai, Reg rd, Reg rn) -{ - *--as->mcp = ai | ARMF_D(rd) | ARMF_N(rn); -} - -static void emit_nm(ASMState *as, ARMIns ai, Reg rn, Reg rm) -{ - *--as->mcp = ai | ARMF_N(rn) | ARMF_M(rm); -} - -static void emit_d(ASMState *as, ARMIns ai, Reg rd) -{ - *--as->mcp = ai | ARMF_D(rd); -} - -static void emit_n(ASMState *as, ARMIns ai, Reg rn) -{ - *--as->mcp = ai | ARMF_N(rn); -} - -static void emit_m(ASMState *as, ARMIns ai, Reg rm) -{ - *--as->mcp = ai | ARMF_M(rm); -} - -static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) -{ - lua_assert(ofs >= -255 && ofs <= 255); - if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; - *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) | - ((ofs & 0xf0) << 4) | (ofs & 0x0f); -} - -static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) -{ - lua_assert(ofs >= -4095 && ofs <= 4095); - /* Combine LDR/STR pairs to LDRD/STRD. */ - if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) && - (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn && - (uint32_t)ofs <= 252 && !(ofs & 3) && !((rd ^ (ofs >>2)) & 1) && - as->mcp != as->mcloop) { - as->mcp++; - emit_lsox(as, ai == ARMI_LDR ? 
ARMI_LDRD : ARMI_STRD, rd&~1, rn, ofs&~4); - return; - } - if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; - *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd) | ARMF_N(rn) | ofs; -} - -#if !LJ_SOFTFP -static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) -{ - lua_assert(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0); - if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; - *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2); -} -#endif - -/* -- Emit loads/stores --------------------------------------------------- */ - -/* Prefer spills of BASE/L. */ -#define emit_canremat(ref) ((ref) < ASMREF_L) - -/* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg d, int32_t i) -{ - RegSet work = ~as->freeset & RSET_GPR; - while (work) { - Reg r = rset_picktop(work); - IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != d); - if (emit_canremat(ref)) { - int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); - uint32_t k = emit_isk12(ARMI_ADD, delta); - if (k) { - if (k == ARMI_K12) - emit_dm(as, ARMI_MOV, d, r); - else - emit_dn(as, ARMI_ADD^k, d, r); - return 1; - } - } - rset_clear(work, r); - } - return 0; /* Failed. */ -} - -/* Try to find a two step delta relative to another constant. */ -static int emit_kdelta2(ASMState *as, Reg d, int32_t i) -{ - RegSet work = ~as->freeset & RSET_GPR; - while (work) { - Reg r = rset_picktop(work); - IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != d); - if (emit_canremat(ref)) { - int32_t other = ra_iskref(ref) ? 
ra_krefk(as, ref) : IR(ref)->i; - if (other) { - int32_t delta = i - other; - uint32_t sh, inv = 0, k2, k; - if (delta < 0) { delta = -delta; inv = ARMI_ADD^ARMI_SUB; } - sh = lj_ffs(delta) & ~1; - k2 = emit_isk12(0, delta & (255 << sh)); - k = emit_isk12(0, delta & ~(255 << sh)); - if (k) { - emit_dn(as, ARMI_ADD^k2^inv, d, d); - emit_dn(as, ARMI_ADD^k^inv, d, r); - return 1; - } - } - } - rset_clear(work, r); - } - return 0; /* Failed. */ -} - -/* Load a 32 bit constant into a GPR. */ -static void emit_loadi(ASMState *as, Reg r, int32_t i) -{ - uint32_t k = emit_isk12(ARMI_MOV, i); - lua_assert(rset_test(as->freeset, r) || r == RID_TMP); - if (k) { - /* Standard K12 constant. */ - emit_d(as, ARMI_MOV^k, r); - } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { - /* 16 bit loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); - } else if (emit_kdelta1(as, r, i)) { - /* One step delta relative to another constant. */ - } else if ((as->flags & JIT_F_ARMV6T2)) { - /* 32 bit hiword/loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), r); - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); - } else if (emit_kdelta2(as, r, i)) { - /* Two step delta relative to another constant. */ - } else { - /* Otherwise construct the constant with up to 4 instructions. */ - /* NYI: use mvn+bic, use pc-relative loads. */ - for (;;) { - uint32_t sh = lj_ffs(i) & ~1; - int32_t m = i & (255 << sh); - i &= ~(255 << sh); - if (i == 0) { - emit_d(as, ARMI_MOV ^ emit_isk12(0, m), r); - break; - } - emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), r, r); - } - } -} - -#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) - -static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); - -/* Get/set from constant pointer. 
*/ -static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) -{ - int32_t i = i32ptr(p); - emit_lso(as, ai, r, ra_allock(as, (i & ~4095), rset_exclude(RSET_GPR, r)), - (i & 4095)); -} - -#if !LJ_SOFTFP -/* Load a number constant into an FPR. */ -static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) -{ - cTValue *tv = ir_knum(ir); - int32_t i; - if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { - uint32_t hi = tv->u32.hi; - uint32_t b = ((hi >> 22) & 0x1ff); - if (!(hi & 0xffff) && (b == 0x100 || b == 0x0ff)) { - *--as->mcp = ARMI_VMOVI_D | ARMF_D(r & 15) | - ((tv->u32.hi >> 12) & 0x00080000) | - ((tv->u32.hi >> 4) & 0x00070000) | - ((tv->u32.hi >> 16) & 0x0000000f); - return; - } - } - i = i32ptr(tv); - emit_vlso(as, ARMI_VLDR_D, r, - ra_allock(as, (i & ~1020), RSET_GPR), (i & 1020)); -} -#endif - -/* Get/set global_State fields. */ -#define emit_getgl(as, r, field) \ - emit_lsptr(as, ARMI_LDR, (r), (void *)&J2G(as->J)->field) -#define emit_setgl(as, r, field) \ - emit_lsptr(as, ARMI_STR, (r), (void *)&J2G(as->J)->field) - -/* Trace number is determined from pc of exit instruction. */ -#define emit_setvmstate(as, i) UNUSED(i) - -/* -- Emit control-flow instructions -------------------------------------- */ - -/* Label for internal jumps. */ -typedef MCode *MCLabel; - -/* Return label pointing to current PC. 
*/ -#define emit_label(as) ((as)->mcp) - -static void emit_branch(ASMState *as, ARMIns ai, MCode *target) -{ - MCode *p = as->mcp; - ptrdiff_t delta = (target - p) - 1; - lua_assert(((delta + 0x00800000) >> 24) == 0); - *--p = ai | ((uint32_t)delta & 0x00ffffffu); - as->mcp = p; -} - -#define emit_jmp(as, target) emit_branch(as, ARMI_B, (target)) - -static void emit_call(ASMState *as, void *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = ((char *)target - (char *)p) - 8; - if ((((delta>>2) + 0x00800000) >> 24) == 0) { - if ((delta & 1)) - *p = ARMI_BLX | ((uint32_t)(delta>>2) & 0x00ffffffu) | ((delta&2) << 23); - else - *p = ARMI_BL | ((uint32_t)(delta>>2) & 0x00ffffffu); - } else { /* Target out of range: need indirect call. But don't use R0-R3. */ - Reg r = ra_allock(as, i32ptr(target), RSET_RANGE(RID_R4, RID_R12+1)); - *p = ARMI_BLXr | ARMF_M(r); - } -} - -/* -- Emit generic operations --------------------------------------------- */ - -/* Generic move between two regs. */ -static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) -{ -#if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); -#else - if (dst >= RID_MAX_GPR) { - emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S, - (dst & 15), (src & 15)); - return; - } -#endif - if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */ - MCode ins = *as->mcp, swp = (src^dst); - if ((ins & 0x0c000000) == 0x04000000 && (ins & 0x02000010) != 0x02000010) { - if (!((ins ^ (dst << 16)) & 0x000f0000)) - *as->mcp = ins ^ (swp << 16); /* Swap N in load/store. */ - if (!(ins & 0x00100000) && !((ins ^ (dst << 12)) & 0x0000f000)) - *as->mcp = ins ^ (swp << 12); /* Swap D in store. */ - } - } - emit_dm(as, ARMI_MOV, dst, src); -} - -/* Generic load of register with base and (small) offset address. 
*/ -static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ -#if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); -#else - if (r >= RID_MAX_GPR) - emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs); - else -#endif - emit_lso(as, ARMI_LDR, r, base, ofs); -} - -/* Generic store of register with base and (small) offset address. */ -static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ -#if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); -#else - if (r >= RID_MAX_GPR) - emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs); - else -#endif - emit_lso(as, ARMI_STR, r, base, ofs); -} - -/* Emit an arithmetic/logic operation with a constant operand. */ -static void emit_opk(ASMState *as, ARMIns ai, Reg dest, Reg src, - int32_t i, RegSet allow) -{ - uint32_t k = emit_isk12(ai, i); - if (k) - emit_dn(as, ai^k, dest, src); - else - emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); -} - -/* Add offset to pointer. */ -static void emit_addptr(ASMState *as, Reg r, int32_t ofs) -{ - if (ofs) - emit_opk(as, ARMI_ADD, r, r, ofs, rset_exclude(RSET_GPR, r)); -} - -#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) - diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h deleted file mode 100644 index cfa18c83c2..0000000000 --- a/src/lj_emit_arm64.h +++ /dev/null @@ -1,419 +0,0 @@ -/* -** ARM64 instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -** -** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -** Sponsored by Cisco Systems, Inc. 
-*/ - -/* -- Constant encoding --------------------------------------------------- */ - -static uint64_t get_k64val(IRIns *ir) -{ - if (ir->o == IR_KINT64) { - return ir_kint64(ir)->u64; - } else if (ir->o == IR_KGC) { - return (uint64_t)ir_kgc(ir); - } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { - return (uint64_t)ir_kptr(ir); - } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); - return ir->i; /* Sign-extended. */ - } -} - -/* Encode constant in K12 format for data processing instructions. */ -static uint32_t emit_isk12(int64_t n) -{ - uint64_t k = (n < 0) ? -n : n; - uint32_t m = (n < 0) ? 0x40000000 : 0; - if (k < 0x1000) { - return A64I_K12|m|A64F_U12(k); - } else if ((k & 0xfff000) == k) { - return A64I_K12|m|0x400000|A64F_U12(k>>12); - } - return 0; -} - -#define emit_clz64(n) __builtin_clzll(n) -#define emit_ctz64(n) __builtin_ctzll(n) - -/* Encode constant in K13 format for logical data processing instructions. */ -static uint32_t emit_isk13(uint64_t n, int is64) -{ - int inv = 0, w = 128, lz, tz; - if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */ - if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */ - do { /* Find the repeat width. */ - if (is64 && (uint32_t)(n^(n>>32))) break; - n = (uint32_t)n; - if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. */ - w = 32; if ((n^(n>>16)) & 0xffff) break; - n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break; - n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break; - n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break; - n = n & 0x3; w = 2; - } while (0); - lz = emit_clz64(n); - tz = emit_ctz64(n); - if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? 
*/ - if (inv) - return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10); - else - return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10); -} - -static uint32_t emit_isfpk64(uint64_t n) -{ - uint64_t etop9 = ((n >> 54) & 0x1ff); - if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) { - return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80)); - } - return ~0u; -} - -/* -- Emit basic instructions --------------------------------------------- */ - -static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra) -{ - *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra); -} - -static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) -{ - *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); -} - -static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm) -{ - *--as->mcp = ai | A64F_D(rd) | A64F_M(rm); -} - -static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn) -{ - *--as->mcp = ai | A64F_D(rd) | A64F_N(rn); -} - -static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm) -{ - *--as->mcp = ai | A64F_N(rn) | A64F_M(rm); -} - -static void emit_d(ASMState *as, A64Ins ai, Reg rd) -{ - *--as->mcp = ai | A64F_D(rd); -} - -static void emit_n(ASMState *as, A64Ins ai, Reg rn) -{ - *--as->mcp = ai | A64F_N(rn); -} - -static int emit_checkofs(A64Ins ai, int64_t ofs) -{ - int scale = (ai >> 30) & 3; - if (ofs < 0 || (ofs & ((1<= -256 && ofs <= 255) ? -1 : 0; - } else { - return (ofs < (4096<> 30) & 3; - lua_assert(ot); - /* Combine LDR/STR pairs to LDP/STP. 
*/ - if ((sc == 2 || sc == 3) && - (!(ai & 0x400000) || rd != rn) && - as->mcp != as->mcloop) { - uint32_t prev = *as->mcp & ~A64F_D(31); - int ofsm = ofs - (1<>sc)) || - prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) { - aip = (A64F_A(rd) | A64F_D(*as->mcp & 31)); - } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) || - prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) { - aip = (A64F_D(rd) | A64F_A(*as->mcp & 31)); - ofsm = ofs; - } else { - goto nopair; - } - if (ofsm >= (-64<mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | - (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); - return; - } - } -nopair: - if (ot == 1) - *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc); - else - *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff); -} - -/* -- Emit loads/stores --------------------------------------------------- */ - -/* Prefer rematerialization of BASE/L from global_State over spills. */ -#define emit_canremat(ref) ((ref) <= ASMREF_L) - -/* Try to find an N-step delta relative to other consts with N < lim. */ -static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) -{ - RegSet work = ~as->freeset & RSET_GPR; - if (lim <= 1) return 0; /* Can't beat that. */ - while (work) { - Reg r = rset_picktop(work); - IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != rd); - if (ref < REF_TRUE) { - uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : - get_k64val(IR(ref)); - int64_t delta = (int64_t)(k - kx); - if (delta == 0) { - emit_dm(as, A64I_MOVx, rd, r); - return 1; - } else { - uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta); - if (k12) { - emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r); - return 1; - } - /* Do other ops or multi-step deltas pay off? Probably not. - ** E.g. XOR rarely helps with pointer consts. - */ - } - } - rset_clear(work, r); - } - return 0; /* Failed. 
*/ -} - -static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) -{ - uint32_t k13 = emit_isk13(u64, is64); - if (k13) { /* Can the constant be represented as a bitmask immediate? */ - emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); - } else { - int i, zeros = 0, ones = 0, neg; - if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ - /* Count homogeneous 16 bit fragments. */ - for (i = 0; i < 4; i++) { - uint64_t frag = (u64 >> i*16) & 0xffff; - zeros += (frag == 0); - ones += (frag == 0xffff); - } - neg = ones > zeros; /* Use MOVN if it pays off. */ - if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { - int shift = 0, lshift = 0; - uint64_t n64 = neg ? ~u64 : u64; - if (n64 != 0) { - /* Find first/last fragment to be filled. */ - shift = (63-emit_clz64(n64)) & ~15; - lshift = emit_ctz64(n64) & ~15; - } - /* MOVK requires the original value (u64). */ - while (shift > lshift) { - uint32_t u16 = (u64 >> shift) & 0xffff; - /* Skip fragments that are correctly filled by MOVN/MOVZ. */ - if (u16 != (neg ? 0xffff : 0)) - emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); - shift -= 16; - } - /* But MOVN needs an inverted value (n64). */ - emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | - A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); - } - } -} - -/* Load a 32 bit constant into a GPR. */ -#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0) - -/* Load a 64 bit constant into a GPR. */ -#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X) - -#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr)) - -#define glofs(as, k) \ - ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) -#define mcpofs(as, k) \ - ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1))) -#define checkmcpofs(as, k) \ - ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0) - -static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); - -/* Get/set from constant pointer. 
*/ -static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) -{ - /* First, check if ip + offset is in range. */ - if ((ai & 0x00400000) && checkmcpofs(as, p)) { - emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); - } else { - Reg base = RID_GL; /* Next, try GL + offset. */ - int64_t ofs = glofs(as, p); - if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */ - int64_t i64 = i64ptr(p); - base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); - ofs = i64 & 0x7fffull; - } - emit_lso(as, ai, r, base, ofs); - } -} - -/* Load 64 bit IR constant into register. */ -static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) -{ - const uint64_t *k = &ir_k64(ir)->u64; - int64_t ofs; - if (r >= RID_MAX_GPR) { - uint32_t fpk = emit_isfpk64(*k); - if (fpk != ~0u) { - emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31)); - return; - } - } - ofs = glofs(as, k); - if (emit_checkofs(A64I_LDRx, ofs)) { - emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, - (r & 31), RID_GL, ofs); - } else { - if (r >= RID_MAX_GPR) { - emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); - r = RID_TMP; - } - if (checkmcpofs(as, k)) - emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r); - else - emit_loadu64(as, r, *k); - } -} - -/* Get/set global_State fields. */ -#define emit_getgl(as, r, field) \ - emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field) -#define emit_setgl(as, r, field) \ - emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field) - -/* Trace number is determined from pc of exit instruction. */ -#define emit_setvmstate(as, i) UNUSED(i) - -/* -- Emit control-flow instructions -------------------------------------- */ - -/* Label for internal jumps. */ -typedef MCode *MCLabel; - -/* Return label pointing to current PC. 
*/ -#define emit_label(as) ((as)->mcp) - -static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; - lua_assert(((delta + 0x40000) >> 19) == 0); - *p = A64I_BCC | A64F_S19(delta) | cond; -} - -static void emit_branch(ASMState *as, A64Ins ai, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; - lua_assert(((delta + 0x02000000) >> 26) == 0); - *p = ai | ((uint32_t)delta & 0x03ffffffu); -} - -static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; - lua_assert(bit < 63 && ((delta + 0x2000) >> 14) == 0); - if (bit > 31) ai |= A64I_X; - *p = ai | A64F_BIT(bit & 31) | A64F_S14((uint32_t)delta & 0x3fffu) | r; -} - -static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = target - p; - lua_assert(((delta + 0x40000) >> 19) == 0); - *p = ai | A64F_S19(delta) | r; -} - -#define emit_jmp(as, target) emit_branch(as, A64I_B, (target)) - -static void emit_call(ASMState *as, void *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = (char *)target - (char *)p; - if ((((delta>>2) + 0x02000000) >> 26) == 0) { - *p = A64I_BL | ((uint32_t)(delta>>2) & 0x03ffffffu); - } else { /* Target out of range: need indirect call. But don't use R0-R7. */ - Reg r = ra_allock(as, i64ptr(target), - RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); - *p = A64I_BLR | A64F_N(r); - } -} - -/* -- Emit generic operations --------------------------------------------- */ - -/* Generic move between two regs. */ -static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) -{ - if (dst >= RID_MAX_GPR) { - emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S, - (dst & 31), (src & 31)); - return; - } - if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. 
*/ - MCode ins = *as->mcp, swp = (src^dst); - if ((ins & 0xbf800000) == 0xb9000000) { - if (!((ins ^ (dst << 5)) & 0x000003e0)) - *as->mcp = ins ^ (swp << 5); /* Swap N in load/store. */ - if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f)) - *as->mcp = ins ^ swp; /* Swap D in store. */ - } - } - emit_dm(as, A64I_MOVx, dst, src); -} - -/* Generic load of register with base and (small) offset address. */ -static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r >= RID_MAX_GPR) - emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs); - else - emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs); -} - -/* Generic store of register with base and (small) offset address. */ -static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r >= RID_MAX_GPR) - emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs); - else - emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs); -} - -/* Emit an arithmetic operation with a constant operand. */ -static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src, - int32_t i, RegSet allow) -{ - uint32_t k = emit_isk12(i); - if (k) - emit_dn(as, ai^k, dest, src); - else - emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); -} - -/* Add offset to pointer. */ -static void emit_addptr(ASMState *as, Reg r, int32_t ofs) -{ - if (ofs) - emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r, - ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r)); -} - -#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) - diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h deleted file mode 100644 index 8a9ee24dce..0000000000 --- a/src/lj_emit_mips.h +++ /dev/null @@ -1,293 +0,0 @@ -/* -** MIPS instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -*/ - -#if LJ_64 -static intptr_t get_k64val(IRIns *ir) -{ - if (ir->o == IR_KINT64) { - return (intptr_t)ir_kint64(ir)->u64; - } else if (ir->o == IR_KGC) { - return (intptr_t)ir_kgc(ir); - } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { - return (intptr_t)ir_kptr(ir); - } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); - return ir->i; /* Sign-extended. */ - } -} -#endif - -#if LJ_64 -#define get_kval(ir) get_k64val(ir) -#else -#define get_kval(ir) ((ir)->i) -#endif - -/* -- Emit basic instructions --------------------------------------------- */ - -static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt) -{ - *--as->mcp = mi | MIPSF_D(rd) | MIPSF_S(rs) | MIPSF_T(rt); -} - -static void emit_dta(ASMState *as, MIPSIns mi, Reg rd, Reg rt, uint32_t a) -{ - *--as->mcp = mi | MIPSF_D(rd) | MIPSF_T(rt) | MIPSF_A(a); -} - -#define emit_ds(as, mi, rd, rs) emit_dst(as, (mi), (rd), (rs), 0) -#define emit_tg(as, mi, rt, rg) emit_dst(as, (mi), (rg)&31, 0, (rt)) - -static void emit_tsi(ASMState *as, MIPSIns mi, Reg rt, Reg rs, int32_t i) -{ - *--as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | (i & 0xffff); -} - -#define emit_ti(as, mi, rt, i) emit_tsi(as, (mi), (rt), 0, (i)) -#define emit_hsi(as, mi, rh, rs, i) emit_tsi(as, (mi), (rh) & 31, (rs), (i)) - -static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh) -{ - *--as->mcp = mi | MIPSF_F(rf&31) | MIPSF_G(rg&31) | MIPSF_H(rh&31); -} - -#define emit_fg(as, mi, rf, rg) emit_fgh(as, (mi), (rf), (rg), 0) - -static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) -{ - if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { - emit_dta(as, MIPSI_ROTR, dest, src, shift); - } else { - emit_dst(as, MIPSI_OR, dest, dest, tmp); - emit_dta(as, MIPSI_SLL, dest, src, (-shift)&31); - emit_dta(as, MIPSI_SRL, tmp, src, shift); - } -} - -#if LJ_64 -static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, - uint32_t lsb) -{ - *--as->mcp = mi 
| MIPSF_T(rt) | MIPSF_S(rs) | MIPSF_M(msb) | MIPSF_L(lsb); -} -#endif - -/* -- Emit loads/stores --------------------------------------------------- */ - -/* Prefer rematerialization of BASE/L from global_State over spills. */ -#define emit_canremat(ref) ((ref) <= REF_BASE) - -/* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg t, intptr_t i) -{ - RegSet work = ~as->freeset & RSET_GPR; - while (work) { - Reg r = rset_picktop(work); - IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != t); - if (ref < ASMREF_L) { - intptr_t delta = (intptr_t)((uintptr_t)i - - (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(IR(ref)))); - if (checki16(delta)) { - emit_tsi(as, MIPSI_AADDIU, t, r, delta); - return 1; - } - } - rset_clear(work, r); - } - return 0; /* Failed. */ -} - -/* Load a 32 bit constant into a GPR. */ -static void emit_loadi(ASMState *as, Reg r, int32_t i) -{ - if (checki16(i)) { - emit_ti(as, MIPSI_LI, r, i); - } else { - if ((i & 0xffff)) { - intptr_t jgl = (intptr_t)(void *)J2G(as->J); - if ((uintptr_t)(i-jgl) < 65536) { - emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); - return; - } else if (emit_kdelta1(as, r, i)) { - return; - } else if ((i >> 16) == 0) { - emit_tsi(as, MIPSI_ORI, r, RID_ZERO, i); - return; - } - emit_tsi(as, MIPSI_ORI, r, r, i); - } - emit_ti(as, MIPSI_LUI, r, (i >> 16)); - } -} - -#if LJ_64 -/* Load a 64 bit constant into a GPR. 
*/ -static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) -{ - if (checki32((int64_t)u64)) { - emit_loadi(as, r, (int32_t)u64); - } else { - uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J); - if (delta < 65536) { - emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768)); - } else if (emit_kdelta1(as, r, (intptr_t)u64)) { - return; - } else { - if ((u64 & 0xffff)) { - emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff); - } - if (((u64 >> 16) & 0xffff)) { - emit_dta(as, MIPSI_DSLL, r, r, 16); - emit_tsi(as, MIPSI_ORI, r, r, (u64 >> 16) & 0xffff); - emit_dta(as, MIPSI_DSLL, r, r, 16); - } else { - emit_dta(as, MIPSI_DSLL32, r, r, 0); - } - emit_loadi(as, r, (int32_t)(u64 >> 32)); - } - /* TODO: There are probably more optimization opportunities. */ - } -} - -#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) -#else -#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) -#endif - -static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); -static void ra_allockreg(ASMState *as, intptr_t k, Reg r); - -/* Get/set from constant pointer. */ -static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) -{ - intptr_t jgl = (intptr_t)(J2G(as->J)); - intptr_t i = (intptr_t)(p); - Reg base; - if ((uint32_t)(i-jgl) < 65536) { - i = i-jgl-32768; - base = RID_JGL; - } else { - base = ra_allock(as, i-(int16_t)i, allow); - } - emit_tsi(as, mi, r, base, i); -} - -#if LJ_64 -static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) -{ - const uint64_t *k = &ir_k64(ir)->u64; - Reg r64 = r; - if (rset_test(RSET_FPR, r)) { - r64 = RID_TMP; - emit_tg(as, MIPSI_DMTC1, r64, r); - } - if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536) - emit_lsptr(as, MIPSI_LD, r64, (void *)k, 0); - else - emit_loadu64(as, r64, *k); -} -#else -#define emit_loadk64(as, r, ir) \ - emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) -#endif - -/* Get/set global_State fields. 
*/ -static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) -{ - emit_tsi(as, mi, r, RID_JGL, ofs-32768); -} - -#define emit_getgl(as, r, field) \ - emit_lsglptr(as, MIPSI_AL, (r), (int32_t)offsetof(global_State, field)) -#define emit_setgl(as, r, field) \ - emit_lsglptr(as, MIPSI_AS, (r), (int32_t)offsetof(global_State, field)) - -/* Trace number is determined from per-trace exit stubs. */ -#define emit_setvmstate(as, i) UNUSED(i) - -/* -- Emit control-flow instructions -------------------------------------- */ - -/* Label for internal jumps. */ -typedef MCode *MCLabel; - -/* Return label pointing to current PC. */ -#define emit_label(as) ((as)->mcp) - -static void emit_branch(ASMState *as, MIPSIns mi, Reg rs, Reg rt, MCode *target) -{ - MCode *p = as->mcp; - ptrdiff_t delta = target - p; - lua_assert(((delta + 0x8000) >> 16) == 0); - *--p = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu); - as->mcp = p; -} - -static void emit_jmp(ASMState *as, MCode *target) -{ - *--as->mcp = MIPSI_NOP; - emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target)); -} - -static void emit_call(ASMState *as, void *target, int needcfa) -{ - MCode *p = as->mcp; - *--p = MIPSI_NOP; - if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) { - *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) | - (((uintptr_t)target >>2) & 0x03ffffffu); - } else { /* Target out of range: need indirect call. */ - *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); - needcfa = 1; - } - as->mcp = p; - if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); -} - -/* -- Emit generic operations --------------------------------------------- */ - -#define emit_move(as, dst, src) \ - emit_ds(as, MIPSI_MOVE, (dst), (src)) - -/* Generic move between two regs. */ -static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) -{ - if (dst < RID_MAX_GPR) - emit_move(as, dst, src); - else - emit_fg(as, irt_isnum(ir->t) ? 
MIPSI_MOV_D : MIPSI_MOV_S, dst, src); -} - -/* Generic load of register with base and (small) offset address. */ -static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r < RID_MAX_GPR) - emit_tsi(as, irt_is64(ir->t) ? MIPSI_LD : MIPSI_LW, r, base, ofs); - else - emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, - (r & 31), base, ofs); -} - -/* Generic store of register with base and (small) offset address. */ -static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r < RID_MAX_GPR) - emit_tsi(as, irt_is64(ir->t) ? MIPSI_SD : MIPSI_SW, r, base, ofs); - else - emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, - (r&31), base, ofs); -} - -/* Add offset to pointer. */ -static void emit_addptr(ASMState *as, Reg r, int32_t ofs) -{ - if (ofs) { - lua_assert(checki16(ofs)); - emit_tsi(as, MIPSI_AADDIU, r, r, ofs); - } -} - -#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) - diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h deleted file mode 100644 index 21c3c2ace7..0000000000 --- a/src/lj_emit_ppc.h +++ /dev/null @@ -1,238 +0,0 @@ -/* -** PPC instruction emitter. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -*/ - -/* -- Emit basic instructions --------------------------------------------- */ - -static void emit_tab(ASMState *as, PPCIns pi, Reg rt, Reg ra, Reg rb) -{ - *--as->mcp = pi | PPCF_T(rt) | PPCF_A(ra) | PPCF_B(rb); -} - -#define emit_asb(as, pi, ra, rs, rb) emit_tab(as, (pi), (rs), (ra), (rb)) -#define emit_as(as, pi, ra, rs) emit_tab(as, (pi), (rs), (ra), 0) -#define emit_ab(as, pi, ra, rb) emit_tab(as, (pi), 0, (ra), (rb)) - -static void emit_tai(ASMState *as, PPCIns pi, Reg rt, Reg ra, int32_t i) -{ - *--as->mcp = pi | PPCF_T(rt) | PPCF_A(ra) | (i & 0xffff); -} - -#define emit_ti(as, pi, rt, i) emit_tai(as, (pi), (rt), 0, (i)) -#define emit_ai(as, pi, ra, i) emit_tai(as, (pi), 0, (ra), (i)) -#define emit_asi(as, pi, ra, rs, i) emit_tai(as, (pi), (rs), (ra), (i)) - -#define emit_fab(as, pi, rf, ra, rb) \ - emit_tab(as, (pi), (rf)&31, (ra)&31, (rb)&31) -#define emit_fb(as, pi, rf, rb) emit_tab(as, (pi), (rf)&31, 0, (rb)&31) -#define emit_fac(as, pi, rf, ra, rc) \ - emit_tab(as, (pi) | PPCF_C((rc) & 31), (rf)&31, (ra)&31, 0) -#define emit_facb(as, pi, rf, ra, rc, rb) \ - emit_tab(as, (pi) | PPCF_C((rc) & 31), (rf)&31, (ra)&31, (rb)&31) -#define emit_fai(as, pi, rf, ra, i) emit_tai(as, (pi), (rf)&31, (ra), (i)) - -static void emit_rot(ASMState *as, PPCIns pi, Reg ra, Reg rs, - int32_t n, int32_t b, int32_t e) -{ - *--as->mcp = pi | PPCF_T(rs) | PPCF_A(ra) | PPCF_B(n) | - PPCF_MB(b) | PPCF_ME(e); -} - -static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n) -{ - lua_assert(n >= 0 && n < 32); - emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n); -} - -static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) -{ - lua_assert(n >= 0 && n < 32); - emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31); -} - -/* -- Emit loads/stores --------------------------------------------------- */ - -/* Prefer rematerialization of BASE/L from global_State over spills. 
*/ -#define emit_canremat(ref) ((ref) <= REF_BASE) - -/* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg t, int32_t i) -{ - RegSet work = ~as->freeset & RSET_GPR; - while (work) { - Reg r = rset_picktop(work); - IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != t); - if (ref < ASMREF_L) { - int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); - if (checki16(delta)) { - emit_tai(as, PPCI_ADDI, t, r, delta); - return 1; - } - } - rset_clear(work, r); - } - return 0; /* Failed. */ -} - -/* Load a 32 bit constant into a GPR. */ -static void emit_loadi(ASMState *as, Reg r, int32_t i) -{ - if (checki16(i)) { - emit_ti(as, PPCI_LI, r, i); - } else { - if ((i & 0xffff)) { - int32_t jgl = i32ptr(J2G(as->J)); - if ((uint32_t)(i-jgl) < 65536) { - emit_tai(as, PPCI_ADDI, r, RID_JGL, i-jgl-32768); - return; - } else if (emit_kdelta1(as, r, i)) { - return; - } - emit_asi(as, PPCI_ORI, r, r, i); - } - emit_ti(as, PPCI_LIS, r, (i >> 16)); - } -} - -#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) - -static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); - -/* Get/set from constant pointer. */ -static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) -{ - int32_t jgl = i32ptr(J2G(as->J)); - int32_t i = i32ptr(p); - Reg base; - if ((uint32_t)(i-jgl) < 65536) { - i = i-jgl-32768; - base = RID_JGL; - } else { - base = ra_allock(as, i-(int16_t)i, allow); - } - emit_tai(as, pi, r, base, i); -} - -#define emit_loadk64(as, r, ir) \ - emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) - -/* Get/set global_State fields. 
*/ -static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs) -{ - emit_tai(as, pi, r, RID_JGL, ofs-32768); -} - -#define emit_getgl(as, r, field) \ - emit_lsglptr(as, PPCI_LWZ, (r), (int32_t)offsetof(global_State, field)) -#define emit_setgl(as, r, field) \ - emit_lsglptr(as, PPCI_STW, (r), (int32_t)offsetof(global_State, field)) - -/* Trace number is determined from per-trace exit stubs. */ -#define emit_setvmstate(as, i) UNUSED(i) - -/* -- Emit control-flow instructions -------------------------------------- */ - -/* Label for internal jumps. */ -typedef MCode *MCLabel; - -/* Return label pointing to current PC. */ -#define emit_label(as) ((as)->mcp) - -static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = (char *)target - (char *)p; - lua_assert(((delta + 0x8000) >> 16) == 0); - pi ^= (delta & 0x8000) * (PPCF_Y/0x8000); - *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu); -} - -static void emit_jmp(ASMState *as, MCode *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = (char *)target - (char *)p; - *p = PPCI_B | (delta & 0x03fffffcu); -} - -static void emit_call(ASMState *as, void *target) -{ - MCode *p = --as->mcp; - ptrdiff_t delta = (char *)target - (char *)p; - if ((((delta>>2) + 0x00800000) >> 24) == 0) { - *p = PPCI_BL | (delta & 0x03fffffcu); - } else { /* Target out of range: need indirect call. Don't use arg reg. */ - RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); - Reg r = ra_allock(as, i32ptr(target), allow); - *p = PPCI_BCTRL; - p[-1] = PPCI_MTCTR | PPCF_T(r); - as->mcp = p-1; - } -} - -/* -- Emit generic operations --------------------------------------------- */ - -#define emit_mr(as, dst, src) \ - emit_asb(as, PPCI_MR, (dst), (src), (src)) - -/* Generic move between two regs. 
*/ -static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) -{ - UNUSED(ir); - if (dst < RID_MAX_GPR) - emit_mr(as, dst, src); - else - emit_fb(as, PPCI_FMR, dst, src); -} - -/* Generic load of register with base and (small) offset address. */ -static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r < RID_MAX_GPR) - emit_tai(as, PPCI_LWZ, r, base, ofs); - else - emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, base, ofs); -} - -/* Generic store of register with base and (small) offset address. */ -static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) -{ - if (r < RID_MAX_GPR) - emit_tai(as, PPCI_STW, r, base, ofs); - else - emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, base, ofs); -} - -/* Emit a compare (for equality) with a constant operand. */ -static void emit_cmpi(ASMState *as, Reg r, int32_t k) -{ - if (checki16(k)) { - emit_ai(as, PPCI_CMPWI, r, k); - } else if (checku16(k)) { - emit_ai(as, PPCI_CMPLWI, r, k); - } else { - emit_ai(as, PPCI_CMPLWI, RID_TMP, k); - emit_asi(as, PPCI_XORIS, RID_TMP, r, (k >> 16)); - } -} - -/* Add offset to pointer. */ -static void emit_addptr(ASMState *as, Reg r, int32_t ofs) -{ - if (ofs) { - emit_tai(as, PPCI_ADDI, r, r, ofs); - if (!checki16(ofs)) - emit_tai(as, PPCI_ADDIS, r, r, (ofs + 32768) >> 16); - } -} - -static void emit_spsub(ASMState *as, int32_t ofs) -{ - if (ofs) { - emit_tai(as, PPCI_STWU, RID_TMP, RID_SP, -ofs); - emit_tai(as, PPCI_ADDI, RID_TMP, RID_SP, - CFRAME_SIZE + (as->parent ? 
as->parent->spadjust : 0)); - } -} - diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index a6b8713e00..34a141d8c7 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -7,24 +7,13 @@ #define MODRM(mode, r1, r2) ((MCode)((mode)+(((r1)&7)<<3)+((r2)&7))) -#if LJ_64 #define REXRB(p, rr, rb) \ { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \ if (rex != 0x40) *--(p) = rex; } #define FORCE_REX 0x200 #define REX_64 (FORCE_REX|0x080000) #define VEX_64 0x800000 -#else -#define REXRB(p, rr, rb) ((void)0) -#define FORCE_REX 0 -#define REX_64 0 -#define VEX_64 0 -#endif -#if LJ_GC64 #define REX_GC64 REX_64 -#else -#define REX_GC64 0 -#endif #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) #define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4) @@ -39,9 +28,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, { int n = (int8_t)xo; if (n == -60) { /* VEX-encoded instruction */ -#if LJ_64 xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13; -#endif *(uint32_t *)(p+delta-5) = (uint32_t)xo; return p+delta-5; } @@ -54,7 +41,6 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, #endif *(uint32_t *)(p+delta-5) = (uint32_t)xo; p += n + delta; -#if LJ_64 { uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1); if (rex != 0x40) { @@ -64,9 +50,6 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, *--p = (MCode)rex; } } -#else - UNUSED(rr); UNUSED(rb); UNUSED(rx); -#endif return p; } @@ -88,7 +71,7 @@ static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2) as->mcp = emit_opm(xo, XM_REG, r1, r2, p, 0); } -#if LJ_64 && defined(LUA_USE_ASSERT) +#if defined(LUA_USE_ASSERT) /* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). 
*/ static int32_t ptr2addr(const void *p) { @@ -105,7 +88,7 @@ static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) MCode *p = as->mcp; x86Mode mode; if (ra_hasreg(rb)) { - if (LJ_GC64 && rb == RID_RIP) { + if (rb == RID_RIP) { mode = XM_OFS0; p -= 4; *(int32_t *)p = ofs; @@ -123,14 +106,9 @@ static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) *--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP); } else { *(int32_t *)(p-4) = ofs; -#if LJ_64 p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); p -= 5; rb = RID_ESP; -#else - p -= 4; - rb = RID_EBP; -#endif mode = XM_OFS0; } as->mcp = emit_opm(xo, mode, rr, rb, p, 0); @@ -203,11 +181,9 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb) *(int32_t *)p = as->mrm.ofs; if (as->mrm.idx != RID_NONE) goto mrmidx; -#if LJ_64 *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); rb = RID_ESP; -#endif - } else if (LJ_GC64 && rb == RID_RIP) { + } else if (rb == RID_RIP) { lua_assert(as->mrm.idx == RID_NONE); mode = XM_OFS0; p -= 4; @@ -275,9 +251,7 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) static void emit_loadi(ASMState *as, Reg r, int32_t i) { /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP. */ - if (i == 0 && !(LJ_32 && (IR(as->curins)->o == IR_HIOP || - (as->curins+1 < as->T->nins && - IR(as->curins+1)->o == IR_HIOP)))) { + if (i == 0) { emit_rr(as, XO_ARITH(XOg_XOR), r, r); } else { MCode *p = as->mcp; @@ -289,7 +263,6 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) } } -#if LJ_GC64 #define dispofs(as, k) \ ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch)) #define mcpofs(as, k) \ @@ -299,13 +272,7 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) /* mov r, addr */ #define emit_loada(as, r, addr) \ emit_loadu64(as, (r), (uintptr_t)(addr)) -#else -/* mov r, addr */ -#define emit_loada(as, r, addr) \ - emit_loadi(as, (r), ptr2addr((addr))) -#endif -#if LJ_64 /* mov r, imm64 or shorter 32 bit extended load. 
*/ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) { @@ -315,7 +282,6 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) MCode *p = as->mcp; *(int32_t *)(p-4) = (int32_t)u64; as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); -#if LJ_GC64 } else if (checki32(dispofs(as, u64))) { emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64)); } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) { @@ -323,7 +289,6 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) ** RIP-relative addressing reachability for both as->mcp and as->mctop. */ emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64)); -#endif } else { /* Full-size 64 bit load. */ MCode *p = as->mcp; *(uint64_t *)(p-8) = u64; @@ -333,30 +298,41 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) as->mcp = p; } } -#endif /* op r, [addr] */ static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) { -#if LJ_GC64 if (checki32(dispofs(as, addr))) { emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr)); } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) { emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr)); - } else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) { - emit_rmro(as, xo, rr, rr, 0); - emit_loadu64(as, rr, (uintptr_t)addr); + } else if (!checki32((intptr_t)addr)) { + Reg ra = (rr & 15); + if (xo != XO_MOV) { + /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */ + uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch; + uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0; + ra = RID_DISPATCH; + if (checku32(dispaddr)) { + emit_loadi(as, ra, (int32_t)dispaddr); + } else { /* Full-size 64 bit load. 
*/ + MCode *p = as->mcp; + *(uint64_t *)(p-8) = dispaddr; + p[-9] = (MCode)(XI_MOVri+(ra&7)); + p[-10] = 0x48 + ((ra>>3)&1); + p -= 10; + as->mcp = p; + } + if (xo == XO_GROUP3b) emit_i8(as, i8); + } + emit_rmro(as, xo, rr, ra, 0); + emit_loadu64(as, ra, (uintptr_t)addr); } else -#endif { MCode *p = as->mcp; *(int32_t *)(p-4) = ptr2addr(addr); -#if LJ_64 p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); -#else - as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); -#endif } } @@ -375,7 +351,6 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) } if (*k == 0) { emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r); -#if LJ_GC64 } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) || (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) { emit_rma(as, xo, r64, k); @@ -392,12 +367,9 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) ir->i = (int32_t)(as->mctop - as->mcbot); as->mcbot += 8; as->mclim = as->mcbot + MCLIM_REDZONE; + lj_mcode_commitbot(as->J, as->mcbot); } emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i)); -#else - } else { - emit_rma(as, xo, r64, k); -#endif } } @@ -406,18 +378,6 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) /* Label for short jumps. */ typedef MCode *MCLabel; -#if LJ_32 && LJ_HASFFI -/* jmp short target */ -static void emit_sjmp(ASMState *as, MCLabel target) -{ - MCode *p = as->mcp; - ptrdiff_t delta = target - p; - lua_assert(delta == (int8_t)delta); - p[-1] = (MCode)(int8_t)delta; - p[-2] = XI_JMPs; - as->mcp = p - 2; -} -#endif /* jcc short target */ static void emit_sjcc(ASMState *as, int cc, MCLabel target) @@ -480,14 +440,12 @@ static void emit_jmp(ASMState *as, MCode *target) static void emit_call_(ASMState *as, MCode *target) { MCode *p = as->mcp; -#if LJ_64 if (target-p != (int32_t)(target-p)) { /* Assumes RID_RET is never an argument to calls and always clobbered. 
*/ emit_rr(as, XO_GROUP5, XOg_CALL, RID_RET); emit_loadu64(as, RID_RET, (uint64_t)target); return; } -#endif *(int32_t *)(p-4) = jmprel(p, target); p[-5] = XI_CALL; as->mcp = p - 5; @@ -498,13 +456,8 @@ static void emit_call_(ASMState *as, MCode *target) /* -- Emit generic operations --------------------------------------------- */ /* Use 64 bit operations to handle 64 bit IR types. */ -#if LJ_64 #define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) #define VEX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? VEX_64 : 0)) -#else -#define REX_64IR(ir, r) (r) -#define VEX_64IR(ir, r) (r) -#endif /* Generic move between two regs. */ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) diff --git a/src/lj_err.c b/src/lj_err.c index 049294ea6f..5965e8ecd0 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -18,55 +18,6 @@ #include "lj_vm.h" #include "lj_strfmt.h" -/* -** LuaJIT can either use internal or external frame unwinding: -** -** - Internal frame unwinding (INT) is free-standing and doesn't require -** any OS or library support. -** -** - External frame unwinding (EXT) uses the system-provided unwind handler. -** -** Pros and Cons: -** -** - EXT requires unwind tables for *all* functions on the C stack between -** the pcall/catch and the error/throw. This is the default on x64, -** but needs to be manually enabled on x86/PPC for non-C++ code. -** -** - INT is faster when actually throwing errors (but this happens rarely). -** Setting up error handlers is zero-cost in any case. -** -** - EXT provides full interoperability with C++ exceptions. You can throw -** Lua errors or C++ exceptions through a mix of Lua frames and C++ frames. -** C++ destructors are called as needed. C++ exceptions caught by pcall -** are converted to the string "C++ exception". Lua errors can be caught -** with catch (...) in C++. -** -** - INT has only limited support for automatically catching C++ exceptions -** on POSIX systems using DWARF2 stack unwinding. 
Other systems may use -** the wrapper function feature. Lua errors thrown through C++ frames -** cannot be caught by C++ code and C++ destructors are not run. -** -** EXT is the default on x64 systems and on Windows, INT is the default on all -** other systems. -** -** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack -** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled -** with -funwind-tables (or -fexceptions). This includes LuaJIT itself (set -** TARGET_CFLAGS), all of your C/Lua binding code, all loadable C modules -** and all C libraries that have callbacks which may be used to call back -** into Lua. C++ code must *not* be compiled with -fno-exceptions. -** -** EXT is mandatory on WIN64 since the calling convention has an abundance -** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15). -** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4). -*/ - -#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND -#define LJ_UNWIND_EXT 1 -#elif LJ_TARGET_WINDOWS -#define LJ_UNWIND_EXT 1 -#endif - /* -- Error messages ------------------------------------------------------ */ /* Error message strings. */ @@ -115,23 +66,10 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) break; case FRAME_C: /* C frame. */ unwind_c: -#if LJ_UNWIND_EXT - if (errcode) { - L->base = frame_prevd(frame) + 1; - L->cframe = cframe_prev(cf); - unwindstack(L, frame - LJ_FR2); - } else if (cf != stopcf) { - cf = cframe_prev(cf); - frame = frame_prevd(frame); - break; - } - return NULL; /* Continue unwinding. */ -#else UNUSED(stopcf); cf = cframe_prev(cf); frame = frame_prevd(frame); break; -#endif case FRAME_CP: /* Protected C frame. */ if (cframe_canyield(cf)) { /* Resume? */ if (errcode) { @@ -181,349 +119,15 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) return L; /* Anything non-NULL will do. 
*/ } -/* -- External frame unwinding -------------------------------------------- */ - -#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN - -/* -** We have to use our own definitions instead of the mandatory (!) unwind.h, -** since various OS, distros and compilers mess up the header installation. -*/ - -typedef struct _Unwind_Context _Unwind_Context; - -#define _URC_OK 0 -#define _URC_FATAL_PHASE1_ERROR 3 -#define _URC_HANDLER_FOUND 6 -#define _URC_INSTALL_CONTEXT 7 -#define _URC_CONTINUE_UNWIND 8 -#define _URC_FAILURE 9 - -#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */ -#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c)) -#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff) -#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff)) - -#if !LJ_TARGET_ARM - -typedef struct _Unwind_Exception -{ - uint64_t exclass; - void (*excleanup)(int, struct _Unwind_Exception *); - uintptr_t p1, p2; -} __attribute__((__aligned__)) _Unwind_Exception; - -extern uintptr_t _Unwind_GetCFA(_Unwind_Context *); -extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t); -extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t); -extern void _Unwind_DeleteException(_Unwind_Exception *); -extern int _Unwind_RaiseException(_Unwind_Exception *); - -#define _UA_SEARCH_PHASE 1 -#define _UA_CLEANUP_PHASE 2 -#define _UA_HANDLER_FRAME 4 -#define _UA_FORCE_UNWIND 8 - -/* DWARF2 personality handler referenced from interpreter .eh_frame. 
*/ -LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, - uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx) -{ - void *cf; - lua_State *L; - if (version != 1) - return _URC_FATAL_PHASE1_ERROR; - UNUSED(uexclass); - cf = (void *)_Unwind_GetCFA(ctx); - L = cframe_L(cf); - if ((actions & _UA_SEARCH_PHASE)) { -#if LJ_UNWIND_EXT - if (err_unwind(L, cf, 0) == NULL) - return _URC_CONTINUE_UNWIND; -#endif - if (!LJ_UEXCLASS_CHECK(uexclass)) { - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); - } - return _URC_HANDLER_FOUND; - } - if ((actions & _UA_CLEANUP_PHASE)) { - int errcode; - if (LJ_UEXCLASS_CHECK(uexclass)) { - errcode = LJ_UEXCLASS_ERRCODE(uexclass); - } else { - if ((actions & _UA_HANDLER_FRAME)) - _Unwind_DeleteException(uex); - errcode = LUA_ERRRUN; - } -#if LJ_UNWIND_EXT - cf = err_unwind(L, cf, errcode); - if ((actions & _UA_FORCE_UNWIND)) { - return _URC_CONTINUE_UNWIND; - } else if (cf) { - _Unwind_SetGR(ctx, LJ_TARGET_EHRETREG, errcode); - _Unwind_SetIP(ctx, (uintptr_t)(cframe_unwind_ff(cf) ? - lj_vm_unwind_ff_eh : - lj_vm_unwind_c_eh)); - return _URC_INSTALL_CONTEXT; - } -#if LJ_TARGET_X86ORX64 - else if ((actions & _UA_HANDLER_FRAME)) { - /* Workaround for ancient libgcc bug. Still present in RHEL 5.5. :-/ - ** Real fix: http://gcc.gnu.org/viewcvs/trunk/gcc/unwind-dw2.c?r1=121165&r2=124837&pathrev=153877&diff_format=h - */ - _Unwind_SetGR(ctx, LJ_TARGET_EHRETREG, errcode); - _Unwind_SetIP(ctx, (uintptr_t)lj_vm_unwind_rethrow); - return _URC_INSTALL_CONTEXT; - } -#endif -#else - /* This is not the proper way to escape from the unwinder. We get away with - ** it on non-x64 because the interpreter restores all callee-saved regs. - */ - lj_err_throw(L, errcode); -#endif - } - return _URC_CONTINUE_UNWIND; -} - -#if LJ_UNWIND_EXT -#if LJ_TARGET_OSX || defined(__OpenBSD__) -/* Sorry, no thread safety for OSX. Complain to Apple, not me. 
*/ -static _Unwind_Exception static_uex; -#else -static __thread _Unwind_Exception static_uex; -#endif - -/* Raise DWARF2 exception. */ -static void err_raise_ext(int errcode) -{ - static_uex.exclass = LJ_UEXCLASS_MAKE(errcode); - static_uex.excleanup = NULL; - _Unwind_RaiseException(&static_uex); -} -#endif - -#else /* LJ_TARGET_ARM */ - -#define _US_VIRTUAL_UNWIND_FRAME 0 -#define _US_UNWIND_FRAME_STARTING 1 -#define _US_ACTION_MASK 3 -#define _US_FORCE_UNWIND 8 - -typedef struct _Unwind_Control_Block _Unwind_Control_Block; - -struct _Unwind_Control_Block { - uint64_t exclass; - uint32_t misc[20]; -}; - -extern int _Unwind_RaiseException(_Unwind_Control_Block *); -extern int __gnu_unwind_frame(_Unwind_Control_Block *, _Unwind_Context *); -extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *); -extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *); - -static inline uint32_t _Unwind_GetGR(_Unwind_Context *ctx, int r) -{ - uint32_t v; - _Unwind_VRS_Get(ctx, 0, r, 0, &v); - return v; -} - -static inline void _Unwind_SetGR(_Unwind_Context *ctx, int r, uint32_t v) -{ - _Unwind_VRS_Set(ctx, 0, r, 0, &v); -} - -extern void lj_vm_unwind_ext(void); - -/* ARM unwinder personality handler referenced from interpreter .ARM.extab. 
*/ -LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb, - _Unwind_Context *ctx) -{ - void *cf = (void *)_Unwind_GetGR(ctx, 13); - lua_State *L = cframe_L(cf); - int errcode; - - switch ((state & _US_ACTION_MASK)) { - case _US_VIRTUAL_UNWIND_FRAME: - if ((state & _US_FORCE_UNWIND)) break; - return _URC_HANDLER_FOUND; - case _US_UNWIND_FRAME_STARTING: - if (LJ_UEXCLASS_CHECK(ucb->exclass)) { - errcode = LJ_UEXCLASS_ERRCODE(ucb->exclass); - } else { - errcode = LUA_ERRRUN; - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); - } - cf = err_unwind(L, cf, errcode); - if ((state & _US_FORCE_UNWIND) || cf == NULL) break; - _Unwind_SetGR(ctx, 15, (uint32_t)lj_vm_unwind_ext); - _Unwind_SetGR(ctx, 0, (uint32_t)ucb); - _Unwind_SetGR(ctx, 1, (uint32_t)errcode); - _Unwind_SetGR(ctx, 2, cframe_unwind_ff(cf) ? - (uint32_t)lj_vm_unwind_ff_eh : - (uint32_t)lj_vm_unwind_c_eh); - return _URC_INSTALL_CONTEXT; - default: - return _URC_FAILURE; - } - if (__gnu_unwind_frame(ucb, ctx) != _URC_OK) - return _URC_FAILURE; - return _URC_CONTINUE_UNWIND; -} - -#if LJ_UNWIND_EXT -static __thread _Unwind_Control_Block static_uex; - -static void err_raise_ext(int errcode) -{ - memset(&static_uex, 0, sizeof(static_uex)); - static_uex.exclass = LJ_UEXCLASS_MAKE(errcode); - _Unwind_RaiseException(&static_uex); -} -#endif - -#endif /* LJ_TARGET_ARM */ - -#elif LJ_ABI_WIN - -/* -** Someone in Redmond owes me several days of my life. A lot of this is -** undocumented or just plain wrong on MSDN. Some of it can be gathered -** from 3rd party docs or must be found by trial-and-error. They really -** don't want you to write your own language-specific exception handler -** or to interact gracefully with MSVC. :-( -** -** Apparently MSVC doesn't call C++ destructors for foreign exceptions -** unless you compile your C++ code with /EHa. Unfortunately this means -** catch (...) also catches things like access violations. 
The use of -** _set_se_translator doesn't really help, because it requires /EHa, too. -*/ - -#define WIN32_LEAN_AND_MEAN -#include - -#if LJ_TARGET_X64 -/* Taken from: http://www.nynaeve.net/?p=99 */ -typedef struct UndocumentedDispatcherContext { - ULONG64 ControlPc; - ULONG64 ImageBase; - PRUNTIME_FUNCTION FunctionEntry; - ULONG64 EstablisherFrame; - ULONG64 TargetIp; - PCONTEXT ContextRecord; - void (*LanguageHandler)(void); - PVOID HandlerData; - PUNWIND_HISTORY_TABLE HistoryTable; - ULONG ScopeIndex; - ULONG Fill0; -} UndocumentedDispatcherContext; -#else -typedef void *UndocumentedDispatcherContext; -#endif - -/* Another wild guess. */ -extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); - -#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT) -/* Workaround for broken MinGW64 declaration. */ -VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); -#define RtlUnwindEx RtlUnwindEx_FIXED -#endif - -#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363) -#define LJ_GCC_EXCODE ((DWORD)0x20474343) - -#define LJ_EXCODE ((DWORD)0xe24c4a00) -#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c)) -#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) -#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) - -/* Windows exception handler for interpreter frame. */ -LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec, - void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) -{ -#if LJ_TARGET_X64 - void *cf = f; -#else - void *cf = (char *)f - CFRAME_OFS_SEH; -#endif - lua_State *L = cframe_L(cf); - int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? - LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; - if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */ - /* Unwind internal frames. */ - err_unwind(L, cf, errcode); - } else { - void *cf2 = err_unwind(L, cf, 0); - if (cf2) { /* We catch it, so start unwinding the upper frames. 
*/ - if (rec->ExceptionCode == LJ_MSVC_EXCODE || - rec->ExceptionCode == LJ_GCC_EXCODE) { -#if LJ_TARGET_WINDOWS - __DestructExceptionObject(rec, 1); -#endif - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); - } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { - /* Don't catch access violations etc. */ - return 1; /* ExceptionContinueSearch */ - } -#if LJ_TARGET_X64 - /* Unwind the stack and call all handlers for all lower C frames - ** (including ourselves) again with EH_UNWINDING set. Then set - ** rsp = cf, rax = errcode and jump to the specified target. - */ - RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? - lj_vm_unwind_ff_eh : - lj_vm_unwind_c_eh), - rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); - /* RtlUnwindEx should never return. */ -#else - UNUSED(ctx); - UNUSED(dispatch); - /* Call all handlers for all lower C frames (including ourselves) again - ** with EH_UNWINDING set. Then call the specified function, passing cf - ** and errcode. - */ - lj_vm_rtlunwind(cf, (void *)rec, - (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? - (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode); - /* lj_vm_rtlunwind does not return. */ -#endif - } - } - return 1; /* ExceptionContinueSearch */ -} - -/* Raise Windows exception. */ -static void err_raise_ext(int errcode) -{ - RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL); -} - -#endif - /* -- Error handling ------------------------------------------------------ */ /* Throw error. Find catch frame, unwind stack and continue. */ -LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode) +LJ_NOINLINE void lj_err_throw(lua_State *L, int errcode) { global_State *g = G(L); lj_trace_abort(g); setmref(g->jit_base, NULL); - L->status = 0; -#if LJ_UNWIND_EXT - err_raise_ext(errcode); - /* - ** A return from this function signals a corrupt C stack that cannot be - ** unwound. We have no choice but to call the panic function and exit. 
- ** - ** Usually this is caused by a C function without unwind information. - ** This should never happen on x64, but may happen if you've manually - ** enabled LUAJIT_UNWIND_EXTERNAL and forgot to recompile *every* - ** non-C++ file with -funwind-tables. - */ - if (G(L)->panic) - G(L)->panic(L); -#else + L->status = LUA_OK; { void *cf = err_unwind(L, NULL, errcode); if (cframe_unwind_ff(cf)) @@ -531,7 +135,6 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode) else lj_vm_unwind_c(cframe_raw(cf), errcode); } -#endif exit(EXIT_FAILURE); } @@ -712,7 +315,6 @@ LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg) frame = NULL; } else { pframe = frame_prevd(frame); -#if LJ_HASFFI /* Remove frame for FFI metamethods. */ if (frame_func(frame)->c.ffid >= FF_ffi_meta___index && frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) { @@ -720,7 +322,6 @@ LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg) L->top = frame; setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame)); } -#endif } } lj_debug_addloc(L, msg, pframe, frame); diff --git a/src/lj_err.h b/src/lj_err.h index cba5fb7149..e3d347bf4e 100644 --- a/src/lj_err.h +++ b/src/lj_err.h @@ -21,7 +21,7 @@ LJ_DATA const char *lj_err_allmsg; #define err2msg(em) (lj_err_allmsg+(int)(em)) LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em); -LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode); +LJ_FUNCA_NORET void lj_err_throw(lua_State *L, int errcode); LJ_FUNC_NORET void lj_err_mem(lua_State *L); LJ_FUNC_NORET void lj_err_run(lua_State *L); LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em); diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index 060a9f8975..e1c9c7a40c 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h @@ -57,9 +57,6 @@ ERRDEF(NOENV, "no calling environment") ERRDEF(CYIELD, "attempt to yield across C-call boundary") ERRDEF(BADLU, "bad light userdata pointer") ERRDEF(NOGCMM, "bad action while in __gc metamethod") -#if LJ_TARGET_WINDOWS 
-ERRDEF(BADFPU, "bad FPU precision (use D3DCREATE_FPU_PRESERVE with DirectX)") -#endif /* Standard library function errors. */ ERRDEF(ASSERT, "assertion failed!") @@ -99,19 +96,10 @@ ERRDEF(STRCAPU, "unfinished capture") ERRDEF(STRFMT, "invalid option " LUA_QS " to " LUA_QL("format")) ERRDEF(STRGSRV, "invalid replacement value (a %s)") ERRDEF(BADMODN, "name conflict for module " LUA_QS) -#if LJ_HASJIT ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") -#if LJ_TARGET_X86ORX64 ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2") -#else -ERRDEF(NOJIT, "JIT compiler disabled") -#endif -#elif defined(LJ_ARCH_NOJIT) -ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") -#else -ERRDEF(NOJIT, "JIT compiler permanently disabled by build option") -#endif ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS) +ERRDEF(AUDITLOG,"auditlog could not be opened (already open?)") /* Lexer/parser errors. */ ERRDEF(XMODE, "attempt to load chunk with wrong mode") @@ -150,7 +138,6 @@ ERRDEF(XGSCOPE, " jumps into the scope of local " LUA_QS) ERRDEF(BCFMT, "cannot load incompatible bytecode") ERRDEF(BCBAD, "cannot load malformed bytecode") -#if LJ_HASFFI /* FFI errors. 
*/ ERRDEF(FFI_INVTYPE, "invalid C type") ERRDEF(FFI_INVSIZE, "size of C type is unknown or too large") @@ -174,14 +161,9 @@ ERRDEF(FFI_BADMM, LUA_QS " has no " LUA_QS " metamethod") ERRDEF(FFI_WRCONST, "attempt to write to constant location") ERRDEF(FFI_NODECL, "missing declaration for symbol " LUA_QS) ERRDEF(FFI_BADCBACK, "bad callback") -#if LJ_OS_NOJIT -ERRDEF(FFI_CBACKOV, "no support for callbacks on this OS") -#else ERRDEF(FFI_CBACKOV, "too many callbacks") -#endif ERRDEF(FFI_NYIPACKBIT, "NYI: packed bit fields") ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)") -#endif #undef ERRDEF diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index dfdee2dbfe..819b5b4c46 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_err.h" #include "lj_str.h" @@ -61,14 +60,14 @@ */ /* Type of handler to record a fast function. */ -typedef void (LJ_FASTCALL *RecordFunc)(jit_State *J, RecordFFData *rd); +typedef void (*RecordFunc)(jit_State *J, RecordFFData *rd); /* Get runtime value of int argument. */ static int32_t argv2int(jit_State *J, TValue *o) { if (!lj_strscan_numberobj(o)) lj_trace_err(J, LJ_TRERR_BADTYPE); - return tvisint(o) ? intV(o) : lj_num2int(numV(o)); + return lj_num2int(numV(o)); } /* Get runtime value of string argument. */ @@ -118,13 +117,9 @@ static void recff_stitch(jit_State *J) /* Ditto for the IR. */ memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot); -#if LJ_FR2 J->base[2] = TREF_FRAME; J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT; -#else - J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; -#endif J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J))); J->base += 2 + LJ_FR2; J->baseslot += 2 + LJ_FR2; @@ -140,7 +135,7 @@ static void recff_stitch(jit_State *J) } /* Fallback handler for fast functions that are not recorded (yet). 
*/ -static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd) +static void recff_nyi(jit_State *J, RecordFFData *rd) { if (J->cur.nins < (IRRef)J->param[JIT_P_minstitch] + REF_BASE) { lj_trace_err_info(J, LJ_TRERR_TRACEUV); @@ -184,27 +179,25 @@ static TRef recff_bufhdr(jit_State *J) /* -- Base library fast functions ----------------------------------------- */ -static void LJ_FASTCALL recff_assert(jit_State *J, RecordFFData *rd) +static void recff_assert(jit_State *J, RecordFFData *rd) { /* Arguments already specialized. The interpreter throws for nil/false. */ rd->nres = J->maxslot; /* Pass through all arguments. */ } -static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd) +static void recff_type(jit_State *J, RecordFFData *rd) { /* Arguments already specialized. Result is a constant string. Neat, huh? */ uint32_t t; if (tvisnumber(&rd->argv[0])) t = ~LJ_TNUMX; - else if (LJ_64 && !LJ_GC64 && tvislightud(&rd->argv[0])) - t = ~LJ_TLIGHTUD; else t = ~itype(&rd->argv[0]); J->base[0] = lj_ir_kstr(J, strV(&J->fn->c.upvalue[t])); UNUSED(rd); } -static void LJ_FASTCALL recff_getmetatable(jit_State *J, RecordFFData *rd) +static void recff_getmetatable(jit_State *J, RecordFFData *rd) { TRef tr = J->base[0]; if (tr) { @@ -218,7 +211,7 @@ static void LJ_FASTCALL recff_getmetatable(jit_State *J, RecordFFData *rd) } /* else: Interpreter will throw. */ } -static void LJ_FASTCALL recff_setmetatable(jit_State *J, RecordFFData *rd) +static void recff_setmetatable(jit_State *J, RecordFFData *rd) { TRef tr = J->base[0]; TRef mt = J->base[1]; @@ -238,7 +231,7 @@ static void LJ_FASTCALL recff_setmetatable(jit_State *J, RecordFFData *rd) } /* else: Interpreter will throw. 
*/ } -static void LJ_FASTCALL recff_rawget(jit_State *J, RecordFFData *rd) +static void recff_rawget(jit_State *J, RecordFFData *rd) { RecordIndex ix; ix.tab = J->base[0]; ix.key = J->base[1]; @@ -250,7 +243,7 @@ static void LJ_FASTCALL recff_rawget(jit_State *J, RecordFFData *rd) } /* else: Interpreter will throw. */ } -static void LJ_FASTCALL recff_rawset(jit_State *J, RecordFFData *rd) +static void recff_rawset(jit_State *J, RecordFFData *rd) { RecordIndex ix; ix.tab = J->base[0]; ix.key = J->base[1]; ix.val = J->base[2]; @@ -264,7 +257,7 @@ static void LJ_FASTCALL recff_rawset(jit_State *J, RecordFFData *rd) } /* else: Interpreter will throw. */ } -static void LJ_FASTCALL recff_rawequal(jit_State *J, RecordFFData *rd) +static void recff_rawequal(jit_State *J, RecordFFData *rd) { TRef tra = J->base[0]; TRef trb = J->base[1]; @@ -275,7 +268,7 @@ static void LJ_FASTCALL recff_rawequal(jit_State *J, RecordFFData *rd) } #if LJ_52 -static void LJ_FASTCALL recff_rawlen(jit_State *J, RecordFFData *rd) +static void recff_rawlen(jit_State *J, RecordFFData *rd) { TRef tr = J->base[0]; if (tref_isstr(tr)) @@ -306,7 +299,7 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv) } } -static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd) +static void recff_select(jit_State *J, RecordFFData *rd) { TRef tr = J->base[0]; if (tr) { @@ -330,7 +323,7 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd) } /* else: Interpreter will throw. 
*/ } -static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd) +static void recff_tonumber(jit_State *J, RecordFFData *rd) { TRef tr = J->base[0]; TRef base = J->base[1]; @@ -350,11 +343,9 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd) } tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); } -#if LJ_HASFFI } else if (tref_iscdata(tr)) { lj_crecord_tonumber(J, rd); return; -#endif } else { tr = TREF_NIL; } @@ -396,7 +387,7 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm) return 0; } -static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd) +static void recff_tostring(jit_State *J, RecordFFData *rd) { TRef tr = J->base[0]; if (tref_isstr(tr)) { @@ -415,7 +406,7 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd) } } -static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd) +static void recff_ipairs_aux(jit_State *J, RecordFFData *rd) { RecordIndex ix; ix.tab = J->base[0]; @@ -432,10 +423,10 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd) } /* else: Interpreter will throw. */ } -static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd) +static void recff_xpairs(jit_State *J, RecordFFData *rd) { TRef tr = J->base[0]; - if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) && + if (!((LJ_52 || tref_iscdata(tr)) && recff_metacall(J, rd, MM_pairs + rd->data))) { if (tref_istab(tr)) { J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0])); @@ -446,13 +437,11 @@ static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd) } } -static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) +static void recff_pcall(jit_State *J, RecordFFData *rd) { if (J->maxslot >= 1) { -#if LJ_FR2 /* Shift function arguments up. */ memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot); -#endif lj_record_call(J, 0, J->maxslot - 1); rd->nres = -1; /* Pending call. */ } /* else: Interpreter will throw. 
*/ @@ -466,7 +455,7 @@ static TValue *recff_xpcall_cp(lua_State *L, lua_CFunction dummy, void *ud) return NULL; } -static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) +static void recff_xpcall(jit_State *J, RecordFFData *rd) { if (J->maxslot >= 2) { TValue argv0, argv1; @@ -478,10 +467,8 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) copyTV(J->L, &argv1, &rd->argv[1]); copyTV(J->L, &rd->argv[0], &argv1); copyTV(J->L, &rd->argv[1], &argv0); -#if LJ_FR2 /* Shift function arguments up. */ memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1)); -#endif /* Need to protect lj_record_call because it may throw. */ errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); /* Always undo Lua stack swap to avoid confusing the interpreter. */ @@ -493,7 +480,7 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) } /* else: Interpreter will throw. */ } -static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd) +static void recff_getfenv(jit_State *J, RecordFFData *rd) { TRef tr = J->base[0]; /* Only support getfenv(0) for now. */ @@ -507,7 +494,7 @@ static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd) /* -- Math library fast functions ----------------------------------------- */ -static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) +static void recff_math_abs(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_ksimd(J, LJ_KSIMD_ABS)); @@ -515,29 +502,24 @@ static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) } /* Record rounding functions math.floor and math.ceil. */ -static void LJ_FASTCALL recff_math_round(jit_State *J, RecordFFData *rd) +static void recff_math_round(jit_State *J, RecordFFData *rd) { TRef tr = J->base[0]; if (!tref_isinteger(tr)) { /* Pass through integers unmodified. 
*/ tr = emitir(IRTN(IR_FPMATH), lj_ir_tonum(J, tr), rd->data); /* Result is integral (or NaN/Inf), but may not fit an int32_t. */ - if (LJ_DUALNUM) { /* Try to narrow using a guarded conversion to int. */ - lua_Number n = lj_vm_foldfpm(numberVnum(&rd->argv[0]), rd->data); - if (n == (lua_Number)lj_num2int(n)) - tr = emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_CHECK); - } J->base[0] = tr; } } /* Record unary math.* functions, mapped to IR_FPMATH opcode. */ -static void LJ_FASTCALL recff_math_unary(jit_State *J, RecordFFData *rd) +static void recff_math_unary(jit_State *J, RecordFFData *rd) { J->base[0] = emitir(IRTN(IR_FPMATH), lj_ir_tonum(J, J->base[0]), rd->data); } /* Record math.log. */ -static void LJ_FASTCALL recff_math_log(jit_State *J, RecordFFData *rd) +static void recff_math_log(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); if (J->base[1]) { @@ -559,7 +541,7 @@ static void LJ_FASTCALL recff_math_log(jit_State *J, RecordFFData *rd) } /* Record math.atan2. */ -static void LJ_FASTCALL recff_math_atan2(jit_State *J, RecordFFData *rd) +static void recff_math_atan2(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); TRef tr2 = lj_ir_tonum(J, J->base[1]); @@ -568,20 +550,16 @@ static void LJ_FASTCALL recff_math_atan2(jit_State *J, RecordFFData *rd) } /* Record math.ldexp. */ -static void LJ_FASTCALL recff_math_ldexp(jit_State *J, RecordFFData *rd) +static void recff_math_ldexp(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); -#if LJ_TARGET_X86ORX64 TRef tr2 = lj_ir_tonum(J, J->base[1]); -#else - TRef tr2 = lj_opt_narrow_toint(J, J->base[1]); -#endif J->base[0] = emitir(IRTN(IR_LDEXP), tr, tr2); UNUSED(rd); } /* Record math.asin, math.acos, math.atan. 
*/ -static void LJ_FASTCALL recff_math_atrig(jit_State *J, RecordFFData *rd) +static void recff_math_atrig(jit_State *J, RecordFFData *rd) { TRef y = lj_ir_tonum(J, J->base[0]); TRef x = lj_ir_knum_one(J); @@ -595,13 +573,13 @@ static void LJ_FASTCALL recff_math_atrig(jit_State *J, RecordFFData *rd) J->base[0] = emitir(IRTN(IR_ATAN2), y, x); } -static void LJ_FASTCALL recff_math_htrig(jit_State *J, RecordFFData *rd) +static void recff_math_htrig(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); J->base[0] = emitir(IRTN(IR_CALLN), tr, rd->data); } -static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd) +static void recff_math_modf(jit_State *J, RecordFFData *rd) { TRef tr = J->base[0]; if (tref_isinteger(tr)) { @@ -617,14 +595,14 @@ static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd) rd->nres = 2; } -static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) +static void recff_math_pow(jit_State *J, RecordFFData *rd) { J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1], &rd->argv[0], &rd->argv[1]); UNUSED(rd); } -static void LJ_FASTCALL recff_math_minmax(jit_State *J, RecordFFData *rd) +static void recff_math_minmax(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonumber(J, J->base[0]); uint32_t op = rd->data; @@ -642,7 +620,7 @@ static void LJ_FASTCALL recff_math_minmax(jit_State *J, RecordFFData *rd) J->base[0] = tr; } -static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd) +static void recff_math_random(jit_State *J, RecordFFData *rd) { GCudata *ud = udataV(&J->fn->c.upvalue[0]); TRef tr, one; @@ -672,33 +650,27 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd) /* -- Bit library fast functions ------------------------------------------ */ /* Record bit.tobit. 
*/ -static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd) +static void recff_bit_tobit(jit_State *J, RecordFFData *rd) { TRef tr = J->base[0]; -#if LJ_HASFFI if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; } -#endif J->base[0] = lj_opt_narrow_tobit(J, tr); UNUSED(rd); } /* Record unary bit.bnot, bit.bswap. */ -static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) +static void recff_bit_unary(jit_State *J, RecordFFData *rd) { -#if LJ_HASFFI if (recff_bit64_unary(J, rd)) return; -#endif J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0); } /* Record N-ary bit.band, bit.bor, bit.bxor. */ -static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) +static void recff_bit_nary(jit_State *J, RecordFFData *rd) { -#if LJ_HASFFI if (recff_bit64_nary(J, rd)) return; -#endif { TRef tr = lj_opt_narrow_tobit(J, J->base[0]); uint32_t ot = IRTI(rd->data); @@ -710,12 +682,10 @@ static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) } /* Record bit shifts. */ -static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) +static void recff_bit_shift(jit_State *J, RecordFFData *rd) { -#if LJ_HASFFI if (recff_bit64_shift(J, rd)) return; -#endif { TRef tr = lj_opt_narrow_tobit(J, J->base[0]); TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); @@ -733,15 +703,11 @@ static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) } } -static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd) +static void recff_bit_tohex(jit_State *J, RecordFFData *rd) { -#if LJ_HASFFI TRef hdr = recff_bufhdr(J); TRef tr = recff_bit64_tohex(J, rd, hdr); J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); -#else - recff_nyiu(J, rd); /* Don't bother working around this NYI. 
*/ -#endif } /* -- String library fast functions --------------------------------------- */ @@ -773,7 +739,7 @@ static TRef recff_string_start(jit_State *J, GCstr *s, int32_t *st, TRef tr, } /* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */ -static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) +static void recff_string_range(jit_State *J, RecordFFData *rd) { TRef trstr = lj_ir_tostr(J, J->base[0]); TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN); @@ -852,7 +818,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) } } -static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd) +static void recff_string_char(jit_State *J, RecordFFData *rd) { TRef k255 = lj_ir_kint(J, 255); BCReg i; @@ -870,7 +836,7 @@ static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd) UNUSED(rd); } -static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd) +static void recff_string_rep(jit_State *J, RecordFFData *rd) { TRef str = lj_ir_tostr(J, J->base[0]); TRef rep = lj_opt_narrow_toint(J, J->base[1]); @@ -896,7 +862,7 @@ static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd) J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); } -static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd) +static void recff_string_op(jit_State *J, RecordFFData *rd) { TRef str = lj_ir_tostr(J, J->base[0]); TRef hdr = recff_bufhdr(J); @@ -904,7 +870,7 @@ static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd) J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr); } -static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) +static void recff_string_find(jit_State *J, RecordFFData *rd) { TRef trstr = lj_ir_tostr(J, J->base[0]); TRef trpat = lj_ir_tostr(J, J->base[1]); @@ -963,7 +929,7 @@ static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) } } -static void LJ_FASTCALL recff_string_format(jit_State *J, 
RecordFFData *rd) +static void recff_string_format(jit_State *J, RecordFFData *rd) { TRef trfmt = lj_ir_tostr(J, J->base[0]); GCstr *fmt = argv2str(J, &rd->argv[0]); @@ -993,15 +959,10 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT)); } else { -#if LJ_HASFFI tra = emitir(IRT(IR_CONV, IRT_U64), tra, (IRT_INT|(IRT_U64<<5)|IRCONV_SEXT)); tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra); lj_needsplit(J); -#else - recff_nyiu(J, rd); /* Don't bother working around this NYI. */ - return; -#endif } break; case STRFMT_UINT: @@ -1046,7 +1007,7 @@ static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) /* -- Table library fast functions ---------------------------------------- */ -static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) +static void recff_table_insert(jit_State *J, RecordFFData *rd) { RecordIndex ix; ix.tab = J->base[0]; @@ -1068,7 +1029,7 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) } /* else: Interpreter will throw. 
*/ } -static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd) +static void recff_table_concat(jit_State *J, RecordFFData *rd) { TRef tab = J->base[0]; if (tref_istab(tab)) { @@ -1087,7 +1048,7 @@ static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd) UNUSED(rd); } -static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd) +static void recff_table_new(jit_State *J, RecordFFData *rd) { TRef tra = lj_opt_narrow_toint(J, J->base[0]); TRef trh = lj_opt_narrow_toint(J, J->base[1]); @@ -1095,7 +1056,7 @@ static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd) UNUSED(rd); } -static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd) +static void recff_table_clear(jit_State *J, RecordFFData *rd) { TRef tr = J->base[0]; if (tref_istab(tr)) { @@ -1114,13 +1075,8 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id) { TRef tr, ud, fp; if (id) { /* io.func() */ -#if LJ_GC64 /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id])); -#else - tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); - ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); -#endif } else { /* fp:method() */ ud = J->base[0]; if (!tref_isudata(ud)) @@ -1134,7 +1090,7 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id) return fp; } -static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd) +static void recff_io_write(jit_State *J, RecordFFData *rd) { TRef ud, fp = recff_io_fp(J, &ud, rd->data); TRef zero = lj_ir_kint(J, 0); @@ -1161,7 +1117,7 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd) J->base[0] = LJ_52 ? 
ud : TREF_TRUE; } -static void LJ_FASTCALL recff_io_flush(jit_State *J, RecordFFData *rd) +static void recff_io_flush(jit_State *J, RecordFFData *rd) { TRef ud, fp = recff_io_fp(J, &ud, rd->data); TRef tr = lj_ir_call(J, IRCALL_fflush, fp); @@ -1172,7 +1128,7 @@ static void LJ_FASTCALL recff_io_flush(jit_State *J, RecordFFData *rd) /* -- Debug library fast functions ---------------------------------------- */ -static void LJ_FASTCALL recff_debug_getmetatable(jit_State *J, RecordFFData *rd) +static void recff_debug_getmetatable(jit_State *J, RecordFFData *rd) { GCtab *mt; TRef mtref; @@ -1223,4 +1179,3 @@ void lj_ffrecord_func(jit_State *J) #undef IR #undef emitir -#endif diff --git a/src/lj_ffrecord.h b/src/lj_ffrecord.h index 3b407450d5..83a8abf803 100644 --- a/src/lj_ffrecord.h +++ b/src/lj_ffrecord.h @@ -9,7 +9,6 @@ #include "lj_obj.h" #include "lj_jit.h" -#if LJ_HASJIT /* Data used by handlers to record a fast function. */ typedef struct RecordFFData { TValue *argv; /* Runtime argument values. */ @@ -19,6 +18,5 @@ typedef struct RecordFFData { LJ_FUNC int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv); LJ_FUNC void lj_ffrecord_func(jit_State *J); -#endif #endif diff --git a/src/lj_frame.h b/src/lj_frame.h index 19c49a4aef..07da6d56b9 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h @@ -30,7 +30,6 @@ enum { #define FRAME_TYPEP (FRAME_TYPE|FRAME_P) /* Macros to access and modify Lua frames. */ -#if LJ_FR2 /* Two-slot frame info, required for 64 bit PC/GCRef: ** ** base-2 base-1 | base base+1 ... @@ -49,28 +48,6 @@ enum { #define setframe_gc(f, p, tp) (setgcVraw((f)-1, (p), (tp))) #define setframe_ftsz(f, sz) ((f)->ftsz = (sz)) #define setframe_pc(f, pc) ((f)->ftsz = (int64_t)(intptr_t)(pc)) -#else -/* One-slot frame info, sufficient for 32 bit PC/GCRef: -** -** base-1 | base base+1 ... -** lo hi | -** [func | PC/delta/ft] | [slots ...] -** ^-- frame | ^-- base ^-- top -** -** Continuation frames: -** -** base-2 base-1 | base base+1 ... 
-** lo hi lo hi | -** [cont | PC] [func | PC/delta/ft] | [slots ...] -** ^-- frame | ^-- base ^-- top -*/ -#define frame_gc(f) (gcref((f)->fr.func)) -#define frame_ftsz(f) ((ptrdiff_t)(f)->fr.tp.ftsz) -#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns)) -#define setframe_gc(f, p, tp) (setgcref((f)->fr.func, (p)), UNUSED(tp)) -#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (int32_t)(sz)) -#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc))) -#endif #define frame_type(f) (frame_ftsz(f) & FRAME_TYPE) #define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP) @@ -86,24 +63,10 @@ enum { enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ -#if LJ_FR2 #define frame_contpc(f) (frame_pc((f)-2)) #define frame_contv(f) (((f)-3)->u64) -#else -#define frame_contpc(f) (frame_pc((f)-1)) -#define frame_contv(f) (((f)-1)->u32.lo) -#endif -#if LJ_FR2 #define frame_contf(f) ((ASMFunction)(uintptr_t)((f)-3)->u64) -#elif LJ_64 -#define frame_contf(f) \ - ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \ - (intptr_t)(int32_t)((f)-1)->u32.lo)) -#else -#define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void)) -#endif -#define frame_iscont_fficb(f) \ - (LJ_HASFFI && frame_contv(f) == LJ_CONT_FFI_CALLBACK) +#define frame_iscont_fficb(f) (frame_contv(f) == LJ_CONT_FFI_CALLBACK) #define frame_prevl(f) ((f) - (1+LJ_FR2+bc_a(frame_pc(f)[-1]))) #define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f))) @@ -115,158 +78,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ /* Macros to access and modify the C stack frame chain. */ /* These definitions must match with the arch-specific *.dasc files. 
*/ -#if LJ_TARGET_X86 -#if LJ_ABI_WIN -#define CFRAME_OFS_ERRF (19*4) -#define CFRAME_OFS_NRES (18*4) -#define CFRAME_OFS_PREV (17*4) -#define CFRAME_OFS_L (16*4) -#define CFRAME_OFS_SEH (9*4) -#define CFRAME_OFS_PC (6*4) -#define CFRAME_OFS_MULTRES (5*4) -#define CFRAME_SIZE (16*4) -#define CFRAME_SHIFT_MULTRES 0 -#else -#define CFRAME_OFS_ERRF (15*4) -#define CFRAME_OFS_NRES (14*4) -#define CFRAME_OFS_PREV (13*4) -#define CFRAME_OFS_L (12*4) -#define CFRAME_OFS_PC (6*4) -#define CFRAME_OFS_MULTRES (5*4) -#define CFRAME_SIZE (12*4) -#define CFRAME_SHIFT_MULTRES 0 -#endif -#elif LJ_TARGET_X64 -#if LJ_ABI_WIN -#define CFRAME_OFS_PREV (13*8) -#if LJ_GC64 -#define CFRAME_OFS_PC (12*8) -#define CFRAME_OFS_L (11*8) -#define CFRAME_OFS_ERRF (21*4) -#define CFRAME_OFS_NRES (20*4) -#define CFRAME_OFS_MULTRES (8*4) -#else -#define CFRAME_OFS_PC (25*4) -#define CFRAME_OFS_L (24*4) -#define CFRAME_OFS_ERRF (23*4) -#define CFRAME_OFS_NRES (22*4) -#define CFRAME_OFS_MULTRES (21*4) -#endif -#define CFRAME_SIZE (10*8) -#define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8) -#define CFRAME_SHIFT_MULTRES 0 -#else #define CFRAME_OFS_PREV (4*8) -#if LJ_GC64 #define CFRAME_OFS_PC (3*8) #define CFRAME_OFS_L (2*8) #define CFRAME_OFS_ERRF (3*4) #define CFRAME_OFS_NRES (2*4) #define CFRAME_OFS_MULTRES (0*4) -#else -#define CFRAME_OFS_PC (7*4) -#define CFRAME_OFS_L (6*4) -#define CFRAME_OFS_ERRF (5*4) -#define CFRAME_OFS_NRES (4*4) -#define CFRAME_OFS_MULTRES (1*4) -#endif -#if LJ_NO_UNWIND #define CFRAME_SIZE (12*8) -#else -#define CFRAME_SIZE (10*8) -#endif #define CFRAME_SIZE_JIT (CFRAME_SIZE + 16) #define CFRAME_SHIFT_MULTRES 0 -#endif -#elif LJ_TARGET_ARM -#define CFRAME_OFS_ERRF 24 -#define CFRAME_OFS_NRES 20 -#define CFRAME_OFS_PREV 16 -#define CFRAME_OFS_L 12 -#define CFRAME_OFS_PC 8 -#define CFRAME_OFS_MULTRES 4 -#if LJ_ARCH_HASFPU -#define CFRAME_SIZE 128 -#else -#define CFRAME_SIZE 64 -#endif -#define CFRAME_SHIFT_MULTRES 3 -#elif LJ_TARGET_ARM64 -#define CFRAME_OFS_ERRF 196 
-#define CFRAME_OFS_NRES 200 -#define CFRAME_OFS_PREV 160 -#define CFRAME_OFS_L 176 -#define CFRAME_OFS_PC 168 -#define CFRAME_OFS_MULTRES 192 -#define CFRAME_SIZE 208 -#define CFRAME_SHIFT_MULTRES 3 -#elif LJ_TARGET_PPC -#if LJ_TARGET_XBOX360 -#define CFRAME_OFS_ERRF 424 -#define CFRAME_OFS_NRES 420 -#define CFRAME_OFS_PREV 400 -#define CFRAME_OFS_L 416 -#define CFRAME_OFS_PC 412 -#define CFRAME_OFS_MULTRES 408 -#define CFRAME_SIZE 384 -#define CFRAME_SHIFT_MULTRES 3 -#elif LJ_ARCH_PPC32ON64 -#define CFRAME_OFS_ERRF 472 -#define CFRAME_OFS_NRES 468 -#define CFRAME_OFS_PREV 448 -#define CFRAME_OFS_L 464 -#define CFRAME_OFS_PC 460 -#define CFRAME_OFS_MULTRES 456 -#define CFRAME_SIZE 400 -#define CFRAME_SHIFT_MULTRES 3 -#else -#define CFRAME_OFS_ERRF 48 -#define CFRAME_OFS_NRES 44 -#define CFRAME_OFS_PREV 40 -#define CFRAME_OFS_L 36 -#define CFRAME_OFS_PC 32 -#define CFRAME_OFS_MULTRES 28 -#define CFRAME_SIZE 272 -#define CFRAME_SHIFT_MULTRES 3 -#endif -#elif LJ_TARGET_MIPS32 -#if LJ_ARCH_HASFPU -#define CFRAME_OFS_ERRF 124 -#define CFRAME_OFS_NRES 120 -#define CFRAME_OFS_PREV 116 -#define CFRAME_OFS_L 112 -#define CFRAME_SIZE 112 -#else -#define CFRAME_OFS_ERRF 76 -#define CFRAME_OFS_NRES 72 -#define CFRAME_OFS_PREV 68 -#define CFRAME_OFS_L 64 -#define CFRAME_SIZE 64 -#endif -#define CFRAME_OFS_PC 20 -#define CFRAME_OFS_MULTRES 16 -#define CFRAME_SHIFT_MULTRES 3 -#elif LJ_TARGET_MIPS64 -#if LJ_ARCH_HASFPU -#define CFRAME_OFS_ERRF 188 -#define CFRAME_OFS_NRES 184 -#define CFRAME_OFS_PREV 176 -#define CFRAME_OFS_L 168 -#define CFRAME_OFS_PC 160 -#define CFRAME_SIZE 192 -#else -#define CFRAME_OFS_ERRF 124 -#define CFRAME_OFS_NRES 120 -#define CFRAME_OFS_PREV 112 -#define CFRAME_OFS_L 104 -#define CFRAME_OFS_PC 96 -#define CFRAME_SIZE 128 -#endif -#define CFRAME_OFS_MULTRES 0 -#define CFRAME_SHIFT_MULTRES 3 -#else -#error "Missing CFRAME_* definitions for this architecture" -#endif #ifndef CFRAME_SIZE_JIT #define CFRAME_SIZE_JIT CFRAME_SIZE diff --git a/src/lj_func.c 
b/src/lj_func.c index 639dad8768..c885a86be2 100644 --- a/src/lj_func.c +++ b/src/lj_func.c @@ -17,7 +17,7 @@ /* -- Prototypes ---------------------------------------------------------- */ -void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt) +void lj_func_freeproto(global_State *g, GCproto *pt) { lj_mem_free(g, pt, pt->sizept); } @@ -77,7 +77,7 @@ static GCupval *func_emptyuv(lua_State *L) } /* Close all open upvalues pointing to some stack level or above. */ -void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level) +void lj_func_closeuv(lua_State *L, TValue *level) { GCupval *uv; global_State *g = G(L); @@ -95,7 +95,7 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level) } } -void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv) +void lj_func_freeuv(global_State *g, GCupval *uv) { if (!uv->closed) unlinkuv(uv); @@ -178,7 +178,7 @@ GCfunc *lj_func_newL_gc(lua_State *L, GCproto *pt, GCfuncL *parent) return fn; } -void LJ_FASTCALL lj_func_free(global_State *g, GCfunc *fn) +void lj_func_free(global_State *g, GCfunc *fn) { MSize size = isluafunc(fn) ? sizeLfunc((MSize)fn->l.nupvalues) : sizeCfunc((MSize)fn->c.nupvalues); diff --git a/src/lj_func.h b/src/lj_func.h index 901751b981..25ea614f6c 100644 --- a/src/lj_func.h +++ b/src/lj_func.h @@ -9,16 +9,16 @@ #include "lj_obj.h" /* Prototypes. */ -LJ_FUNC void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt); +LJ_FUNC void lj_func_freeproto(global_State *g, GCproto *pt); /* Upvalues. */ -LJ_FUNCA void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level); -LJ_FUNC void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv); +LJ_FUNCA void lj_func_closeuv(lua_State *L, TValue *level); +LJ_FUNC void lj_func_freeuv(global_State *g, GCupval *uv); /* Functions (closures). 
*/ LJ_FUNC GCfunc *lj_func_newC(lua_State *L, MSize nelems, GCtab *env); LJ_FUNC GCfunc *lj_func_newL_empty(lua_State *L, GCproto *pt, GCtab *env); LJ_FUNCA GCfunc *lj_func_newL_gc(lua_State *L, GCproto *pt, GCfuncL *parent); -LJ_FUNC void LJ_FASTCALL lj_func_free(global_State *g, GCfunc *c); +LJ_FUNC void lj_func_free(global_State *g, GCfunc *c); #endif diff --git a/src/lj_gc.c b/src/lj_gc.c index 2aaf5b2c4f..7347b23518 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c @@ -20,10 +20,8 @@ #include "lj_meta.h" #include "lj_state.h" #include "lj_frame.h" -#if LJ_HASFFI #include "lj_ctype.h" #include "lj_cdata.h" -#endif #include "lj_trace.h" #include "lj_vm.h" @@ -171,12 +169,10 @@ static int gc_traverse_tab(global_State *g, GCtab *t) else if (c == 'v') weak |= LJ_GC_WEAKVAL; } if (weak) { /* Weak tables are cleared in the atomic phase. */ -#if LJ_HASFFI CTState *cts = ctype_ctsG(g); if (cts && cts->finalizer == t) { weak = (int)(~0u & ~LJ_GC_WEAKVAL); } else -#endif { t->marked = (uint8_t)((t->marked & ~LJ_GC_WEAK) | weak); setgcrefr(t->gclist, g->gc.weak); @@ -223,7 +219,6 @@ static void gc_traverse_func(global_State *g, GCfunc *fn) } } -#if LJ_HASJIT /* Mark a trace. */ static void gc_marktrace(global_State *g, TraceNo traceno) { @@ -256,9 +251,6 @@ static void gc_traverse_trace(global_State *g, GCtrace *T) /* The current trace is a GC root while not anchored in the prototype (yet). */ #define gc_traverse_curtrace(g) gc_traverse_trace(g, &G2J(g)->cur) -#else -#define gc_traverse_curtrace(g) UNUSED(g) -#endif /* Traverse a prototype. */ static void gc_traverse_proto(global_State *g, GCproto *pt) @@ -267,9 +259,7 @@ static void gc_traverse_proto(global_State *g, GCproto *pt) gc_mark_str(proto_chunkname(pt)); for (i = -(ptrdiff_t)pt->sizekgc; i < 0; i++) /* Mark collectable consts. */ gc_markobj(g, proto_kgc(pt, i)); -#if LJ_HASJIT if (pt->trace) gc_marktrace(g, pt->trace); -#endif } /* Traverse the frame structure of a stack. 
*/ @@ -335,15 +325,10 @@ static size_t propagatemark(global_State *g) gc_traverse_thread(g, th); return sizeof(lua_State) + sizeof(TValue) * th->stacksize; } else { -#if LJ_HASJIT GCtrace *T = gco2trace(o); gc_traverse_trace(g, T); return ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) + T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry); -#else - lua_assert(0); - return 0; -#endif } } @@ -359,7 +344,7 @@ static size_t gc_propagate_gray(global_State *g) /* -- Sweep phase --------------------------------------------------------- */ /* Type of GC free functions. */ -typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o); +typedef void (*GCFreeFunc)(global_State *g, GCobj *o); /* GC free functions for LJ_TSTR .. LJ_TUDATA. ORDER LJ_T */ static const GCFreeFunc gc_freefunc[] = { @@ -368,16 +353,8 @@ static const GCFreeFunc gc_freefunc[] = { (GCFreeFunc)lj_state_free, (GCFreeFunc)lj_func_freeproto, (GCFreeFunc)lj_func_free, -#if LJ_HASJIT (GCFreeFunc)lj_trace_free, -#else - (GCFreeFunc)0, -#endif -#if LJ_HASFFI (GCFreeFunc)lj_cdata_free, -#else - (GCFreeFunc)0, -#endif (GCFreeFunc)lj_tab_free, (GCFreeFunc)lj_udata_free }; @@ -491,7 +468,6 @@ static void gc_finalize(lua_State *L) setgcrefnull(g->gc.mmudata); else setgcrefr(gcref(g->gc.mmudata)->gch.nextgc, o->gch.nextgc); -#if LJ_HASFFI if (o->gch.gct == ~LJ_TCDATA) { TValue tmp, *tv; /* Add cdata back to the GC list and make it white. */ @@ -510,7 +486,6 @@ static void gc_finalize(lua_State *L) } return; } -#endif /* Add userdata back to the main userdata list and make it white. */ setgcrefr(o->gch.nextgc, mainthread(g)->nextgc); setgcref(mainthread(g)->nextgc, o); @@ -528,7 +503,6 @@ void lj_gc_finalize_udata(lua_State *L) gc_finalize(L); } -#if LJ_HASFFI /* Finalize all cdata objects from finalizer table. */ void lj_gc_finalize_cdata(lua_State *L) { @@ -551,7 +525,6 @@ void lj_gc_finalize_cdata(lua_State *L) } } } -#endif /* Free all remaining GC objects. 
*/ void lj_gc_freeall(global_State *g) @@ -642,9 +615,7 @@ static size_t gc_onestep(lua_State *L) lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */ if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ g->gc.state = GCSfinalize; -#if LJ_HASFFI g->gc.nocdatafin = 1; -#endif } else { /* Otherwise skip this phase to help the JIT. */ g->gc.state = GCSpause; /* End of GC cycle. */ g->gc.debt = 0; @@ -661,9 +632,7 @@ static size_t gc_onestep(lua_State *L) g->gc.estimate -= GCFINALIZECOST; return GCFINALIZECOST; } -#if LJ_HASFFI if (!g->gc.nocdatafin) lj_tab_rehash(L, ctype_ctsG(g)->finalizer); -#endif g->gc.state = GCSpause; /* End of GC cycle. */ g->gc.debt = 0; return 0; @@ -674,11 +643,12 @@ static size_t gc_onestep(lua_State *L) } /* Perform a limited amount of incremental GC steps. */ -int LJ_FASTCALL lj_gc_step(lua_State *L) +int lj_gc_step(lua_State *L) { global_State *g = G(L); GCSize lim; int32_t ostate = g->vmstate; + g->gcvmstate = ostate; setvmstate(g, GC); lim = (GCSTEPSIZE/100) * g->gc.stepmul; if (lim == 0) @@ -706,15 +676,14 @@ int LJ_FASTCALL lj_gc_step(lua_State *L) } /* Ditto, but fix the stack top first. */ -void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L) +void lj_gc_step_fixtop(lua_State *L) { if (curr_funcisL(L)) L->top = curr_topL(L); lj_gc_step(L); } -#if LJ_HASJIT /* Perform multiple GC steps. Called from JIT-compiled code. */ -int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps) +int lj_gc_step_jit(global_State *g, MSize steps) { lua_State *L = gco2th(gcref(g->cur_L)); L->base = tvref(G(L)->jit_base); @@ -724,13 +693,13 @@ int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps) /* Return 1 to force a trace exit. */ return (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize); } -#endif /* Perform a full GC cycle. 
*/ void lj_gc_fullgc(lua_State *L) { global_State *g = G(L); int32_t ostate = g->vmstate; + g->gcvmstate = ostate; setvmstate(g, GC); if (g->gc.state <= GCSatomic) { /* Caught somewhere in the middle. */ setmref(g->gc.sweep, &g->gc.root); /* Sweep everything (preserving it). */ @@ -766,7 +735,7 @@ void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v) } /* Specialized barrier for closed upvalue. Pass &uv->tv. */ -void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv) +void lj_gc_barrieruv(global_State *g, TValue *tv) { #define TV2MARKED(x) \ (*((uint8_t *)(x) - offsetof(GCupval, tv) + offsetof(GCupval, marked))) @@ -799,14 +768,12 @@ void lj_gc_closeuv(global_State *g, GCupval *uv) } } -#if LJ_HASJIT /* Mark a trace if it's saved during the propagation phase. */ void lj_gc_barriertrace(global_State *g, uint32_t traceno) { if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic) gc_marktrace(g, traceno); } -#endif /* -- Allocator ----------------------------------------------------------- */ @@ -825,7 +792,7 @@ void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz) } /* Allocate new GC object and link it to the root set. */ -void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size) +void * lj_mem_newgco(lua_State *L, GCSize size) { global_State *g = G(L); GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size); diff --git a/src/lj_gc.h b/src/lj_gc.h index 669bbe9240..2e27b31889 100644 --- a/src/lj_gc.h +++ b/src/lj_gc.h @@ -48,17 +48,11 @@ enum { /* Collector. 
*/ LJ_FUNC size_t lj_gc_separateudata(global_State *g, int all); LJ_FUNC void lj_gc_finalize_udata(lua_State *L); -#if LJ_HASFFI LJ_FUNC void lj_gc_finalize_cdata(lua_State *L); -#else -#define lj_gc_finalize_cdata(L) UNUSED(L) -#endif LJ_FUNC void lj_gc_freeall(global_State *g); -LJ_FUNCA int LJ_FASTCALL lj_gc_step(lua_State *L); -LJ_FUNCA void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L); -#if LJ_HASJIT -LJ_FUNC int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps); -#endif +LJ_FUNCA int lj_gc_step(lua_State *L); +LJ_FUNCA void lj_gc_step_fixtop(lua_State *L); +LJ_FUNC int lj_gc_step_jit(global_State *g, MSize steps); LJ_FUNC void lj_gc_fullgc(lua_State *L); /* GC check: drive collector forward if the GC threshold has been reached. */ @@ -71,11 +65,9 @@ LJ_FUNC void lj_gc_fullgc(lua_State *L); /* Write barriers. */ LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v); -LJ_FUNCA void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv); +LJ_FUNCA void lj_gc_barrieruv(global_State *g, TValue *tv); LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv); -#if LJ_HASJIT LJ_FUNC void lj_gc_barriertrace(global_State *g, uint32_t traceno); -#endif /* Move the GC propagation frontier back for tables (make it gray again). */ static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t) @@ -108,7 +100,7 @@ static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t) /* Allocator. */ LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz); -LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size); +LJ_FUNC void * lj_mem_newgco(lua_State *L, GCSize size); LJ_FUNC void *lj_mem_grow(lua_State *L, void *p, MSize *szp, MSize lim, MSize esz); diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index c219ffac0a..246fa69d78 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_gc.h" #include "lj_err.h" @@ -22,796 +21,3 @@ /* This is not compiled in by default. 
** Enable with -DLUAJIT_USE_GDBJIT in the Makefile and recompile everything. */ -#ifdef LUAJIT_USE_GDBJIT - -/* The GDB JIT API allows JIT compilers to pass debug information about -** JIT-compiled code back to GDB. You need at least GDB 7.0 or higher -** to see it in action. -** -** This is a passive API, so it works even when not running under GDB -** or when attaching to an already running process. Alas, this implies -** enabling it always has a non-negligible overhead -- do not use in -** release mode! -** -** The LuaJIT GDB JIT client is rather minimal at the moment. It gives -** each trace a symbol name and adds a source location and frame unwind -** information. Obviously LuaJIT itself and any embedding C application -** should be compiled with debug symbols, too (see the Makefile). -** -** Traces are named TRACE_1, TRACE_2, ... these correspond to the trace -** numbers from -jv or -jdump. Use "break TRACE_1" or "tbreak TRACE_1" etc. -** to set breakpoints on specific traces (even ahead of their creation). -** -** The source location for each trace allows listing the corresponding -** source lines with the GDB command "list" (but only if the Lua source -** has been loaded from a file). Currently this is always set to the -** location where the trace has been started. -** -** Frame unwind information can be inspected with the GDB command -** "info frame". This also allows proper backtraces across JIT-compiled -** code with the GDB command "bt". -** -** You probably want to add the following settings to a .gdbinit file -** (or add them to ~/.gdbinit): -** set disassembly-flavor intel -** set breakpoint pending on -** -** Here's a sample GDB session: -** ------------------------------------------------------------------------ - -$ cat >x.lua -for outer=1,100 do - for inner=1,100 do end -end -^D - -$ luajit -jv x.lua -[TRACE 1 x.lua:2] -[TRACE 2 (1/3) x.lua:1 -> 1] - -$ gdb --quiet --args luajit x.lua -(gdb) tbreak TRACE_1 -Function "TRACE_1" not defined. 
-Temporary breakpoint 1 (TRACE_1) pending. -(gdb) run -Starting program: luajit x.lua - -Temporary breakpoint 1, TRACE_1 () at x.lua:2 -2 for inner=1,100 do end -(gdb) list -1 for outer=1,100 do -2 for inner=1,100 do end -3 end -(gdb) bt -#0 TRACE_1 () at x.lua:2 -#1 0x08053690 in lua_pcall [...] -[...] -#7 0x0806ff90 in main [...] -(gdb) disass TRACE_1 -Dump of assembler code for function TRACE_1: -0xf7fd9fba : mov DWORD PTR ds:0xf7e0e2a0,0x1 -0xf7fd9fc4 : movsd xmm7,QWORD PTR [edx+0x20] -[...] -0xf7fd9ff8 : jmp 0xf7fd2014 -End of assembler dump. -(gdb) tbreak TRACE_2 -Function "TRACE_2" not defined. -Temporary breakpoint 2 (TRACE_2) pending. -(gdb) cont -Continuing. - -Temporary breakpoint 2, TRACE_2 () at x.lua:1 -1 for outer=1,100 do -(gdb) info frame -Stack level 0, frame at 0xffffd7c0: - eip = 0xf7fd9f60 in TRACE_2 (x.lua:1); saved eip 0x8053690 - called by frame at 0xffffd7e0 - source language unknown. - Arglist at 0xffffd78c, args: - Locals at 0xffffd78c, Previous frame's sp is 0xffffd7c0 - Saved registers: - ebx at 0xffffd7ac, ebp at 0xffffd7b8, esi at 0xffffd7b0, edi at 0xffffd7b4, - eip at 0xffffd7bc -(gdb) - -** ------------------------------------------------------------------------ -*/ - -/* -- GDB JIT API --------------------------------------------------------- */ - -/* GDB JIT actions. */ -enum { - GDBJIT_NOACTION = 0, - GDBJIT_REGISTER, - GDBJIT_UNREGISTER -}; - -/* GDB JIT entry. */ -typedef struct GDBJITentry { - struct GDBJITentry *next_entry; - struct GDBJITentry *prev_entry; - const char *symfile_addr; - uint64_t symfile_size; -} GDBJITentry; - -/* GDB JIT descriptor. */ -typedef struct GDBJITdesc { - uint32_t version; - uint32_t action_flag; - GDBJITentry *relevant_entry; - GDBJITentry *first_entry; -} GDBJITdesc; - -GDBJITdesc __jit_debug_descriptor = { - 1, GDBJIT_NOACTION, NULL, NULL -}; - -/* GDB sets a breakpoint at this function. 
*/ -void LJ_NOINLINE __jit_debug_register_code() -{ - __asm__ __volatile__(""); -}; - -/* -- In-memory ELF object definitions ------------------------------------ */ - -/* ELF definitions. */ -typedef struct ELFheader { - uint8_t emagic[4]; - uint8_t eclass; - uint8_t eendian; - uint8_t eversion; - uint8_t eosabi; - uint8_t eabiversion; - uint8_t epad[7]; - uint16_t type; - uint16_t machine; - uint32_t version; - uintptr_t entry; - uintptr_t phofs; - uintptr_t shofs; - uint32_t flags; - uint16_t ehsize; - uint16_t phentsize; - uint16_t phnum; - uint16_t shentsize; - uint16_t shnum; - uint16_t shstridx; -} ELFheader; - -typedef struct ELFsectheader { - uint32_t name; - uint32_t type; - uintptr_t flags; - uintptr_t addr; - uintptr_t ofs; - uintptr_t size; - uint32_t link; - uint32_t info; - uintptr_t align; - uintptr_t entsize; -} ELFsectheader; - -#define ELFSECT_IDX_ABS 0xfff1 - -enum { - ELFSECT_TYPE_PROGBITS = 1, - ELFSECT_TYPE_SYMTAB = 2, - ELFSECT_TYPE_STRTAB = 3, - ELFSECT_TYPE_NOBITS = 8 -}; - -#define ELFSECT_FLAGS_WRITE 1 -#define ELFSECT_FLAGS_ALLOC 2 -#define ELFSECT_FLAGS_EXEC 4 - -typedef struct ELFsymbol { -#if LJ_64 - uint32_t name; - uint8_t info; - uint8_t other; - uint16_t sectidx; - uintptr_t value; - uint64_t size; -#else - uint32_t name; - uintptr_t value; - uint32_t size; - uint8_t info; - uint8_t other; - uint16_t sectidx; -#endif -} ELFsymbol; - -enum { - ELFSYM_TYPE_FUNC = 2, - ELFSYM_TYPE_FILE = 4, - ELFSYM_BIND_LOCAL = 0 << 4, - ELFSYM_BIND_GLOBAL = 1 << 4, -}; - -/* DWARF definitions. 
*/ -#define DW_CIE_VERSION 1 - -enum { - DW_CFA_nop = 0x0, - DW_CFA_offset_extended = 0x5, - DW_CFA_def_cfa = 0xc, - DW_CFA_def_cfa_offset = 0xe, - DW_CFA_offset_extended_sf = 0x11, - DW_CFA_advance_loc = 0x40, - DW_CFA_offset = 0x80 -}; - -enum { - DW_EH_PE_udata4 = 3, - DW_EH_PE_textrel = 0x20 -}; - -enum { - DW_TAG_compile_unit = 0x11 -}; - -enum { - DW_children_no = 0, - DW_children_yes = 1 -}; - -enum { - DW_AT_name = 0x03, - DW_AT_stmt_list = 0x10, - DW_AT_low_pc = 0x11, - DW_AT_high_pc = 0x12 -}; - -enum { - DW_FORM_addr = 0x01, - DW_FORM_data4 = 0x06, - DW_FORM_string = 0x08 -}; - -enum { - DW_LNS_extended_op = 0, - DW_LNS_copy = 1, - DW_LNS_advance_pc = 2, - DW_LNS_advance_line = 3 -}; - -enum { - DW_LNE_end_sequence = 1, - DW_LNE_set_address = 2 -}; - -enum { -#if LJ_TARGET_X86 - DW_REG_AX, DW_REG_CX, DW_REG_DX, DW_REG_BX, - DW_REG_SP, DW_REG_BP, DW_REG_SI, DW_REG_DI, - DW_REG_RA, -#elif LJ_TARGET_X64 - /* Yes, the order is strange, but correct. */ - DW_REG_AX, DW_REG_DX, DW_REG_CX, DW_REG_BX, - DW_REG_SI, DW_REG_DI, DW_REG_BP, DW_REG_SP, - DW_REG_8, DW_REG_9, DW_REG_10, DW_REG_11, - DW_REG_12, DW_REG_13, DW_REG_14, DW_REG_15, - DW_REG_RA, -#elif LJ_TARGET_ARM - DW_REG_SP = 13, - DW_REG_RA = 14, -#elif LJ_TARGET_ARM64 - DW_REG_SP = 31, - DW_REG_RA = 30, -#elif LJ_TARGET_PPC - DW_REG_SP = 1, - DW_REG_RA = 65, - DW_REG_CR = 70, -#elif LJ_TARGET_MIPS - DW_REG_SP = 29, - DW_REG_RA = 31, -#else -#error "Unsupported target architecture" -#endif -}; - -/* Minimal list of sections for the in-memory ELF object. */ -enum { - GDBJIT_SECT_NULL, - GDBJIT_SECT_text, - GDBJIT_SECT_eh_frame, - GDBJIT_SECT_shstrtab, - GDBJIT_SECT_strtab, - GDBJIT_SECT_symtab, - GDBJIT_SECT_debug_info, - GDBJIT_SECT_debug_abbrev, - GDBJIT_SECT_debug_line, - GDBJIT_SECT__MAX -}; - -enum { - GDBJIT_SYM_UNDEF, - GDBJIT_SYM_FILE, - GDBJIT_SYM_FUNC, - GDBJIT_SYM__MAX -}; - -/* In-memory ELF object. */ -typedef struct GDBJITobj { - ELFheader hdr; /* ELF header. 
*/ - ELFsectheader sect[GDBJIT_SECT__MAX]; /* ELF sections. */ - ELFsymbol sym[GDBJIT_SYM__MAX]; /* ELF symbol table. */ - uint8_t space[4096]; /* Space for various section data. */ -} GDBJITobj; - -/* Combined structure for GDB JIT entry and ELF object. */ -typedef struct GDBJITentryobj { - GDBJITentry entry; - size_t sz; - GDBJITobj obj; -} GDBJITentryobj; - -/* Template for in-memory ELF header. */ -static const ELFheader elfhdr_template = { - .emagic = { 0x7f, 'E', 'L', 'F' }, - .eclass = LJ_64 ? 2 : 1, - .eendian = LJ_ENDIAN_SELECT(1, 2), - .eversion = 1, -#if LJ_TARGET_LINUX - .eosabi = 0, /* Nope, it's not 3. */ -#elif defined(__FreeBSD__) - .eosabi = 9, -#elif defined(__NetBSD__) - .eosabi = 2, -#elif defined(__OpenBSD__) - .eosabi = 12, -#elif defined(__DragonFly__) - .eosabi = 0, -#elif (defined(__sun__) && defined(__svr4__)) - .eosabi = 6, -#else - .eosabi = 0, -#endif - .eabiversion = 0, - .epad = { 0, 0, 0, 0, 0, 0, 0 }, - .type = 1, -#if LJ_TARGET_X86 - .machine = 3, -#elif LJ_TARGET_X64 - .machine = 62, -#elif LJ_TARGET_ARM - .machine = 40, -#elif LJ_TARGET_ARM64 - .machine = 183, -#elif LJ_TARGET_PPC - .machine = 20, -#elif LJ_TARGET_MIPS - .machine = 8, -#else -#error "Unsupported target architecture" -#endif - .version = 1, - .entry = 0, - .phofs = 0, - .shofs = offsetof(GDBJITobj, sect), - .flags = 0, - .ehsize = sizeof(ELFheader), - .phentsize = 0, - .phnum = 0, - .shentsize = sizeof(ELFsectheader), - .shnum = GDBJIT_SECT__MAX, - .shstridx = GDBJIT_SECT_shstrtab -}; - -/* -- In-memory ELF object generation ------------------------------------- */ - -/* Context for generating the ELF object for the GDB JIT API. */ -typedef struct GDBJITctx { - uint8_t *p; /* Pointer to next address in obj.space. */ - uint8_t *startp; /* Pointer to start address in obj.space. */ - GCtrace *T; /* Generate symbols for this trace. */ - uintptr_t mcaddr; /* Machine code address. */ - MSize szmcode; /* Size of machine code. 
*/ - MSize spadjp; /* Stack adjustment for parent trace or interpreter. */ - MSize spadj; /* Stack adjustment for trace itself. */ - BCLine lineno; /* Starting line number. */ - const char *filename; /* Starting file name. */ - size_t objsize; /* Final size of ELF object. */ - GDBJITobj obj; /* In-memory ELF object. */ -} GDBJITctx; - -/* Add a zero-terminated string. */ -static uint32_t gdbjit_strz(GDBJITctx *ctx, const char *str) -{ - uint8_t *p = ctx->p; - uint32_t ofs = (uint32_t)(p - ctx->startp); - do { - *p++ = (uint8_t)*str; - } while (*str++); - ctx->p = p; - return ofs; -} - -/* Append a decimal number. */ -static void gdbjit_catnum(GDBJITctx *ctx, uint32_t n) -{ - if (n >= 10) { uint32_t m = n / 10; n = n % 10; gdbjit_catnum(ctx, m); } - *ctx->p++ = '0' + n; -} - -/* Add a SLEB128 value. */ -static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) -{ - uint8_t *p = ctx->p; - for (; (uint32_t)(v+0x40) >= 0x80; v >>= 7) - *p++ = (uint8_t)((v & 0x7f) | 0x80); - *p++ = (uint8_t)(v & 0x7f); - ctx->p = p; -} - -/* Shortcuts to generate DWARF structures. */ -#define DB(x) (*p++ = (x)) -#define DI8(x) (*(int8_t *)p = (x), p++) -#define DU16(x) (*(uint16_t *)p = (x), p += 2) -#define DU32(x) (*(uint32_t *)p = (x), p += 4) -#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t)) -#define DUV(x) (p = (uint8_t *)lj_strfmt_wuleb128((char *)p, (x))) -#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p) -#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p) -#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop -#define DSECT(name, stmt) \ - { uint32_t *szp_##name = (uint32_t *)p; p += 4; stmt \ - *szp_##name = (uint32_t)((p-(uint8_t *)szp_##name)-4); } \ - -/* Initialize ELF section headers. */ -static void LJ_FASTCALL gdbjit_secthdr(GDBJITctx *ctx) -{ - ELFsectheader *sect; - - *ctx->p++ = '\0'; /* Empty string at start of string table. 
*/ - -#define SECTDEF(id, tp, al) \ - sect = &ctx->obj.sect[GDBJIT_SECT_##id]; \ - sect->name = gdbjit_strz(ctx, "." #id); \ - sect->type = ELFSECT_TYPE_##tp; \ - sect->align = (al) - - SECTDEF(text, NOBITS, 16); - sect->flags = ELFSECT_FLAGS_ALLOC|ELFSECT_FLAGS_EXEC; - sect->addr = ctx->mcaddr; - sect->ofs = 0; - sect->size = ctx->szmcode; - - SECTDEF(eh_frame, PROGBITS, sizeof(uintptr_t)); - sect->flags = ELFSECT_FLAGS_ALLOC; - - SECTDEF(shstrtab, STRTAB, 1); - SECTDEF(strtab, STRTAB, 1); - - SECTDEF(symtab, SYMTAB, sizeof(uintptr_t)); - sect->ofs = offsetof(GDBJITobj, sym); - sect->size = sizeof(ctx->obj.sym); - sect->link = GDBJIT_SECT_strtab; - sect->entsize = sizeof(ELFsymbol); - sect->info = GDBJIT_SYM_FUNC; - - SECTDEF(debug_info, PROGBITS, 1); - SECTDEF(debug_abbrev, PROGBITS, 1); - SECTDEF(debug_line, PROGBITS, 1); - -#undef SECTDEF -} - -/* Initialize symbol table. */ -static void LJ_FASTCALL gdbjit_symtab(GDBJITctx *ctx) -{ - ELFsymbol *sym; - - *ctx->p++ = '\0'; /* Empty string at start of string table. */ - - sym = &ctx->obj.sym[GDBJIT_SYM_FILE]; - sym->name = gdbjit_strz(ctx, "JIT mcode"); - sym->sectidx = ELFSECT_IDX_ABS; - sym->info = ELFSYM_TYPE_FILE|ELFSYM_BIND_LOCAL; - - sym = &ctx->obj.sym[GDBJIT_SYM_FUNC]; - sym->name = gdbjit_strz(ctx, "TRACE_"); ctx->p--; - gdbjit_catnum(ctx, ctx->T->traceno); *ctx->p++ = '\0'; - sym->sectidx = GDBJIT_SECT_text; - sym->value = 0; - sym->size = ctx->szmcode; - sym->info = ELFSYM_TYPE_FUNC|ELFSYM_BIND_GLOBAL; -} - -/* Initialize .eh_frame section. */ -static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) -{ - uint8_t *p = ctx->p; - uint8_t *framep = p; - - /* Emit DWARF EH CIE. */ - DSECT(CIE, - DU32(0); /* Offset to CIE itself. */ - DB(DW_CIE_VERSION); - DSTR("zR"); /* Augmentation. */ - DUV(1); /* Code alignment factor. */ - DSV(-(int32_t)sizeof(uintptr_t)); /* Data alignment factor. */ - DB(DW_REG_RA); /* Return address register. */ - DB(1); DB(DW_EH_PE_textrel|DW_EH_PE_udata4); /* Augmentation data. 
*/ - DB(DW_CFA_def_cfa); DUV(DW_REG_SP); DUV(sizeof(uintptr_t)); -#if LJ_TARGET_PPC - DB(DW_CFA_offset_extended_sf); DB(DW_REG_RA); DSV(-1); -#else - DB(DW_CFA_offset|DW_REG_RA); DUV(1); -#endif - DALIGNNOP(sizeof(uintptr_t)); - ) - - /* Emit DWARF EH FDE. */ - DSECT(FDE, - DU32((uint32_t)(p-framep)); /* Offset to CIE. */ - DU32(0); /* Machine code offset relative to .text. */ - DU32(ctx->szmcode); /* Machine code length. */ - DB(0); /* Augmentation data. */ - /* Registers saved in CFRAME. */ -#if LJ_TARGET_X86 - DB(DW_CFA_offset|DW_REG_BP); DUV(2); - DB(DW_CFA_offset|DW_REG_DI); DUV(3); - DB(DW_CFA_offset|DW_REG_SI); DUV(4); - DB(DW_CFA_offset|DW_REG_BX); DUV(5); -#elif LJ_TARGET_X64 - DB(DW_CFA_offset|DW_REG_BP); DUV(2); - DB(DW_CFA_offset|DW_REG_BX); DUV(3); - DB(DW_CFA_offset|DW_REG_15); DUV(4); - DB(DW_CFA_offset|DW_REG_14); DUV(5); - /* Extra registers saved for JIT-compiled code. */ - DB(DW_CFA_offset|DW_REG_13); DUV(LJ_GC64 ? 10 : 9); - DB(DW_CFA_offset|DW_REG_12); DUV(LJ_GC64 ? 11 : 10); -#elif LJ_TARGET_ARM - { - int i; - for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } - } -#elif LJ_TARGET_ARM64 - { - int i; - DB(DW_CFA_offset|31); DUV(2); - for (i = 28; i >= 19; i--) { DB(DW_CFA_offset|i); DUV(3+(28-i)); } - for (i = 15; i >= 8; i--) { DB(DW_CFA_offset|32|i); DUV(28-i); } - } -#elif LJ_TARGET_PPC - { - int i; - DB(DW_CFA_offset_extended); DB(DW_REG_CR); DUV(55); - for (i = 14; i <= 31; i++) { - DB(DW_CFA_offset|i); DUV(37+(31-i)); - DB(DW_CFA_offset|32|i); DUV(2+2*(31-i)); - } - } -#elif LJ_TARGET_MIPS - { - int i; - DB(DW_CFA_offset|30); DUV(2); - for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); } - for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); } - } -#else -#error "Unsupported target architecture" -#endif - if (ctx->spadjp != ctx->spadj) { /* Parent/interpreter stack frame size. */ - DB(DW_CFA_def_cfa_offset); DUV(ctx->spadjp); - DB(DW_CFA_advance_loc|1); /* Only an approximation. 
*/ - } - DB(DW_CFA_def_cfa_offset); DUV(ctx->spadj); /* Trace stack frame size. */ - DALIGNNOP(sizeof(uintptr_t)); - ) - - ctx->p = p; -} - -/* Initialize .debug_info section. */ -static void LJ_FASTCALL gdbjit_debuginfo(GDBJITctx *ctx) -{ - uint8_t *p = ctx->p; - - DSECT(info, - DU16(2); /* DWARF version. */ - DU32(0); /* Abbrev offset. */ - DB(sizeof(uintptr_t)); /* Pointer size. */ - - DUV(1); /* Abbrev #1: DW_TAG_compile_unit. */ - DSTR(ctx->filename); /* DW_AT_name. */ - DADDR(ctx->mcaddr); /* DW_AT_low_pc. */ - DADDR(ctx->mcaddr + ctx->szmcode); /* DW_AT_high_pc. */ - DU32(0); /* DW_AT_stmt_list. */ - ) - - ctx->p = p; -} - -/* Initialize .debug_abbrev section. */ -static void LJ_FASTCALL gdbjit_debugabbrev(GDBJITctx *ctx) -{ - uint8_t *p = ctx->p; - - /* Abbrev #1: DW_TAG_compile_unit. */ - DUV(1); DUV(DW_TAG_compile_unit); - DB(DW_children_no); - DUV(DW_AT_name); DUV(DW_FORM_string); - DUV(DW_AT_low_pc); DUV(DW_FORM_addr); - DUV(DW_AT_high_pc); DUV(DW_FORM_addr); - DUV(DW_AT_stmt_list); DUV(DW_FORM_data4); - DB(0); DB(0); - - ctx->p = p; -} - -#define DLNE(op, s) (DB(DW_LNS_extended_op), DUV(1+(s)), DB((op))) - -/* Initialize .debug_line section. */ -static void LJ_FASTCALL gdbjit_debugline(GDBJITctx *ctx) -{ - uint8_t *p = ctx->p; - - DSECT(line, - DU16(2); /* DWARF version. */ - DSECT(header, - DB(1); /* Minimum instruction length. */ - DB(1); /* is_stmt. */ - DI8(0); /* Line base for special opcodes. */ - DB(2); /* Line range for special opcodes. */ - DB(3+1); /* Opcode base at DW_LNS_advance_line+1. */ - DB(0); DB(1); DB(1); /* Standard opcode lengths. */ - /* Directory table. */ - DB(0); - /* File name table. 
*/ - DSTR(ctx->filename); DUV(0); DUV(0); DUV(0); - DB(0); - ) - - DLNE(DW_LNE_set_address, sizeof(uintptr_t)); DADDR(ctx->mcaddr); - if (ctx->lineno) { - DB(DW_LNS_advance_line); DSV(ctx->lineno-1); - } - DB(DW_LNS_copy); - DB(DW_LNS_advance_pc); DUV(ctx->szmcode); - DLNE(DW_LNE_end_sequence, 0); - ) - - ctx->p = p; -} - -#undef DLNE - -/* Undef shortcuts. */ -#undef DB -#undef DI8 -#undef DU16 -#undef DU32 -#undef DADDR -#undef DUV -#undef DSV -#undef DSTR -#undef DALIGNNOP -#undef DSECT - -/* Type of a section initializer callback. */ -typedef void (LJ_FASTCALL *GDBJITinitf)(GDBJITctx *ctx); - -/* Call section initializer and set the section offset and size. */ -static void gdbjit_initsect(GDBJITctx *ctx, int sect, GDBJITinitf initf) -{ - ctx->startp = ctx->p; - ctx->obj.sect[sect].ofs = (uintptr_t)((char *)ctx->p - (char *)&ctx->obj); - initf(ctx); - ctx->obj.sect[sect].size = (uintptr_t)(ctx->p - ctx->startp); -} - -#define SECTALIGN(p, a) \ - ((p) = (uint8_t *)(((uintptr_t)(p) + ((a)-1)) & ~(uintptr_t)((a)-1))) - -/* Build in-memory ELF object. */ -static void gdbjit_buildobj(GDBJITctx *ctx) -{ - GDBJITobj *obj = &ctx->obj; - /* Fill in ELF header and clear structures. */ - memcpy(&obj->hdr, &elfhdr_template, sizeof(ELFheader)); - memset(&obj->sect, 0, sizeof(ELFsectheader)*GDBJIT_SECT__MAX); - memset(&obj->sym, 0, sizeof(ELFsymbol)*GDBJIT_SYM__MAX); - /* Initialize sections. 
*/ - ctx->p = obj->space; - gdbjit_initsect(ctx, GDBJIT_SECT_shstrtab, gdbjit_secthdr); - gdbjit_initsect(ctx, GDBJIT_SECT_strtab, gdbjit_symtab); - gdbjit_initsect(ctx, GDBJIT_SECT_debug_info, gdbjit_debuginfo); - gdbjit_initsect(ctx, GDBJIT_SECT_debug_abbrev, gdbjit_debugabbrev); - gdbjit_initsect(ctx, GDBJIT_SECT_debug_line, gdbjit_debugline); - SECTALIGN(ctx->p, sizeof(uintptr_t)); - gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe); - ctx->objsize = (size_t)((char *)ctx->p - (char *)obj); - lua_assert(ctx->objsize < sizeof(GDBJITobj)); -} - -#undef SECTALIGN - -/* -- Interface to GDB JIT API -------------------------------------------- */ - -static int gdbjit_lock; - -static void gdbjit_lock_acquire() -{ - while (__sync_lock_test_and_set(&gdbjit_lock, 1)) { - /* Just spin; futexes or pthreads aren't worth the portability cost. */ - } -} - -static void gdbjit_lock_release() -{ - __sync_lock_release(&gdbjit_lock); -} - -/* Add new entry to GDB JIT symbol chain. */ -static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) -{ - /* Allocate memory for GDB JIT entry and ELF object. */ - MSize sz = (MSize)(sizeof(GDBJITentryobj) - sizeof(GDBJITobj) + ctx->objsize); - GDBJITentryobj *eo = lj_mem_newt(L, sz, GDBJITentryobj); - memcpy(&eo->obj, &ctx->obj, ctx->objsize); /* Copy ELF object. */ - eo->sz = sz; - ctx->T->gdbjit_entry = (void *)eo; - /* Link new entry to chain and register it. */ - eo->entry.prev_entry = NULL; - gdbjit_lock_acquire(); - eo->entry.next_entry = __jit_debug_descriptor.first_entry; - if (eo->entry.next_entry) - eo->entry.next_entry->prev_entry = &eo->entry; - eo->entry.symfile_addr = (const char *)&eo->obj; - eo->entry.symfile_size = ctx->objsize; - __jit_debug_descriptor.first_entry = &eo->entry; - __jit_debug_descriptor.relevant_entry = &eo->entry; - __jit_debug_descriptor.action_flag = GDBJIT_REGISTER; - __jit_debug_register_code(); - gdbjit_lock_release(); -} - -/* Add debug info for newly compiled trace and notify GDB. 
*/ -void lj_gdbjit_addtrace(jit_State *J, GCtrace *T) -{ - GDBJITctx ctx; - GCproto *pt = &gcref(T->startpt)->pt; - TraceNo parent = T->ir[REF_BASE].op1; - const BCIns *startpc = mref(T->startpc, const BCIns); - ctx.T = T; - ctx.mcaddr = (uintptr_t)T->mcode; - ctx.szmcode = T->szmcode; - ctx.spadjp = CFRAME_SIZE_JIT + - (MSize)(parent ? traceref(J, parent)->spadjust : 0); - ctx.spadj = CFRAME_SIZE_JIT + T->spadjust; - lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); - ctx.lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); - ctx.filename = proto_chunknamestr(pt); - if (*ctx.filename == '@' || *ctx.filename == '=') - ctx.filename++; - else - ctx.filename = "(string)"; - gdbjit_buildobj(&ctx); - gdbjit_newentry(J->L, &ctx); -} - -/* Delete debug info for trace and notify GDB. */ -void lj_gdbjit_deltrace(jit_State *J, GCtrace *T) -{ - GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry; - if (eo) { - gdbjit_lock_acquire(); - if (eo->entry.prev_entry) - eo->entry.prev_entry->next_entry = eo->entry.next_entry; - else - __jit_debug_descriptor.first_entry = eo->entry.next_entry; - if (eo->entry.next_entry) - eo->entry.next_entry->prev_entry = eo->entry.prev_entry; - __jit_debug_descriptor.relevant_entry = &eo->entry; - __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER; - __jit_debug_register_code(); - gdbjit_lock_release(); - lj_mem_free(J2G(J), eo, eo->sz); - } -} - -#endif -#endif diff --git a/src/lj_gdbjit.h b/src/lj_gdbjit.h index bbaa1568c4..d2c8eb8f6e 100644 --- a/src/lj_gdbjit.h +++ b/src/lj_gdbjit.h @@ -9,14 +9,7 @@ #include "lj_obj.h" #include "lj_jit.h" -#if LJ_HASJIT && defined(LUAJIT_USE_GDBJIT) - -LJ_FUNC void lj_gdbjit_addtrace(jit_State *J, GCtrace *T); -LJ_FUNC void lj_gdbjit_deltrace(jit_State *J, GCtrace *T); - -#else #define lj_gdbjit_addtrace(J, T) UNUSED(T) #define lj_gdbjit_deltrace(J, T) UNUSED(T) -#endif #endif diff --git a/src/lj_ir.c b/src/lj_ir.c index 5baece67e6..bd4ee1692a 100644 --- a/src/lj_ir.c +++ 
b/src/lj_ir.c @@ -12,7 +12,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_gc.h" #include "lj_buf.h" @@ -23,11 +22,9 @@ #include "lj_ircall.h" #include "lj_iropt.h" #include "lj_trace.h" -#if LJ_HASFFI #include "lj_ctype.h" #include "lj_cdata.h" #include "lj_carith.h" -#endif #include "lj_vm.h" #include "lj_strscan.h" #include "lj_strfmt.h" @@ -68,51 +65,8 @@ IRCALLDEF(IRCALLCI) /* -- IR emitter ---------------------------------------------------------- */ -/* Grow IR buffer at the top. */ -void LJ_FASTCALL lj_ir_growtop(jit_State *J) -{ - IRIns *baseir = J->irbuf + J->irbotlim; - MSize szins = J->irtoplim - J->irbotlim; - if (szins) { - baseir = (IRIns *)lj_mem_realloc(J->L, baseir, szins*sizeof(IRIns), - 2*szins*sizeof(IRIns)); - J->irtoplim = J->irbotlim + 2*szins; - } else { - baseir = (IRIns *)lj_mem_realloc(J->L, NULL, 0, LJ_MIN_IRSZ*sizeof(IRIns)); - J->irbotlim = REF_BASE - LJ_MIN_IRSZ/4; - J->irtoplim = J->irbotlim + LJ_MIN_IRSZ; - } - J->cur.ir = J->irbuf = baseir - J->irbotlim; -} - -/* Grow IR buffer at the bottom or shift it up. */ -static void lj_ir_growbot(jit_State *J) -{ - IRIns *baseir = J->irbuf + J->irbotlim; - MSize szins = J->irtoplim - J->irbotlim; - lua_assert(szins != 0); - lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim); - if (J->cur.nins + (szins >> 1) < J->irtoplim) { - /* More than half of the buffer is free on top: shift up by a quarter. */ - MSize ofs = szins >> 2; - memmove(baseir + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns)); - J->irbotlim -= ofs; - J->irtoplim -= ofs; - J->cur.ir = J->irbuf = baseir - J->irbotlim; - } else { - /* Double the buffer size, but split the growth amongst top/bottom. */ - IRIns *newbase = lj_mem_newt(J->L, 2*szins*sizeof(IRIns), IRIns); - MSize ofs = szins >= 256 ? 128 : (szins >> 1); /* Limit bottom growth. 
*/ - memcpy(newbase + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns)); - lj_mem_free(G(J->L), baseir, szins*sizeof(IRIns)); - J->irbotlim -= ofs; - J->irtoplim = J->irbotlim + 2*szins; - J->cur.ir = J->irbuf = newbase - J->irbotlim; - } -} - /* Emit IR without any optimizations. */ -TRef LJ_FASTCALL lj_ir_emit(jit_State *J) +TRef lj_ir_emit(jit_State *J) { IRRef ref = lj_ir_nextins(J); IRIns *ir = IR(ref); @@ -164,37 +118,27 @@ LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs) ** comparisons. The same constant must get the same reference. */ -/* Get ref of next IR constant and optionally grow IR. -** Note: this may invalidate all IRIns *! -*/ +/* Get ref of next IR constant. */ static LJ_AINLINE IRRef ir_nextk(jit_State *J) { IRRef ref = J->cur.nk; - if (LJ_UNLIKELY(ref <= J->irbotlim)) lj_ir_growbot(J); J->cur.nk = --ref; return ref; } -/* Get ref of next 64 bit IR constant and optionally grow IR. -** Note: this may invalidate all IRIns *! -*/ +/* Get ref of next 64 bit IR constant. */ static LJ_AINLINE IRRef ir_nextk64(jit_State *J) { IRRef ref = J->cur.nk - 2; lua_assert(J->state != LJ_TRACE_ASM); - if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J); J->cur.nk = ref; return ref; } -#if LJ_GC64 #define ir_nextkgc ir_nextk64 -#else -#define ir_nextkgc ir_nextk -#endif /* Intern int32_t constant. */ -TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k) +TRef lj_ir_kint(jit_State *J, int32_t k) { IRIns *ir, *cir = J->cur.ir; IRRef ref; @@ -285,7 +229,7 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t) ir = IR(ref); /* NOBARRIER: Current trace is a GC root. */ ir->op12 = 0; - setgcref(ir[LJ_GC64].gcr, o); + setgcref(ir[1].gcr, o); ir->t.irt = (uint8_t)t; ir->o = IR_KGC; ir->prev = J->chain[IR_KGC]; @@ -301,7 +245,7 @@ TRef lj_ir_ktrace(jit_State *J) IRIns *ir = IR(ref); lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE); ir->t.irt = IRT_P64; - ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. 
*/ + ir->o = IR_KNUM; /* Not IR_KGC yet, but same size. */ ir->op12 = 0; ir->prev = 0; return TREF(ref, IRT_P64); @@ -312,20 +256,13 @@ TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) { IRIns *ir, *cir = J->cur.ir; IRRef ref; -#if LJ_64 && !LJ_GC64 - lua_assert((void *)(uintptr_t)u32ptr(ptr) == ptr); -#endif for (ref = J->chain[op]; ref; ref = cir[ref].prev) if (ir_kptr(&cir[ref]) == ptr) goto found; -#if LJ_GC64 ref = ir_nextk64(J); -#else - ref = ir_nextk(J); -#endif ir = IR(ref); ir->op12 = 0; - setmref(ir[LJ_GC64].ptr, ptr); + setmref(ir[1].ptr, ptr); ir->t.irt = IRT_PGC; ir->o = op; ir->prev = J->chain[op]; @@ -389,14 +326,12 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) case IR_KPTR: case IR_KKPTR: setlightudV(tv, ir_kptr(ir)); break; case IR_KNULL: setlightudV(tv, NULL); break; case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; -#if LJ_HASFFI case IR_KINT64: { GCcdata *cd = lj_cdata_new_(L, CTID_INT64, 8); *(uint64_t *)cdataptr(cd) = ir_kint64(ir)->u64; setcdataV(L, tv, cd); break; } -#endif default: lua_assert(0); break; } } @@ -404,7 +339,7 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) /* -- Convert IR operand types -------------------------------------------- */ /* Convert from string to number. */ -TRef LJ_FASTCALL lj_ir_tonumber(jit_State *J, TRef tr) +TRef lj_ir_tonumber(jit_State *J, TRef tr) { if (!tref_isnumber(tr)) { if (tref_isstr(tr)) @@ -416,7 +351,7 @@ TRef LJ_FASTCALL lj_ir_tonumber(jit_State *J, TRef tr) } /* Convert from integer or string to number. */ -TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr) +TRef lj_ir_tonum(jit_State *J, TRef tr) { if (!tref_isnum(tr)) { if (tref_isinteger(tr)) @@ -430,7 +365,7 @@ TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr) } /* Convert from integer or number to string. 
*/ -TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr) +TRef lj_ir_tostr(jit_State *J, TRef tr) { if (!tref_isstr(tr)) { if (!tref_isnumber(tr)) @@ -491,4 +426,3 @@ void lj_ir_rollback(jit_State *J, IRRef ref) #undef fins #undef emitir -#endif diff --git a/src/lj_ir.h b/src/lj_ir.h index 34c2785394..467fb0b9ca 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -40,7 +40,6 @@ _(USE, S , ref, ___) \ _(PHI, S , ref, ref) \ _(RENAME, S , ref, lit) \ - _(PROF, S , ___, ___) \ \ /* Constants. */ \ _(KPRI, N , ___, ___) \ @@ -294,7 +293,7 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; /* -- IR instruction types ------------------------------------------------ */ -#define IRTSIZE_PGC (LJ_GC64 ? 8 : 4) +#define IRTSIZE_PGC 8 /* Map of itypes to non-negative numbers and their sizes. ORDER LJ_T. ** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for @@ -304,7 +303,7 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; ** contiguous and next to IRT_NUM (see the typerange macros below). */ #define IRTDEF(_) \ - _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) \ + _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, 8) \ _(STR, IRTSIZE_PGC) _(P32, 4) _(THREAD, IRTSIZE_PGC) _(PROTO, IRTSIZE_PGC) \ _(FUNC, IRTSIZE_PGC) _(P64, 8) _(CDATA, IRTSIZE_PGC) _(TAB, IRTSIZE_PGC) \ _(UDATA, IRTSIZE_PGC) \ @@ -320,11 +319,11 @@ IRTDEF(IRTENUM) IRT__MAX, /* Native pointer type and the corresponding integer type. */ - IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32, - IRT_PGC = LJ_GC64 ? IRT_P64 : IRT_P32, - IRT_IGC = LJ_GC64 ? IRT_I64 : IRT_INT, - IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT, - IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32, + IRT_PTR = IRT_P64, + IRT_PGC = IRT_P64, + IRT_IGC = IRT_I64, + IRT_INTP = IRT_I64, + IRT_UINTP = IRT_U64, /* Additional flags. */ IRT_MARK = 0x20, /* Marker for misc. purposes. 
*/ @@ -376,18 +375,12 @@ typedef struct IRType1 { uint8_t irt; } IRType1; #define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) -#if LJ_GC64 +/* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. */ #define IRT_IS64 \ ((1u<> irt_type(t)) & 1) #define irt_is64orfp(t) (((IRT_IS64|(1u<>irt_type(t)) & 1) @@ -398,27 +391,16 @@ LJ_DATA const uint8_t lj_ir_type_size[]; static LJ_AINLINE IRType itype2irt(const TValue *tv) { - if (tvisint(tv)) - return IRT_INT; - else if (tvisnum(tv)) + if (tvisnum(tv)) return IRT_NUM; -#if LJ_64 && !LJ_GC64 - else if (tvislightud(tv)) - return IRT_LIGHTUD; -#endif else return (IRType)~itype(tv); } static LJ_AINLINE uint32_t irt_toitype_(IRType t) { - lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD); - if (LJ_DUALNUM && t > IRT_NUM) { - return LJ_TISNUM; - } else { - lua_assert(t <= IRT_NUM); - return ~(uint32_t)t; - } + lua_assert(t <= IRT_NUM); + return ~(uint32_t)t; } #define irt_toitype(t) irt_toitype_(irt_type((t))) @@ -560,22 +542,21 @@ typedef union IRIns { TValue tv; /* TValue constant (overlaps entire slot). 
*/ } IRIns; -#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr)) +#define ir_isk64(ir) ((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \ + (ir)->o == IR_KGC || \ + (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR) + +#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[1].gcr)) #define ir_kstr(ir) (gco2str(ir_kgc((ir)))) #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) #define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) #define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) #define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv) #define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv) -#define ir_k64(ir) \ - check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \ - (LJ_GC64 && \ - ((ir)->o == IR_KGC || \ - (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)), \ - &(ir)[1].tv) +#define ir_k64(ir) check_exp(ir_isk64(ir), &(ir)[1].tv) #define ir_kptr(ir) \ check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \ - mref((ir)[LJ_GC64].ptr, void)) + mref((ir)[1].ptr, void)) /* A store or any other op with a non-weak guard has a side-effect. */ static LJ_AINLINE int ir_sideeff(IRIns *ir) diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 973c36e6ec..45b9268158 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -51,74 +51,34 @@ typedef struct CCallInfo { #define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3) #define CCI_XA (1u << CCI_XARGS_SHIFT) -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) -#define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci))) -#else #define CCI_XNARGS(ci) CCI_NARGS((ci)) -#endif /* Helpers for conditional function definitions. 
*/ #define IRCALLCOND_ANY(x) x -#if LJ_TARGET_X86ORX64 #define IRCALLCOND_FPMATH(x) NULL -#else -#define IRCALLCOND_FPMATH(x) x -#endif -#if LJ_SOFTFP -#define IRCALLCOND_SOFTFP(x) x -#if LJ_HASFFI -#define IRCALLCOND_SOFTFP_FFI(x) x -#else -#define IRCALLCOND_SOFTFP_FFI(x) NULL -#endif -#else #define IRCALLCOND_SOFTFP(x) NULL #define IRCALLCOND_SOFTFP_FFI(x) NULL -#endif -#if LJ_SOFTFP && LJ_TARGET_MIPS32 -#define IRCALLCOND_SOFTFP_MIPS(x) x -#else #define IRCALLCOND_SOFTFP_MIPS(x) NULL -#endif #define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS32) -#if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) +#if (LJ_SOFTFP || LJ_NEED_FP64) #define IRCALLCOND_FP64_FFI(x) x #else #define IRCALLCOND_FP64_FFI(x) NULL #endif -#if LJ_HASFFI #define IRCALLCOND_FFI(x) x -#if LJ_32 -#define IRCALLCOND_FFI32(x) x -#else #define IRCALLCOND_FFI32(x) NULL -#endif -#else -#define IRCALLCOND_FFI(x) NULL -#define IRCALLCOND_FFI32(x) NULL -#endif -#if LJ_SOFTFP -#define XA_FP CCI_XA -#define XA2_FP (CCI_XA+CCI_XA) -#else #define XA_FP 0 #define XA2_FP 0 -#endif -#if LJ_32 -#define XA_64 CCI_XA -#define XA2_64 (CCI_XA+CCI_XA) -#else #define XA_64 0 #define XA2_64 0 -#endif /* Function definitions for CALL* instructions. */ #define IRCALLDEF(_) \ @@ -240,81 +200,8 @@ LJ_FUNC TRef lj_ir_call(jit_State *J, IRCallID id, ...); LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; /* Soft-float declarations. 
*/ -#if LJ_SOFTFP -#if LJ_TARGET_ARM -#define softfp_add __aeabi_dadd -#define softfp_sub __aeabi_dsub -#define softfp_mul __aeabi_dmul -#define softfp_div __aeabi_ddiv -#define softfp_cmp __aeabi_cdcmple -#define softfp_i2d __aeabi_i2d -#define softfp_d2i __aeabi_d2iz -#define softfp_ui2d __aeabi_ui2d -#define softfp_f2d __aeabi_f2d -#define softfp_d2ui __aeabi_d2uiz -#define softfp_d2f __aeabi_d2f -#define softfp_i2f __aeabi_i2f -#define softfp_ui2f __aeabi_ui2f -#define softfp_f2i __aeabi_f2iz -#define softfp_f2ui __aeabi_f2uiz -#define fp64_l2d __aeabi_l2d -#define fp64_ul2d __aeabi_ul2d -#define fp64_l2f __aeabi_l2f -#define fp64_ul2f __aeabi_ul2f -#if LJ_TARGET_IOS -#define fp64_d2l __fixdfdi -#define fp64_d2ul __fixunsdfdi -#define fp64_f2l __fixsfdi -#define fp64_f2ul __fixunssfdi -#else -#define fp64_d2l __aeabi_d2lz -#define fp64_d2ul __aeabi_d2ulz -#define fp64_f2l __aeabi_f2lz -#define fp64_f2ul __aeabi_f2ulz -#endif -#elif LJ_TARGET_MIPS -#define softfp_add __adddf3 -#define softfp_sub __subdf3 -#define softfp_mul __muldf3 -#define softfp_div __divdf3 -#define softfp_cmp __ledf2 -#define softfp_i2d __floatsidf -#define softfp_d2i __fixdfsi -#define softfp_ui2d __floatunsidf -#define softfp_f2d __extendsfdf2 -#define softfp_d2ui __fixunsdfsi -#define softfp_d2f __truncdfsf2 -#define softfp_i2f __floatsisf -#define softfp_ui2f __floatunsisf -#define softfp_f2i __fixsfsi -#define softfp_f2ui __fixunssfsi -#else -#error "Missing soft-float definitions for target architecture" -#endif -extern double softfp_add(double a, double b); -extern double softfp_sub(double a, double b); -extern double softfp_mul(double a, double b); -extern double softfp_div(double a, double b); -extern void softfp_cmp(double a, double b); -extern double softfp_i2d(int32_t a); -extern int32_t softfp_d2i(double a); -#if LJ_HASFFI -extern double softfp_ui2d(uint32_t a); -extern double softfp_f2d(float a); -extern uint32_t softfp_d2ui(double a); -extern float softfp_d2f(double a); 
-extern float softfp_i2f(int32_t a); -extern float softfp_ui2f(uint32_t a); -extern int32_t softfp_f2i(float a); -extern uint32_t softfp_f2ui(float a); -#endif -#if LJ_TARGET_MIPS -extern double lj_vm_sfmin(double a, double b); -extern double lj_vm_sfmax(double a, double b); -#endif -#endif -#if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) +#if LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) #ifdef __GNUC__ #define fp64_l2d __floatdidf #define fp64_ul2d __floatundidf @@ -329,7 +216,7 @@ extern double lj_vm_sfmax(double a, double b); #endif #endif -#if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) +#if (LJ_SOFTFP || LJ_NEED_FP64) extern double fp64_l2d(int64_t a); extern double fp64_ul2d(uint64_t a); extern float fp64_l2f(int64_t a); diff --git a/src/lj_iropt.h b/src/lj_iropt.h index 73aef0ef38..1305541607 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h @@ -11,10 +11,9 @@ #include "lj_obj.h" #include "lj_jit.h" -#if LJ_HASJIT /* IR emitter. */ -LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J); +LJ_FUNC void lj_ir_growtop(jit_State *J); +LJ_FUNC TRef lj_ir_emit(jit_State *J); /* Save current IR in J->fold.ins, but do not emit it (yet). */ static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b) @@ -25,13 +24,10 @@ static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b) #define lj_ir_set(J, ot, a, b) \ lj_ir_set_(J, (uint16_t)(ot), (IRRef1)(a), (IRRef1)(b)) -/* Get ref of next IR instruction and optionally grow IR. -** Note: this may invalidate all IRIns*! -*/ +/* Get ref of next IR instruction. */ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J) { IRRef ref = J->cur.nins; - if (LJ_UNLIKELY(ref >= J->irtoplim)) lj_ir_growtop(J); J->cur.nins = ref + 1; return ref; } @@ -39,7 +35,7 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J) LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs); /* Interning of constants. 
*/ -LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); +LJ_FUNC TRef lj_ir_kint(jit_State *J, int32_t k); LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64); LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64); LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n); @@ -50,11 +46,7 @@ LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t); LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot); LJ_FUNC TRef lj_ir_ktrace(jit_State *J); -#if LJ_64 #define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k)) -#else -#define lj_ir_kintp(J, k) lj_ir_kint(J, (int32_t)(k)) -#endif static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n) { @@ -82,9 +74,9 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n) LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); /* Convert IR operand types. */ -LJ_FUNC TRef LJ_FASTCALL lj_ir_tonumber(jit_State *J, TRef tr); -LJ_FUNC TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr); -LJ_FUNC TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr); +LJ_FUNC TRef lj_ir_tonumber(jit_State *J, TRef tr); +LJ_FUNC TRef lj_ir_tonum(jit_State *J, TRef tr); +LJ_FUNC TRef lj_ir_tostr(jit_State *J, TRef tr); /* Miscellaneous IR ops. */ LJ_FUNC int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op); @@ -92,9 +84,9 @@ LJ_FUNC int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op); LJ_FUNC void lj_ir_rollback(jit_State *J, IRRef ref); /* Emit IR instructions with on-the-fly optimizations. */ -LJ_FUNC TRef LJ_FASTCALL lj_opt_fold(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_cse(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim); +LJ_FUNC TRef lj_opt_fold(jit_State *J); +LJ_FUNC TRef lj_opt_cse(jit_State *J); +LJ_FUNC TRef lj_opt_cselim(jit_State *J, IRRef lim); /* Special return values for the fold functions. */ enum { @@ -115,31 +107,29 @@ enum { #define EMITFOLD (lj_ir_emit(J)) /* Load/store forwarding. 
*/ -LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J); -LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J); -LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim); +LJ_FUNC TRef lj_opt_fwd_aload(jit_State *J); +LJ_FUNC TRef lj_opt_fwd_hload(jit_State *J); +LJ_FUNC TRef lj_opt_fwd_uload(jit_State *J); +LJ_FUNC TRef lj_opt_fwd_fload(jit_State *J); +LJ_FUNC TRef lj_opt_fwd_xload(jit_State *J); +LJ_FUNC TRef lj_opt_fwd_tab_len(jit_State *J); +LJ_FUNC TRef lj_opt_fwd_hrefk(jit_State *J); +LJ_FUNC int lj_opt_fwd_href_nokey(jit_State *J); +LJ_FUNC int lj_opt_fwd_tptr(jit_State *J, IRRef lim); LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); /* Dead-store elimination. */ -LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J); +LJ_FUNC TRef lj_opt_dse_ahstore(jit_State *J); +LJ_FUNC TRef lj_opt_dse_ustore(jit_State *J); +LJ_FUNC TRef lj_opt_dse_fstore(jit_State *J); +LJ_FUNC TRef lj_opt_dse_xstore(jit_State *J); /* Narrowing. 
*/ -LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef key); -LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr); -LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr); -#if LJ_HASFFI -LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef key); -#endif +LJ_FUNC TRef lj_opt_narrow_convert(jit_State *J); +LJ_FUNC TRef lj_opt_narrow_index(jit_State *J, TRef key); +LJ_FUNC TRef lj_opt_narrow_toint(jit_State *J, TRef tr); +LJ_FUNC TRef lj_opt_narrow_tobit(jit_State *J, TRef tr); +LJ_FUNC TRef lj_opt_narrow_cindex(jit_State *J, TRef key); LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc, IROp op); LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc); @@ -150,13 +140,8 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); /* Optimization passes. */ LJ_FUNC void lj_opt_dce(jit_State *J); LJ_FUNC int lj_opt_loop(jit_State *J); -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) -LJ_FUNC void lj_opt_split(jit_State *J); -#else #define lj_opt_split(J) UNUSED(J) -#endif LJ_FUNC void lj_opt_sink(jit_State *J); -#endif #endif diff --git a/src/lj_jit.h b/src/lj_jit.h index 92054e3df6..6be05e9e63 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -13,7 +13,6 @@ #define JIT_F_ON 0x00000001 /* CPU-specific JIT engine flags. */ -#if LJ_TARGET_X86ORX64 #define JIT_F_SSE2 0x00000010 #define JIT_F_SSE3 0x00000020 #define JIT_F_SSE4_1 0x00000040 @@ -24,41 +23,6 @@ /* Names for the CPU-specific flags. Must match the order above. 
*/ #define JIT_F_CPU_FIRST JIT_F_SSE2 #define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2" -#elif LJ_TARGET_ARM -#define JIT_F_ARMV6_ 0x00000010 -#define JIT_F_ARMV6T2_ 0x00000020 -#define JIT_F_ARMV7 0x00000040 -#define JIT_F_VFPV2 0x00000080 -#define JIT_F_VFPV3 0x00000100 - -#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7) -#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7) -#define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) - -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_ARMV6_ -#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3" -#elif LJ_TARGET_PPC -#define JIT_F_SQRT 0x00000010 -#define JIT_F_ROUND 0x00000020 - -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_SQRT -#define JIT_F_CPUSTRING "\4SQRT\5ROUND" -#elif LJ_TARGET_MIPS -#define JIT_F_MIPSXXR2 0x00000010 - -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 -#if LJ_TARGET_MIPS32 -#define JIT_F_CPUSTRING "\010MIPS32R2" -#else -#define JIT_F_CPUSTRING "\010MIPS64R2" -#endif -#else -#define JIT_F_CPU_FIRST 0 -#define JIT_F_CPUSTRING "" -#endif /* Optimization flags. */ #define JIT_F_OPT_MASK 0x0fff0000 @@ -87,17 +51,12 @@ JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 -#if LJ_TARGET_WINDOWS || LJ_64 /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ #define JIT_P_sizemcode_DEFAULT 64 -#else -/* Could go as low as 4K, but the mmap() overhead would be rather high. */ -#define JIT_P_sizemcode_DEFAULT 32 -#endif /* Optimization parameters and their defaults. Length is a char in octal! */ #define JIT_PARAMDEF(_) \ - _(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \ + _(\010, maxtrace, 10000) /* Max. # of traces in cache. */ \ _(\011, maxrecord, 4000) /* Max. # of recorded IR instructions. */ \ _(\012, maxirconst, 500) /* Max. 
# of IR constants of a trace. */ \ _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ @@ -152,11 +111,7 @@ typedef enum { } PostProc; /* Machine code type. */ -#if LJ_TARGET_X86ORX64 typedef uint8_t MCode; -#else -typedef uint32_t MCode; -#endif /* Stack snapshot header. */ typedef struct SnapShot { @@ -183,9 +138,6 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) #define SNAP_TR(slot, tr) \ (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) -#if !LJ_FR2 -#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) -#endif #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) #define snap_ref(sn) ((sn) & 0xffff) #define snap_slot(sn) ((BCReg)((sn) >> 24)) @@ -194,13 +146,9 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn) { -#if LJ_FR2 uint64_t pcbase; memcpy(&pcbase, sn, sizeof(uint64_t)); return (const BCIns *)(pcbase >> 8); -#else - return (const BCIns *)(uintptr_t)*sn; -#endif } /* Snapshot and exit numbers. */ @@ -230,9 +178,7 @@ typedef struct GCtrace { uint8_t topslot; /* Top stack slot already checked to be allocated. */ uint8_t linktype; /* Type of link. */ IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ -#if LJ_GC64 uint32_t unused_gc64; -#endif GCRef gclist; IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ @@ -246,6 +192,8 @@ typedef struct GCtrace { MSize szmcode; /* Size of machine code. */ MCode *mcode; /* Start of machine code. */ MSize mcloop; /* Offset of loop start in machine code. */ + uint16_t nszirmcode; /* Number of elements in szirmcode array. */ + uint16_t *szirmcode; /* Bytes of mcode for each IR instruction (array.) */ uint16_t nchild; /* Number of child traces (root trace only). */ uint16_t spadjust; /* Stack pointer adjustment (offset in bytes). */ TraceNo1 traceno; /* Trace number. 
*/ @@ -253,16 +201,17 @@ typedef struct GCtrace { TraceNo1 root; /* Root trace of side trace (or 0 for root traces). */ TraceNo1 nextroot; /* Next root trace for same prototype. */ TraceNo1 nextside; /* Next side trace of same root trace. */ + TraceNo1 parent; /* Parent of this trace (or 0 for root traces). */ + ExitNo exitno; /* Exit number in parent (valid for side-traces only). */ uint8_t sinktags; /* Trace has SINK tags. */ uint8_t unused1; -#ifdef LUAJIT_USE_GDBJIT - void *gdbjit_entry; /* GDB JIT entry. */ -#endif } GCtrace; +#define TRACE_MAX 65535 + #define gco2trace(o) check_exp((o)->gch.gct == ~LJ_TTRACE, (GCtrace *)(o)) #define traceref(J, n) \ - check_exp((n)>0 && (MSize)(n)<J->sizetrace, (GCtrace *)gcref(J->trace[(n)])) + check_exp((n)>0 && (MSize)(n)<TRACE_MAX, (GCtrace *)gcref(J->trace[(n)])) LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCtrace, gclist)); @@ -277,13 +226,13 @@ static LJ_AINLINE MSize snap_nextofs(GCtrace *T, SnapShot *snap) /* Round-robin penalty cache for bytecodes leading to aborted traces. */ typedef struct HotPenalty { MRef pc; /* Starting bytecode PC. */ - uint16_t val; /* Penalty value, i.e. hotcount start. */ + uint32_t val; /* Penalty value, i.e. hotcount start. */ uint16_t reason; /* Abort reason (really TraceErr). */ } HotPenalty; #define PENALTY_SLOTS 64 /* Penalty cache slot. Must be a power of 2. */ #define PENALTY_MIN (36*2) /* Minimum penalty value. */ -#define PENALTY_MAX 60000 /* Maximum penalty value. */ +#define PENALTY_MAX 6000000 /* Maximum penalty value. */ #define PENALTY_RNDBITS 4 /* # of random bits to add to penalty value. */ /* Round-robin backpropagation cache for narrowing conversions. 
*/ @@ -325,41 +274,15 @@ enum { }; enum { -#if LJ_TARGET_X86ORX64 LJ_K64_TOBIT, /* 2^52 + 2^51 */ LJ_K64_2P64, /* 2^64 */ LJ_K64_M2P64, /* -2^64 */ -#if LJ_32 - LJ_K64_M2P64_31, /* -2^64 or -2^31 */ -#else LJ_K64_M2P64_31 = LJ_K64_M2P64, -#endif -#endif -#if LJ_TARGET_MIPS - LJ_K64_2P31, /* 2^31 */ -#if LJ_64 - LJ_K64_2P63, /* 2^63 */ - LJ_K64_M2P64, /* -2^64 */ -#endif -#endif LJ_K64__MAX, }; enum { -#if LJ_TARGET_X86ORX64 LJ_K32_M2P64_31, /* -2^64 or -2^31 */ -#endif -#if LJ_TARGET_PPC - LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ - LJ_K32_2P52, /* 2^52 */ -#endif -#if LJ_TARGET_PPC || LJ_TARGET_MIPS - LJ_K32_2P31, /* 2^31 */ -#endif -#if LJ_TARGET_MIPS64 - LJ_K32_2P63, /* 2^63 */ - LJ_K32_M2P64, /* -2^64 */ -#endif LJ_K32__MAX }; @@ -368,13 +291,8 @@ enum { ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) /* Set/reset flag to activate the SPLIT pass for the current trace. */ -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) -#define lj_needsplit(J) (J->needsplit = 1) -#define lj_resetsplit(J) (J->needsplit = 0) -#else #define lj_needsplit(J) UNUSED(J) #define lj_resetsplit(J) UNUSED(J) -#endif /* Fold state is used to fold instructions on-the-fly. */ typedef struct FoldState { @@ -383,6 +301,13 @@ typedef struct FoldState { IRIns right[2]; /* Instruction referenced by right operand. */ } FoldState; +/* Log entry for a bytecode that was recorded. */ +typedef struct BCRecLog { + GCproto *pt; /* Prototype of bytecode function (or NULL). */ + BCPos pos; /* Position of bytecode in prototype. */ + int32_t framedepth; /* Frame depth when recorded. */ +} BCRecLog; + /* JIT compiler state. */ typedef struct jit_State { GCtrace cur; /* Current trace. */ @@ -421,8 +346,6 @@ typedef struct jit_State { uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */ IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ - IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ - IRRef irbotlim; /* Lower limit of instuction buffer (biased). 
*/ IRRef loopref; /* Last loop reference or ref of final LOOP (or 0). */ MSize sizesnap; /* Size of temp. snapshot buffer. */ @@ -430,15 +353,16 @@ typedef struct jit_State { SnapEntry *snapmapbuf; /* Temp. snapshot map buffer. */ MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ + BCRecLog *bclog; /* Start of of recorded bytecode log. */ + uint32_t nbclog; /* Number of logged bytecodes. */ + uint32_t maxbclog; /* Max entries in the bytecode log. */ + PostProc postproc; /* Required post-processing after execution. */ -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) - uint8_t needsplit; /* Need SPLIT pass. */ -#endif uint8_t retryrec; /* Retry recording. */ GCRef *trace; /* Array of traces. */ TraceNo freetrace; /* Start of scan for next free trace. */ - MSize sizetrace; /* Size of trace array. */ + uint16_t ntraces; /* Number of traces created since last flush. */ IRRef1 ktrace; /* Reference to KGC with GCtrace. */ IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ @@ -476,16 +400,8 @@ typedef struct jit_State { size_t szallmcarea; /* Total size of all allocated mcode areas. */ TValue errinfo; /* Additional info element for trace errors. */ - -#if LJ_HASPROFILE - GCproto *prev_pt; /* Previous prototype. */ - BCLine prev_line; /* Previous line. */ - int prof_mode; /* Profiling mode: 0, 'f', 'l'. */ -#endif + int8_t final; /* True if trace error is final. */ } -#if LJ_TARGET_ARM -LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ -#endif jit_State; /* Trivial PRNG e.g. used for penalty randomization. 
*/ diff --git a/src/lj_lex.c b/src/lj_lex.c index 2d2f8194cf..ca618435d6 100644 --- a/src/lj_lex.c +++ b/src/lj_lex.c @@ -14,18 +14,17 @@ #include "lj_err.h" #include "lj_buf.h" #include "lj_str.h" -#if LJ_HASFFI #include "lj_tab.h" #include "lj_ctype.h" #include "lj_cdata.h" #include "lualib.h" -#endif #include "lj_state.h" #include "lj_lex.h" #include "lj_parse.h" #include "lj_char.h" #include "lj_strscan.h" #include "lj_strfmt.h" +#include "lj_auditlog.h" /* Lua lexer token names. */ static const char *const tokennames[] = { @@ -48,6 +47,7 @@ static LJ_NOINLINE LexChar lex_more(LexState *ls) size_t sz; const char *p = ls->rfunc(ls->L, ls->rdata, &sz); if (p == NULL || sz == 0) return LEX_EOF; + lj_auditlog_lex(ls->chunkarg, p, sz); ls->pe = p + sz; ls->p = p + 1; return (LexChar)(uint8_t)p[0]; @@ -56,7 +56,9 @@ static LJ_NOINLINE LexChar lex_more(LexState *ls) /* Get next character. */ static LJ_AINLINE LexChar lex_next(LexState *ls) { - return (ls->c = ls->p < ls->pe ? (LexChar)(uint8_t)*ls->p++ : lex_more(ls)); + LexChar c = (ls->c = ls->p < ls->pe ? (LexChar)(uint8_t)*ls->p++ : lex_more(ls)); + if (ls->log && ls->log < ls->logend) *ls->log++ = c; + return c; } /* Save character. */ @@ -100,13 +102,9 @@ static void lex_number(LexState *ls, TValue *tv) } lex_save(ls, '\0'); fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv, - (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | - (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); - if (LJ_DUALNUM && fmt == STRSCAN_INT) { - setitype(tv, LJ_TISNUM); - } else if (fmt == STRSCAN_NUM) { + STRSCAN_OPT_TONUM|STRSCAN_OPT_LL|STRSCAN_OPT_IMAG); + if (fmt == STRSCAN_NUM) { /* Already in correct format. 
*/ -#if LJ_HASFFI } else if (fmt != STRSCAN_ERROR) { lua_State *L = ls->L; GCcdata *cd; @@ -125,7 +123,6 @@ static void lex_number(LexState *ls, TValue *tv) *(uint64_t *)cdataptr(cd) = tv->u64; } lj_parse_keepcdata(ls, tv, cd); -#endif } else { lua_assert(fmt == STRSCAN_ERROR); lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER); @@ -406,6 +403,8 @@ int lj_lex_setup(lua_State *L, LexState *ls) ls->lookahead = TK_eof; /* No look-ahead token. */ ls->linenumber = 1; ls->lastline = 1; + ls->log = NULL; + ls->logend = NULL; lex_next(ls); /* Read-ahead first char. */ if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb && (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */ @@ -496,6 +495,20 @@ void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...) va_end(argp); } +/* Log the next input characters to a bounded buffer. */ +void lj_lex_log(LexState *ls, char *log, int size) +{ + ls->log = log; + ls->logend = log + size-1; +} + +/* Stop logging input characters. */ +void lj_lex_endlog(LexState *ls) +{ + ls->log = NULL; + ls->logend = NULL; +} + /* Initialize strings for reserved words. */ void lj_lex_init(lua_State *L) { diff --git a/src/lj_lex.h b/src/lj_lex.h index 33fa865726..5d6e7d9eb7 100644 --- a/src/lj_lex.h +++ b/src/lj_lex.h @@ -73,6 +73,8 @@ typedef struct LexState { BCInsLine *bcstack; /* Stack for bytecode instructions/line numbers. */ MSize sizebcstack; /* Size of bytecode stack. */ uint32_t level; /* Syntactical nesting level. */ + char *log; /* Current position where input should be logged. */ + char *logend; /* Last position where input can be logged. 
*/ } LexState; LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls); @@ -80,6 +82,8 @@ LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls); LJ_FUNC void lj_lex_next(LexState *ls); LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok); +LJ_FUNC void lj_lex_log(LexState *ls, char *log, int size); +LJ_FUNC void lj_lex_endlog(LexState *ls); LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...); LJ_FUNC void lj_lex_init(lua_State *L); diff --git a/src/lj_lib.c b/src/lj_lib.c index b8638de6a0..6ef589ab6c 100644 --- a/src/lj_lib.c +++ b/src/lj_lib.c @@ -58,7 +58,7 @@ static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab) ls.p = (const char *)(p+len); ls.pe = (const char *)~(uintptr_t)0; ls.c = -1; - ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE)); + ls.level = BCDUMP_F_STRIP; ls.chunkname = name; pt = lj_bcread_proto(&ls); pt->firstline = ~(BCLine)0; @@ -211,14 +211,6 @@ GCstr *lj_lib_optstr(lua_State *L, int narg) return (o < L->top && !tvisnil(o)) ? 
lj_lib_checkstr(L, narg) : NULL; } -#if LJ_DUALNUM -void lj_lib_checknumber(lua_State *L, int narg) -{ - TValue *o = L->base + narg-1; - if (!(o < L->top && lj_strscan_numberobj(o))) - lj_err_argt(L, narg, LUA_TNUMBER); -} -#endif lua_Number lj_lib_checknum(lua_State *L, int narg) { @@ -226,13 +218,7 @@ lua_Number lj_lib_checknum(lua_State *L, int narg) if (!(o < L->top && (tvisnumber(o) || (tvisstr(o) && lj_strscan_num(strV(o), o))))) lj_err_argt(L, narg, LUA_TNUMBER); - if (LJ_UNLIKELY(tvisint(o))) { - lua_Number n = (lua_Number)intV(o); - setnumV(o, n); - return n; - } else { - return numV(o); - } + return numV(o); } int32_t lj_lib_checkint(lua_State *L, int narg) @@ -240,13 +226,8 @@ int32_t lj_lib_checkint(lua_State *L, int narg) TValue *o = L->base + narg-1; if (!(o < L->top && lj_strscan_numberobj(o))) lj_err_argt(L, narg, LUA_TNUMBER); - if (LJ_LIKELY(tvisint(o))) { - return intV(o); - } else { - int32_t i = lj_num2int(numV(o)); - if (LJ_DUALNUM) setintV(o, i); - return i; - } + int32_t i = lj_num2int(numV(o)); + return i; } int32_t lj_lib_optint(lua_State *L, int narg, int32_t def) diff --git a/src/lj_lib.h b/src/lj_lib.h index 37ec9d7800..6cc8d235f1 100644 --- a/src/lj_lib.h +++ b/src/lj_lib.h @@ -33,11 +33,7 @@ LJ_FUNC TValue *lj_lib_checkany(lua_State *L, int narg); LJ_FUNC GCstr *lj_lib_checkstr(lua_State *L, int narg); LJ_FUNC GCstr *lj_lib_optstr(lua_State *L, int narg); -#if LJ_DUALNUM -LJ_FUNC void lj_lib_checknumber(lua_State *L, int narg); -#else #define lj_lib_checknumber(L, narg) lj_lib_checknum((L), (narg)) -#endif LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); @@ -47,25 +43,10 @@ LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); /* Avoid including lj_frame.h. 
*/ -#if LJ_GC64 #define lj_lib_upvalue(L, n) \ (&gcval(L->base-2)->fn.c.upvalue[(n)-1]) -#elif LJ_FR2 -#define lj_lib_upvalue(L, n) \ - (&gcref((L->base-2)->gcr)->fn.c.upvalue[(n)-1]) -#else -#define lj_lib_upvalue(L, n) \ - (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1]) -#endif -#if LJ_TARGET_WINDOWS -#define lj_lib_checkfpu(L) \ - do { setnumV(L->top++, (lua_Number)1437217655); \ - if (lua_tointeger(L, -1) != 1437217655) lj_err_caller(L, LJ_ERR_BADFPU); \ - L->top--; } while (0) -#else #define lj_lib_checkfpu(L) UNUSED(L) -#endif LJ_FUNC GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n); #define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0)) @@ -110,6 +91,6 @@ LJ_FUNC int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, /* Exported library functions. */ typedef struct RandomState RandomState; -LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs); +LJ_FUNC uint64_t lj_math_random_step(RandomState *rs); #endif diff --git a/src/lj_mcode.c b/src/lj_mcode.c index 77035bf72a..4d6e492189 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -7,30 +7,22 @@ #define LUA_CORE #include "lj_obj.h" -#if LJ_HASJIT #include "lj_gc.h" #include "lj_err.h" #include "lj_jit.h" #include "lj_mcode.h" #include "lj_trace.h" #include "lj_dispatch.h" -#endif -#if LJ_HASJIT || LJ_HASFFI #include "lj_vm.h" -#endif /* -- OS-specific functions ----------------------------------------------- */ -#if LJ_HASJIT || LJ_HASFFI /* Define this if you want to run LuaJIT with Valgrind. */ #ifdef LUAJIT_USE_VALGRIND #include <valgrind/valgrind.h> #endif -#if LJ_TARGET_IOS -void sys_icache_invalidate(void *start, size_t len); -#endif /* Synchronize data/instruction cache. 
*/ void lj_mcode_sync(void *start, void *end) @@ -38,54 +30,11 @@ void lj_mcode_sync(void *start, void *end) #ifdef LUAJIT_USE_VALGRIND VALGRIND_DISCARD_TRANSLATIONS(start, (char *)end-(char *)start); #endif -#if LJ_TARGET_X86ORX64 UNUSED(start); UNUSED(end); -#elif LJ_TARGET_IOS - sys_icache_invalidate(start, (char *)end-(char *)start); -#elif LJ_TARGET_PPC - lj_vm_cachesync(start, end); -#elif defined(__GNUC__) - __clear_cache(start, end); -#else -#error "Missing builtin to flush instruction cache" -#endif } -#endif - -#if LJ_HASJIT - -#if LJ_TARGET_WINDOWS -#define WIN32_LEAN_AND_MEAN -#include <windows.h> -#define MCPROT_RW PAGE_READWRITE -#define MCPROT_RX PAGE_EXECUTE_READ -#define MCPROT_RWX PAGE_EXECUTE_READWRITE - -static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) -{ - void *p = VirtualAlloc((void *)hint, sz, - MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); - if (!p && !hint) - lj_trace_err(J, LJ_TRERR_MCODEAL); - return p; -} - -static void mcode_free(jit_State *J, void *p, size_t sz) -{ - UNUSED(J); UNUSED(sz); - VirtualFree(p, 0, MEM_RELEASE); -} - -static int mcode_setprot(void *p, size_t sz, DWORD prot) -{ - DWORD oprot; - return !VirtualProtect(p, sz, prot, &oprot); -} - -#elif LJ_TARGET_POSIX #include <sys/mman.h> @@ -118,30 +67,6 @@ static int mcode_setprot(void *p, size_t sz, int prot) return mprotect(p, sz, prot); } -#elif LJ_64 - -#error "Missing OS support for explicit placement of executable memory" - -#else - -/* Fallback allocator. This will fail if memory is not executable by default. 
*/ -#define LUAJIT_UNPROTECT_MCODE -#define MCPROT_RW 0 -#define MCPROT_RX 0 -#define MCPROT_RWX 0 - -static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) -{ - UNUSED(hint); UNUSED(prot); - return lj_mem_new(J->L, sz); -} - -static void mcode_free(jit_State *J, void *p, size_t sz) -{ - lj_mem_free(J2G(J), p, sz); -} - -#endif /* -- MCode area protection ----------------------------------------------- */ @@ -204,11 +129,7 @@ static void mcode_protect(jit_State *J, int prot) /* -- MCode area allocation ----------------------------------------------- */ -#if LJ_64 #define mcode_validptr(p) (p) -#else -#define mcode_validptr(p) ((p) && (uintptr_t)(p) < 0xffff0000) -#endif #ifdef LJ_TARGET_JUMPRANGE @@ -219,13 +140,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) ** Try addresses within a distance of target-range/2+1MB..target+range/2-1MB. ** Use half the jump range so every address in the range can reach any other. */ -#if LJ_TARGET_MIPS - /* Use the middle of the 256MB-aligned region. */ - uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & - ~(uintptr_t)0x0fffffffu) + 0x08000000u; -#else uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; -#endif const uintptr_t range = (1u << (LJ_TARGET_JUMPRANGE-1)) - (1u << 21); /* First try a contiguous area below the last one. */ uintptr_t hint = J->mcarea ? (uintptr_t)J->mcarea - sz : 0; @@ -255,17 +170,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) /* All memory addresses are reachable by relative jumps. */ static void *mcode_alloc(jit_State *J, size_t sz) { -#ifdef __OpenBSD__ - /* Allow better executable memory allocation for OpenBSD W^X mode. 
*/ - void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); - if (p && mcode_setprot(p, sz, MCPROT_GEN)) { - mcode_free(J, p, sz); - return NULL; - } - return p; -#else return mcode_alloc_at(J, 0, sz, MCPROT_GEN); -#endif } #endif @@ -384,4 +289,3 @@ void lj_mcode_limiterr(jit_State *J, size_t need) lj_trace_err(J, LJ_TRERR_MCODELM); /* Retry with new area. */ } -#endif diff --git a/src/lj_mcode.h b/src/lj_mcode.h index f0847e931e..63e507719c 100644 --- a/src/lj_mcode.h +++ b/src/lj_mcode.h @@ -8,11 +8,8 @@ #include "lj_obj.h" -#if LJ_HASJIT || LJ_HASFFI LJ_FUNC void lj_mcode_sync(void *start, void *end); -#endif -#if LJ_HASJIT #include "lj_jit.h" @@ -25,6 +22,5 @@ LJ_FUNC_NORET void lj_mcode_limiterr(jit_State *J, size_t need); #define lj_mcode_commitbot(J, m) (J->mcbot = (m)) -#endif #endif diff --git a/src/lj_meta.c b/src/lj_meta.c index 0bd4d8429b..69036aed1b 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c @@ -73,7 +73,6 @@ cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm) return niltv(L); } -#if LJ_HASFFI /* Tailcall from C function. */ int lj_meta_tailcall(lua_State *L, cTValue *tv) { @@ -100,7 +99,6 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv) */ return 0; } -#endif /* Setup call to metamethod to be run by Assembler VM. 
*/ static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo, @@ -177,7 +175,6 @@ TValue *lj_meta_tset(lua_State *L, cTValue *o, cTValue *k) if (tv != niltv(L)) return (TValue *)tv; if (tvisnil(k)) lj_err_msg(L, LJ_ERR_NILIDX); - else if (tvisint(k)) { setnumV(&tmp, (lua_Number)intV(k)); k = &tmp; } else if (tvisnum(k) && tvisnan(k)) lj_err_msg(L, LJ_ERR_NANIDX); return lj_tab_newkey(L, t, k); } @@ -201,8 +198,6 @@ static cTValue *str2num(cTValue *o, TValue *n) { if (tvisnum(o)) return o; - else if (tvisint(o)) - return (setnumV(n, (lua_Number)intV(o)), n); else if (tvisstr(o) && lj_strscan_num(strV(o), n)) return n; else @@ -290,8 +285,6 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) GCstr *s = strV(o); MSize len = s->len; lj_buf_putmem(sb, strdata(s), len); - } else if (tvisint(o)) { - lj_strfmt_putint(sb, intV(o)); } else { lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)); } @@ -307,7 +300,7 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) } /* Helper for LEN. __len metamethod. */ -TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o) +TValue * lj_meta_len(lua_State *L, cTValue *o) { cTValue *mo = lj_meta_lookup(L, o, MM_len); if (tvisnil(mo)) { @@ -346,8 +339,7 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne) return (TValue *)(intptr_t)ne; } -#if LJ_HASFFI -TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins) +TValue * lj_meta_equal_cd(lua_State *L, BCIns ins) { ASMFunction cont = (bc_op(ins) & 1) ? lj_cont_condf : lj_cont_condt; int op = (int)bc_op(ins) & ~1; @@ -373,12 +365,11 @@ TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins) else return (TValue *)(intptr_t)(bc_op(ins) & 1); } -#endif /* Helper for ordered comparisons. String compare, __lt/__le metamethods. */ TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op) { - if (LJ_HASFFI && (tviscdata(o1) || tviscdata(o2))) { + if (tviscdata(o1) || tviscdata(o2)) { ASMFunction cont = (op & 1) ? 
lj_cont_condf : lj_cont_condt; MMS mm = (op & 2) ? MM_le : MM_lt; cTValue *mo = lj_meta_lookup(L, tviscdata(o1) ? o1 : o2, mm); @@ -426,9 +417,8 @@ void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp) { L->top = curr_topL(L); ra++; tp--; - lua_assert(LJ_DUALNUM || tp != ~LJ_TNUMX); /* ISTYPE -> ISNUM broken. */ - if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra); - else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra); + lua_assert(tp != ~LJ_TNUMX); /* ISTYPE -> ISNUM broken. */ + if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra); else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra); else lj_err_argtype(L, ra, lj_obj_itypename[tp]); } @@ -446,32 +436,10 @@ void lj_meta_call(lua_State *L, TValue *func, TValue *top) } /* Helper for FORI. Coercion. */ -void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o) +void lj_meta_for(lua_State *L, TValue *o) { if (!lj_strscan_numberobj(o)) lj_err_msg(L, LJ_ERR_FORINIT); if (!lj_strscan_numberobj(o+1)) lj_err_msg(L, LJ_ERR_FORLIM); if (!lj_strscan_numberobj(o+2)) lj_err_msg(L, LJ_ERR_FORSTEP); - if (LJ_DUALNUM) { - /* Ensure all slots are integers or all slots are numbers. */ - int32_t k[3]; - int nint = 0; - ptrdiff_t i; - for (i = 0; i <= 2; i++) { - if (tvisint(o+i)) { - k[i] = intV(o+i); nint++; - } else { - k[i] = lj_num2int(numV(o+i)); nint += ((lua_Number)k[i] == numV(o+i)); - } - } - if (nint == 3) { /* Narrow to integers. */ - setintV(o, k[0]); - setintV(o+1, k[1]); - setintV(o+2, k[2]); - } else if (nint != 0) { /* Widen to numbers. 
*/ - if (tvisint(o)) setnumV(o, (lua_Number)intV(o)); - if (tvisint(o+1)) setnumV(o+1, (lua_Number)intV(o+1)); - if (tvisint(o+2)) setnumV(o+2, (lua_Number)intV(o+2)); - } - } } diff --git a/src/lj_meta.h b/src/lj_meta.h index 73b4572473..65ff9ffa51 100644 --- a/src/lj_meta.h +++ b/src/lj_meta.h @@ -12,9 +12,7 @@ LJ_FUNC void lj_meta_init(lua_State *L); LJ_FUNC cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name); LJ_FUNC cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm); -#if LJ_HASFFI LJ_FUNC int lj_meta_tailcall(lua_State *L, cTValue *tv); -#endif #define lj_meta_fastg(g, mt, mm) \ ((mt) == NULL ? NULL : ((mt)->nomm & (1u<<(mm))) ? NULL : \ @@ -27,12 +25,12 @@ LJ_FUNCA TValue *lj_meta_tset(lua_State *L, cTValue *o, cTValue *k); LJ_FUNCA TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc, BCReg op); LJ_FUNCA TValue *lj_meta_cat(lua_State *L, TValue *top, int left); -LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o); +LJ_FUNCA TValue * lj_meta_len(lua_State *L, cTValue *o); LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); -LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); +LJ_FUNCA TValue * lj_meta_equal_cd(lua_State *L, BCIns ins); LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); LJ_FUNCA void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp); LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); -LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); +LJ_FUNCA void lj_meta_for(lua_State *L, TValue *o); #endif diff --git a/src/lj_obj.c b/src/lj_obj.c index ee33aeb3a8..76c56c2cc2 100644 --- a/src/lj_obj.c +++ b/src/lj_obj.c @@ -20,7 +20,7 @@ LJ_DATADEF const char *const lj_obj_itypename[] = { /* ORDER LJ_T */ }; /* Compare two objects without calling metamethods. 
*/ -int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2) +int lj_obj_equal(cTValue *o1, cTValue *o2) { if (itype(o1) == itype(o2)) { if (tvispri(o1)) @@ -34,13 +34,13 @@ int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2) } /* Return pointer to object or its object data. */ -const void * LJ_FASTCALL lj_obj_ptr(cTValue *o) +const void * lj_obj_ptr(cTValue *o) { if (tvisudata(o)) return uddata(udataV(o)); else if (tvislightud(o)) return lightudV(o); - else if (LJ_HASFFI && tviscdata(o)) + else if (tviscdata(o)) return cdataptr(cdataV(o)); else if (tvisgcv(o)) return gcV(o); diff --git a/src/lj_obj.h b/src/lj_obj.h index 52372c3e7e..3ab11e1ec8 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -17,71 +17,39 @@ /* Memory and GC object sizes. */ typedef uint32_t MSize; -#if LJ_GC64 typedef uint64_t GCSize; -#else -typedef uint32_t GCSize; -#endif /* Memory reference */ -typedef struct MRef { -#if LJ_GC64 - uint64_t ptr64; /* True 64 bit pointer. */ -#else - uint32_t ptr32; /* Pseudo 32 bit pointer. */ -#endif -} MRef; - -#if LJ_GC64 -#define mref(r, t) ((t *)(void *)(r).ptr64) +typedef void * MRef; -#define setmref(r, p) ((r).ptr64 = (uint64_t)(void *)(p)) -#define setmrefr(r, v) ((r).ptr64 = (v).ptr64) -#else -#define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) +#define mref(r, t) ((t *)(r)) -#define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) -#define setmrefr(r, v) ((r).ptr32 = (v).ptr32) -#endif +#define setmref(r, p) ((r) = (void *)(p)) +#define setmrefr(r, v) ((r) = (v)) /* -- GC object references (32 bit address space) ------------------------- */ +/* Forward declaration. */ +union GCobj; + /* GCobj reference */ -typedef struct GCRef { -#if LJ_GC64 - uint64_t gcptr64; /* True 64 bit pointer. */ -#else - uint32_t gcptr32; /* Pseudo 32 bit pointer. */ -#endif -} GCRef; +typedef union GCobj * GCRef; /* Common GC header for all collectable objects. 
*/ #define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct /* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */ -#if LJ_GC64 -#define gcref(r) ((GCobj *)(r).gcptr64) -#define gcrefp(r, t) ((t *)(void *)(r).gcptr64) -#define gcrefu(r) ((r).gcptr64) -#define gcrefeq(r1, r2) ((r1).gcptr64 == (r2).gcptr64) +#define gcref(r) (r) +#define gcrefp(r, t) ((t *)(void *)(r)) +#define gcrefu(r) ((uint64_t)(r)) +#define gcrefeq(r1, r2) ((r1)==(r2)) -#define setgcref(r, gc) ((r).gcptr64 = (uint64_t)&(gc)->gch) +#define setgcref(r, gc) ((r) = (GCobj *)&(gc)->gch) #define setgcreft(r, gc, it) \ - (r).gcptr64 = (uint64_t)&(gc)->gch | (((uint64_t)(it)) << 47) -#define setgcrefp(r, p) ((r).gcptr64 = (uint64_t)(p)) -#define setgcrefnull(r) ((r).gcptr64 = 0) -#define setgcrefr(r, v) ((r).gcptr64 = (v).gcptr64) -#else -#define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32) -#define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32) -#define gcrefu(r) ((r).gcptr32) -#define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32) - -#define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch) -#define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p)) -#define setgcrefnull(r) ((r).gcptr32 = 0) -#define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32) -#endif + ((r) = (GCobj *)((uint64_t)&(gc)->gch | ((uint64_t)(it) << 47))) +#define setgcrefp(r, p) ((r) = (GCobj *)(p)) +#define setgcrefnull(r) ((r) = NULL) +#define setgcrefr(r, v) ((r) = (v)) #define gcnext(gc) (gcref((gc)->gch.nextgc)) @@ -172,7 +140,6 @@ typedef union { typedef LJ_ALIGN(8) union TValue { uint64_t u64; /* 64 bit pattern overlaps number. */ lua_Number n; /* Number object overlaps split tag/value object. */ -#if LJ_GC64 GCRef gcr; /* GCobj reference with tag. */ int64_t it64; struct { @@ -181,27 +148,7 @@ typedef LJ_ALIGN(8) union TValue { , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ ) }; -#else - struct { - LJ_ENDIAN_LOHI( - union { - GCRef gcr; /* GCobj reference (if any). 
*/ - int32_t i; /* Integer value. */ - }; - , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ - ) - }; -#endif -#if LJ_FR2 int64_t ftsz; /* Frame type and size of previous frame, or PC. */ -#else - struct { - LJ_ENDIAN_LOHI( - GCRef func; /* Function for next frame (or dummy L). */ - , FrameLink tp; /* Link to previous frame. */ - ) - } fr; -#endif struct { LJ_ENDIAN_LOHI( uint32_t lo; /* Lower 32 bits of number. */ @@ -221,22 +168,6 @@ typedef const TValue cTValue; /* Internal object tags. ** -** Format for 32 bit GC references (!LJ_GC64): -** -** Internal tags overlap the MSW of a number object (must be a double). -** Interpreted as a double these are special NaNs. The FPU only generates -** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available -** for use as internal tags. Small negative numbers are used to shorten the -** encoding of type comparisons (reg/mem against sign-ext. 8 bit immediate). -** -** ---MSW---.---LSW--- -** primitive types | itype | | -** lightuserdata | itype | void * | (32 bit platforms) -** lightuserdata |ffff| void * | (64 bit platforms, 47 bit pointers) -** GC objects | itype | GCRef | -** int (LJ_DUALNUM)| itype | int | -** number -------double------ -** ** Format for 64 bit GC references (LJ_GC64): ** ** The upper 13 bits must be 1 (0xfff8...) for a special NaN. 
The next @@ -246,7 +177,6 @@ typedef const TValue cTValue; ** ------MSW------.------LSW------ ** primitive types |1..1|itype|1..................1| ** GC objects/lightud |1..1|itype|-------GCRef--------| -** int (LJ_DUALNUM) |1..1|itype|0..0|-----int-------| ** number ------------double------------- ** ** ORDER LJ_T @@ -271,19 +201,13 @@ typedef const TValue cTValue; #define LJ_TNUMX (~13u) /* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */ -#if LJ_64 && !LJ_GC64 -#define LJ_TISNUM 0xfffeffffu -#else #define LJ_TISNUM LJ_TNUMX -#endif #define LJ_TISTRUECOND LJ_TFALSE #define LJ_TISPRI LJ_TTRUE #define LJ_TISGCV (LJ_TSTR+1) #define LJ_TISTABUD LJ_TTAB -#if LJ_GC64 #define LJ_GCVMASK (((uint64_t)1 << 47) - 1) -#endif /* -- String object ------------------------------------------------------- */ @@ -358,9 +282,7 @@ typedef struct GCproto { uint8_t numparams; /* Number of parameters. */ uint8_t framesize; /* Fixed frame size. */ MSize sizebc; /* Number of bytecode instructions. */ -#if LJ_GC64 uint32_t unused_gc64; -#endif GCRef gclist; MRef k; /* Split constant array (points to the middle). */ MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */ @@ -374,9 +296,10 @@ typedef struct GCproto { GCRef chunkname; /* Name of the chunk this function was defined in. */ BCLine firstline; /* First line of the function definition. */ BCLine numline; /* Number of lines for the function definition. */ - MRef lineinfo; /* Compressed map from bytecode ins. to source line. */ + MRef lineinfo; /* Map from bytecode ins. to source line. */ MRef uvinfo; /* Upvalue names. */ MRef varinfo; /* Names and compressed extents of local variables. */ + MRef declname; /* Declared name of function (null-terminated). */ } GCproto; /* Flags for prototype. 
*/ @@ -407,9 +330,10 @@ typedef struct GCproto { #define proto_chunkname(pt) (strref((pt)->chunkname)) #define proto_chunknamestr(pt) (strdata(proto_chunkname((pt)))) -#define proto_lineinfo(pt) (mref((pt)->lineinfo, const void)) +#define proto_lineinfo(pt) (mref((pt)->lineinfo, const uint32_t)) #define proto_uvinfo(pt) (mref((pt)->uvinfo, const uint8_t)) #define proto_varinfo(pt) (mref((pt)->varinfo, const uint8_t)) +#define proto_declname(pt) (mref((pt)->declname, const char)) /* -- Upvalue object ------------------------------------------------------ */ @@ -472,9 +396,6 @@ typedef struct Node { TValue val; /* Value object. Must be first field. */ TValue key; /* Key object. */ MRef next; /* Hash chain. */ -#if !LJ_GC64 - MRef freetop; /* Top of free elements (stored in t->node[0]). */ -#endif } Node; LJ_STATIC_ASSERT(offsetof(Node, val) == 0); @@ -489,27 +410,21 @@ typedef struct GCtab { MRef node; /* Hash part. */ uint32_t asize; /* Size of array part (keys [0, asize-1]). */ uint32_t hmask; /* Hash part mask (size of hash part - 1). */ -#if LJ_GC64 MRef freetop; /* Top of free elements. */ -#endif } GCtab; #define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab)) #define tabref(r) (&gcref((r))->tab) #define noderef(r) (mref((r), Node)) #define nextnode(n) (mref((n)->next, Node)) -#if LJ_GC64 #define getfreetop(t, n) (noderef((t)->freetop)) #define setfreetop(t, n, v) (setmref((t)->freetop, (v))) -#else -#define getfreetop(t, n) (noderef((n)->freetop)) -#define setfreetop(t, n, v) (setmref((n)->freetop, (v))) -#endif /* -- State objects ------------------------------------------------------- */ /* VM states. */ enum { + /* VM states. */ LJ_VMST_INTERP, /* Interpreter. */ LJ_VMST_C, /* C function. */ LJ_VMST_GC, /* Garbage collector. */ @@ -517,25 +432,23 @@ enum { LJ_VMST_RECORD, /* Trace recorder. */ LJ_VMST_OPT, /* Optimizer. */ LJ_VMST_ASM, /* Assembler. */ + /* JIT trace states. 
+ ** These are "abstract" states that logically exist but are never + ** directly used for the value of global_State.vmstate. + */ + LJ_VMST_HEAD, /* Trace mcode before loop */ + LJ_VMST_LOOP, /* Trace mcode inside loop */ + LJ_VMST_JGC, /* GC invoked from JIT mcode. */ + LJ_VMST_FFI, /* Other code outside trace mcode */ LJ_VMST__MAX }; #define setvmstate(g, st) ((g)->vmstate = ~LJ_VMST_##st) /* Metamethods. ORDER MM */ -#ifdef LJ_HASFFI #define MMDEF_FFI(_) _(new) -#else -#define MMDEF_FFI(_) -#endif -#if LJ_52 || LJ_HASFFI #define MMDEF_PAIRS(_) _(pairs) _(ipairs) -#else -#define MMDEF_PAIRS(_) -#define MM_pairs 255 -#define MM_ipairs 255 -#endif #define MMDEF(_) \ _(index) _(newindex) _(gc) _(mode) _(eq) _(len) \ @@ -598,7 +511,9 @@ typedef struct global_State { lua_Alloc allocf; /* Memory allocator. */ void *allocd; /* Memory allocator data. */ GCState gc; /* Garbage collector. */ - volatile int32_t vmstate; /* VM state or current JIT code trace number. */ + volatile int32_t vmstate; /* VM state or current JIT code trace number. */ + volatile int32_t gcvmstate; /* Previous VM state (only when state is GC). */ + volatile int32_t lasttrace; /* VM state before exit to interpreter. */ SBuf tmpbuf; /* Temporary string buffer. */ GCstr strempty; /* Empty string. */ uint8_t stremptyz; /* Zero terminator of empty string. 
*/ @@ -633,13 +548,10 @@ typedef struct global_State { #define HOOK_EVENTMASK 0x0f #define HOOK_ACTIVE 0x10 #define HOOK_ACTIVE_SHIFT 4 -#define HOOK_VMEVENT 0x20 -#define HOOK_GC 0x40 -#define HOOK_PROFILE 0x80 +#define HOOK_GC 0x20 #define hook_active(g) ((g)->hookmask & HOOK_ACTIVE) #define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE) #define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC)) -#define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT)) #define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE) #define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK) #define hook_restore(g, h) \ @@ -666,13 +578,7 @@ struct lua_State { #define registry(L) (&G(L)->registrytv) /* Macros to access the currently executing (Lua) function. */ -#if LJ_GC64 #define curr_func(L) (&gcval(L->base-2)->fn) -#elif LJ_FR2 -#define curr_func(L) (&gcref((L->base-2)->gcr)->fn) -#else -#define curr_func(L) (&gcref((L->base-1)->fr.func)->fn) -#endif #define curr_funcisL(L) (isluafunc(curr_func(L))) #define curr_proto(L) (funcproto(curr_func(L))) #define curr_topL(L) (L->base + curr_proto(L)->framesize) @@ -736,21 +642,12 @@ typedef union GCobj { #endif /* Macros to test types. 
*/ -#if LJ_GC64 #define itype(o) ((uint32_t)((o)->it64 >> 47)) #define tvisnil(o) ((o)->it64 == -1) -#else -#define itype(o) ((o)->it) -#define tvisnil(o) (itype(o) == LJ_TNIL) -#endif #define tvisfalse(o) (itype(o) == LJ_TFALSE) #define tvistrue(o) (itype(o) == LJ_TTRUE) #define tvisbool(o) (tvisfalse(o) || tvistrue(o)) -#if LJ_64 && !LJ_GC64 -#define tvislightud(o) (((int32_t)itype(o) >> 15) == -2) -#else #define tvislightud(o) (itype(o) == LJ_TLIGHTUD) -#endif #define tvisstr(o) (itype(o) == LJ_TSTR) #define tvisfunc(o) (itype(o) == LJ_TFUNC) #define tvisthread(o) (itype(o) == LJ_TTHREAD) @@ -759,7 +656,6 @@ typedef union GCobj { #define tvistab(o) (itype(o) == LJ_TTAB) #define tvisudata(o) (itype(o) == LJ_TUDATA) #define tvisnumber(o) (itype(o) <= LJ_TISNUM) -#define tvisint(o) (LJ_DUALNUM && itype(o) == LJ_TISNUM) #define tvisnum(o) (itype(o) < LJ_TISNUM) #define tvistruecond(o) (itype(o) < LJ_TISTRUECOND) @@ -769,37 +665,20 @@ typedef union GCobj { /* Special macros to test numbers for NaN, +0, -0, +1 and raw equality. */ #define tvisnan(o) ((o)->n != (o)->n) -#if LJ_64 #define tviszero(o) (((o)->u64 << 1) == 0) -#else -#define tviszero(o) (((o)->u32.lo | ((o)->u32.hi << 1)) == 0) -#endif #define tvispzero(o) ((o)->u64 == 0) #define tvismzero(o) ((o)->u64 == U64x(80000000,00000000)) #define tvispone(o) ((o)->u64 == U64x(3ff00000,00000000)) #define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64) /* Macros to convert type ids. */ -#if LJ_64 && !LJ_GC64 -#define itypemap(o) \ - (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o)) -#else #define itypemap(o) (tvisnumber(o) ? ~LJ_TNUMX : ~itype(o)) -#endif /* Macros to get tagged values. 
*/ -#if LJ_GC64 #define gcval(o) ((GCobj *)(gcrefu((o)->gcr) & LJ_GCVMASK)) -#else -#define gcval(o) (gcref((o)->gcr)) -#endif #define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - itype(o))) -#if LJ_64 #define lightudV(o) \ check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff))) -#else -#define lightudV(o) check_exp(tvislightud(o), gcrefp((o)->gcr, void)) -#endif #define gcV(o) check_exp(tvisgcv(o), gcval(o)) #define strV(o) check_exp(tvisstr(o), &gcval(o)->str) #define funcV(o) check_exp(tvisfunc(o), &gcval(o)->fn) @@ -812,48 +691,21 @@ typedef union GCobj { #define intV(o) check_exp(tvisint(o), (int32_t)(o)->i) /* Macros to set tagged values. */ -#if LJ_GC64 #define setitype(o, i) ((o)->it = ((i) << 15)) #define setnilV(o) ((o)->it64 = -1) #define setpriV(o, x) ((o)->it64 = (int64_t)~((uint64_t)~(x)<<47)) #define setboolV(o, x) ((o)->it64 = (int64_t)~((uint64_t)((x)+1)<<47)) -#else -#define setitype(o, i) ((o)->it = (i)) -#define setnilV(o) ((o)->it = LJ_TNIL) -#define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x)) -#define setpriV(o, i) (setitype((o), (i))) -#endif static LJ_AINLINE void setlightudV(TValue *o, void *p) { -#if LJ_GC64 o->u64 = (uint64_t)p | (((uint64_t)LJ_TLIGHTUD) << 47); -#elif LJ_64 - o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48); -#else - setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD); -#endif } -#if LJ_64 #define checklightudptr(L, p) \ (((uint64_t)(p) >> 47) ? 
(lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p)) -#else -#define checklightudptr(L, p) (p) -#endif -#if LJ_FR2 #define contptr(f) ((void *)(f)) #define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f)) -#elif LJ_64 -#define contptr(f) \ - ((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin)) -#define setcont(o, f) \ - ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) -#else -#define contptr(f) ((void *)(f)) -#define setcont(o, f) setlightudV((o), contptr(f)) -#endif #define tvchecklive(L, o) \ UNUSED(L), lua_assert(!tvisgcv(o) || \ @@ -861,11 +713,7 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p) static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype) { -#if LJ_GC64 setgcreft(o->gcr, v, itype); -#else - setgcref(o->gcr, v); setitype(o, itype); -#endif } static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it) @@ -893,26 +741,15 @@ define_setV(setudataV, GCudata, LJ_TUDATA) static LJ_AINLINE void setintV(TValue *o, int32_t i) { -#if LJ_DUALNUM - o->i = (uint32_t)i; setitype(o, LJ_TISNUM); -#else o->n = (lua_Number)i; -#endif } static LJ_AINLINE void setint64V(TValue *o, int64_t i) { - if (LJ_DUALNUM && LJ_LIKELY(i == (int64_t)(int32_t)i)) - setintV(o, (int32_t)i); - else - setnumV(o, (lua_Number)i); + setnumV(o, (lua_Number)i); } -#if LJ_64 #define setintptrV(o, i) setint64V((o), (i)) -#else -#define setintptrV(o, i) setintV((o), (i)) -#endif /* Copy tagged values. 
*/ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) @@ -922,47 +759,29 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) /* -- Number to integer conversion ---------------------------------------- */ -#if LJ_SOFTFP -LJ_ASMF int32_t lj_vm_tobit(double x); -#endif static LJ_AINLINE int32_t lj_num2bit(lua_Number n) { -#if LJ_SOFTFP - return lj_vm_tobit(n); -#else TValue o; o.n = n + 6755399441055744.0; /* 2^52 + 2^51 */ return (int32_t)o.u32.lo; -#endif } #define lj_num2int(n) ((int32_t)(n)) static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) { -#ifdef _MSC_VER - if (n >= 9223372036854775808.0) /* They think it's a feature. */ - return (uint64_t)(int64_t)(n - 18446744073709551616.0); - else -#endif return (uint64_t)n; } static LJ_AINLINE int32_t numberVint(cTValue *o) { - if (LJ_LIKELY(tvisint(o))) - return intV(o); - else - return lj_num2int(numV(o)); + return lj_num2int(numV(o)); } static LJ_AINLINE lua_Number numberVnum(cTValue *o) { - if (LJ_UNLIKELY(tvisint(o))) - return (lua_Number)intV(o); - else - return numV(o); + return numV(o); } /* -- Miscellaneous object handling --------------------------------------- */ @@ -974,7 +793,7 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1]; #define lj_typename(o) (lj_obj_itypename[itypemap(o)]) /* Compare two objects without calling metamethods. 
*/ -LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2); -LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(cTValue *o); +LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2); +LJ_FUNC const void * lj_obj_ptr(cTValue *o); #endif diff --git a/src/lj_opt_dce.c b/src/lj_opt_dce.c index 2417f3242a..702bcfb44f 100644 --- a/src/lj_opt_dce.c +++ b/src/lj_opt_dce.c @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_ir.h" #include "lj_jit.h" @@ -75,4 +74,3 @@ void lj_opt_dce(jit_State *J) #undef IR -#endif diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 408811f2cd..0a41d9d8cf 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -12,7 +12,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_buf.h" #include "lj_str.h" @@ -22,10 +21,8 @@ #include "lj_ircall.h" #include "lj_iropt.h" #include "lj_trace.h" -#if LJ_HASFFI #include "lj_ctype.h" #include "lj_carith.h" -#endif #include "lj_vm.h" #include "lj_strscan.h" #include "lj_strfmt.h" @@ -145,12 +142,12 @@ #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) /* Fold function type. Fastcall on x86 significantly reduces their size. */ -typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J); +typedef IRRef (*FoldFunc)(jit_State *J); /* Macros for the fold specs, so buildvm can recognize them. */ #define LJFOLD(x) #define LJFOLDX(x) -#define LJFOLDF(name) static TRef LJ_FASTCALL fold_##name(jit_State *J) +#define LJFOLDF(name) static TRef fold_##name(jit_State *J) /* Note: They must be at the start of a line or buildvm ignores them! */ /* Barrier to prevent using operands across PHIs. 
*/ @@ -197,12 +194,8 @@ LJFOLDF(kfold_numabsneg) LJFOLD(LDEXP KNUM KINT) LJFOLDF(kfold_ldexp) { -#if LJ_TARGET_X86ORX64 UNUSED(J); return NEXTFOLD; -#else - return lj_ir_knum(J, ldexp(knumleft, fright->i)); -#endif } LJFOLD(FPMATH KNUM any) @@ -347,7 +340,6 @@ LJFOLDF(kfold_intcomp0) static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) { switch (op) { -#if LJ_HASFFI case IR_ADD: k1 += k2; break; case IR_SUB: k1 -= k2; break; case IR_MUL: k1 *= k2; break; @@ -359,7 +351,6 @@ static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) case IR_BSAR: k1 >>= (k2 & 63); break; case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break; case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break; -#endif default: UNUSED(k2); lua_assert(0); break; } return k1; @@ -382,7 +373,6 @@ LJFOLD(MOD KINT64 KINT64) LJFOLD(POW KINT64 KINT64) LJFOLDF(kfold_int64arith2) { -#if LJ_HASFFI uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64; if (irt_isi64(fins->t)) { k1 = fins->o == IR_DIV ? 
lj_carith_divi64((int64_t)k1, (int64_t)k2) : @@ -394,9 +384,6 @@ LJFOLDF(kfold_int64arith2) lj_carith_powu64(k1, k2); } return INT64FOLD(k1); -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } LJFOLD(BSHL KINT64 KINT) @@ -406,33 +393,21 @@ LJFOLD(BROL KINT64 KINT) LJFOLD(BROR KINT64 KINT) LJFOLDF(kfold_int64shift) { -#if LJ_HASFFI uint64_t k = ir_k64(fleft)->u64; int32_t sh = (fright->i & 63); return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL)); -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } LJFOLD(BNOT KINT64) LJFOLDF(kfold_bnot64) { -#if LJ_HASFFI return INT64FOLD(~ir_k64(fleft)->u64); -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } LJFOLD(BSWAP KINT64) LJFOLDF(kfold_bswap64) { -#if LJ_HASFFI return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } LJFOLD(LT KINT64 KINT64) @@ -445,34 +420,26 @@ LJFOLD(ULE KINT64 KINT64) LJFOLD(UGT KINT64 KINT64) LJFOLDF(kfold_int64comp) { -#if LJ_HASFFI uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64; switch ((IROp)fins->o) { - case IR_LT: return CONDFOLD(a < b); - case IR_GE: return CONDFOLD(a >= b); - case IR_LE: return CONDFOLD(a <= b); - case IR_GT: return CONDFOLD(a > b); - case IR_ULT: return CONDFOLD((uint64_t)a < (uint64_t)b); - case IR_UGE: return CONDFOLD((uint64_t)a >= (uint64_t)b); - case IR_ULE: return CONDFOLD((uint64_t)a <= (uint64_t)b); - case IR_UGT: return CONDFOLD((uint64_t)a > (uint64_t)b); + case IR_LT: return CONDFOLD((int64_t)a < (int64_t)b); + case IR_GE: return CONDFOLD((int64_t)a >= (int64_t)b); + case IR_LE: return CONDFOLD((int64_t)a <= (int64_t)b); + case IR_GT: return CONDFOLD((int64_t)a > (int64_t)b); + case IR_ULT: return CONDFOLD(a < b); + case IR_UGE: return CONDFOLD(a >= b); + case IR_ULE: return CONDFOLD(a <= b); + case IR_UGT: return CONDFOLD(a > b); default: lua_assert(0); return FAILFOLD; } -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } LJFOLD(UGE any KINT64) 
LJFOLDF(kfold_int64comp0) { -#if LJ_HASFFI if (ir_k64(fright)->u64 == 0) return DROPFOLD; return NEXTFOLD; -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } /* -- Constant folding for strings ---------------------------------------- */ @@ -637,7 +604,7 @@ LJFOLDF(bufput_kfold_op) if (irref_isk(fleft->op2)) { const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; SBuf *sb = lj_buf_tmp_(J->L); - sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb, + sb = ((SBuf * (*)(SBuf *, GCstr *))ci->func)(sb, ir_kstr(IR(fleft->op2))); fins->o = IR_BUFPUT; fins->op1 = fleft->op1; @@ -713,12 +680,7 @@ LJFOLD(ADD KGC KINT64) LJFOLDF(kfold_add_kgc) { GCobj *o = ir_kgc(fleft); -#if LJ_64 ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64; -#else - ptrdiff_t ofs = fright->i; -#endif -#if LJ_HASFFI if (irt_iscdata(fleft->t)) { CType *ct = ctype_raw(ctype_ctsG(J2G(J)), gco2cd(o)->ctypeid); if (ctype_isnum(ct->info) || ctype_isenum(ct->info) || @@ -726,7 +688,6 @@ LJFOLDF(kfold_add_kgc) ctype_iscomplex(ct->info) || ctype_isvector(ct->info)) return lj_ir_kkptr(J, (char *)o + ofs); } -#endif return lj_ir_kptr(J, (char *)o + ofs); } @@ -737,11 +698,7 @@ LJFOLD(ADD KKPTR KINT64) LJFOLDF(kfold_add_kptr) { void *p = ir_kptr(fleft); -#if LJ_64 ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64; -#else - ptrdiff_t ofs = fright->i; -#endif return lj_ir_kptr_(J, fleft->o, (char *)p + ofs); } @@ -842,14 +799,7 @@ LJFOLDF(kfold_conv_knum_int_num) LJFOLD(CONV KNUM IRCONV_U32_NUM) LJFOLDF(kfold_conv_knum_u32_num) { -#ifdef _MSC_VER - { /* Workaround for MSVC bug. 
*/ - volatile uint32_t u = (uint32_t)knumleft; - return INTFOLD((int32_t)u); - } -#else return INTFOLD((int32_t)(uint32_t)knumleft); -#endif } LJFOLD(CONV KNUM IRCONV_I64_NUM) @@ -1089,11 +1039,9 @@ LJFOLDF(simplify_numpow_kx) lua_Number n = knumleft; if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */ fins->o = IR_CONV; -#if LJ_TARGET_X86ORX64 fins->op1 = fins->op2; fins->op2 = IRCONV_NUM_INT; fins->op2 = (IRRef1)lj_opt_fold(J); -#endif fins->op1 = (IRRef1)lj_ir_knum_one(J); fins->o = IR_LDEXP; return RETRYFOLD; @@ -1135,14 +1083,7 @@ LJFOLDF(simplify_conv_i64_num) fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT); return RETRYFOLD; } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { -#if LJ_TARGET_X64 return fleft->op1; -#else - /* Reduce to a zero-extension. */ - fins->op1 = fleft->op1; - fins->op2 = (IRT_I64<<5)|IRT_U32; - return RETRYFOLD; -#endif } return NEXTFOLD; } @@ -1227,16 +1168,10 @@ LJFOLDF(simplify_conv_sext) if (ref == J->scev.idx) { IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; lua_assert(irt_isint(J->scev.t)); - if (lo && IR(lo)->i + ofs >= 0) { + if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { ok_reduce: -#if LJ_TARGET_X64 /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */ return LEFTFOLD; -#else - /* Reduce to a (cheaper) zero-extension. 
*/ - fins->op2 &= ~IRCONV_SEXT; - return RETRYFOLD; -#endif } } return NEXTFOLD; @@ -1403,13 +1338,9 @@ LJFOLDF(simplify_intmul_k32) LJFOLD(MUL any KINT64) LJFOLDF(simplify_intmul_k64) { -#if LJ_HASFFI if (ir_kint64(fright)->u64 < 0x80000000u) return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); return NEXTFOLD; -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } LJFOLD(MOD any KINT) @@ -1688,6 +1619,43 @@ LJFOLDF(simplify_andk_shiftk) return NEXTFOLD; } +LJFOLD(BAND BOR KINT) +LJFOLD(BOR BAND KINT) +LJFOLDF(simplify_andor_k) +{ + IRIns *irk = IR(fleft->op2); + PHIBARRIER(fleft); + if (irk->o == IR_KINT) { + int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o); + /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ + /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ + if (k == (fins->o == IR_BAND ? 0 : -1)) { + fins->op1 = fleft->op1; + return RETRYFOLD; + } + } + return NEXTFOLD; +} + +LJFOLD(BAND BOR KINT64) +LJFOLD(BOR BAND KINT64) +LJFOLDF(simplify_andor_k64) +{ + IRIns *irk = IR(fleft->op2); + PHIBARRIER(fleft); + if (irk->o == IR_KINT64) { + uint64_t k = kfold_int64arith(ir_k64(irk)->u64, + ir_k64(fright)->u64, (IROp)fins->o); + /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ + /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ + if (k == (fins->o == IR_BAND ? 
(uint64_t)0 : ~(uint64_t)0)) { + fins->op1 = fleft->op1; + return RETRYFOLD; + } + } + return NEXTFOLD; +} + /* -- Reassociation ------------------------------------------------------- */ LJFOLD(ADD ADD KINT) @@ -1717,7 +1685,6 @@ LJFOLD(BOR BOR KINT64) LJFOLD(BXOR BXOR KINT64) LJFOLDF(reassoc_intarith_k64) { -#if LJ_HASFFI IRIns *irk = IR(fleft->op2); if (irk->o == IR_KINT64) { uint64_t k = kfold_int64arith(ir_k64(irk)->u64, @@ -1728,9 +1695,6 @@ LJFOLDF(reassoc_intarith_k64) return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */ } return NEXTFOLD; -#else - UNUSED(J); lua_assert(0); return FAILFOLD; -#endif } LJFOLD(MIN MIN any) @@ -2320,17 +2284,6 @@ LJFOLDF(barrier_tnew_tdup) return DROPFOLD; } -/* -- Profiling ----------------------------------------------------------- */ - -LJFOLD(PROF any any) -LJFOLDF(prof) -{ - IRRef ref = J->chain[IR_PROF]; - if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */ - return ref; - return EMITFOLD; -} - /* -- Stores and allocations ---------------------------------------------- */ /* Stores and allocations cannot be folded or passed on to CSE in general. @@ -2383,7 +2336,7 @@ LJFOLDX(lj_ir_emit) /* ------------------------------------------------------------------------ */ /* Fold IR instruction. */ -TRef LJ_FASTCALL lj_opt_fold(jit_State *J) +TRef lj_opt_fold(jit_State *J) { uint32_t key, any; IRRef ref; @@ -2459,7 +2412,7 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J) /* -- Common-Subexpression Elimination ------------------------------------ */ /* CSE an IR instruction. This is very fast due to the skip-list chains. */ -TRef LJ_FASTCALL lj_opt_cse(jit_State *J) +TRef lj_opt_cse(jit_State *J) { /* Avoid narrow to wide store-to-load forwarding stall */ IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16); @@ -2489,7 +2442,7 @@ TRef LJ_FASTCALL lj_opt_cse(jit_State *J) } /* CSE with explicit search limit. 
*/ -TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim) +TRef lj_opt_cselim(jit_State *J, IRRef lim) { IRRef ref = J->chain[fins->o]; IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16); @@ -2511,4 +2464,3 @@ TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim) #undef knumright #undef emitir -#endif diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index 04c6d06ddf..6bfd541eff 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_err.h" #include "lj_buf.h" @@ -284,15 +283,7 @@ static void loop_unroll(LoopState *lps) /* LOOP separates the pre-roll from the loop body. */ emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0); - /* Grow snapshot buffer and map for copy-substituted snapshots. - ** Need up to twice the number of snapshots minus #0 and loop snapshot. - ** Need up to twice the number of entries plus fallback substitutions - ** from the loop snapshot entries for each new snapshot. - ** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap! - */ onsnap = J->cur.nsnap; - lj_snap_grow_buf(J, 2*onsnap-2); - lj_snap_grow_map(J, J->cur.nsnapmap*2+(onsnap-2)*J->cur.snap[onsnap-1].nent); /* The loop snapshot is used for fallback substitutions. 
*/ loopsnap = &J->cur.snap[onsnap-1]; @@ -446,4 +437,3 @@ int lj_opt_loop(jit_State *J) #undef emitir #undef emitir_raw -#endif diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index cc177d39e8..f24e07ff7d 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -11,7 +11,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_tab.h" #include "lj_ir.h" @@ -183,8 +182,7 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) lua_assert(ir->o != IR_TNEW || irt_isnil(fins->t)); if (irt_ispri(fins->t)) { return TREF_PRI(irt_type(fins->t)); - } else if (irt_isnum(fins->t) || (LJ_DUALNUM && irt_isint(fins->t)) || - irt_isstr(fins->t)) { + } else if (irt_isnum(fins->t) || irt_isstr(fins->t)) { TValue keyv; cTValue *tv; IRIns *key = IR(xr->op2); @@ -194,8 +192,6 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) lua_assert(itype2irt(tv) == irt_type(fins->t)); if (irt_isnum(fins->t)) return lj_ir_knum_u64(J, tv->u64); - else if (LJ_DUALNUM && irt_isint(fins->t)) - return lj_ir_kint(J, intV(tv)); else return lj_ir_kstr(J, strV(tv)); } @@ -239,7 +235,7 @@ static TRef fwd_aload_reassoc(jit_State *J) } /* ALOAD forwarding. */ -TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J) +TRef lj_opt_fwd_aload(jit_State *J) { IRRef ref; if ((ref = fwd_ahload(J, fins->op1)) || @@ -249,7 +245,7 @@ TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J) } /* HLOAD forwarding. */ -TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J) +TRef lj_opt_fwd_hload(jit_State *J) { IRRef ref = fwd_ahload(J, fins->op1); if (ref) @@ -258,7 +254,7 @@ TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J) } /* HREFK forwarding. */ -TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J) +TRef lj_opt_fwd_hrefk(jit_State *J) { IRRef tab = fleft->op1; IRRef ref = J->chain[IR_NEWREF]; @@ -282,7 +278,7 @@ TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J) } /* Check whether HREF of TNEW/TDUP can be folded to niltv. */ -int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J) +int lj_opt_fwd_href_nokey(jit_State *J) { IRRef lim = fins->op1; /* Search limit. 
*/ IRRef ref; @@ -324,7 +320,7 @@ static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta) } /* Check whether there's no aliasing NEWREF/table.clear for the left operand. */ -int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim) +int lj_opt_fwd_tptr(jit_State *J, IRRef lim) { IRRef ta = fins->op1; IRRef ref = J->chain[IR_NEWREF]; @@ -338,7 +334,7 @@ int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim) } /* ASTORE/HSTORE elimination. */ -TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) +TRef lj_opt_dse_ahstore(jit_State *J) { IRRef xref = fins->op1; /* xREF reference. */ IRRef val = fins->op2; /* Stored value reference. */ @@ -408,7 +404,7 @@ static AliasRet aa_uref(IRIns *refa, IRIns *refb) } /* ULOAD forwarding. */ -TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J) +TRef lj_opt_fwd_uload(jit_State *J) { IRRef uref = fins->op1; IRRef lim = REF_BASE; /* Search limit. */ @@ -442,7 +438,7 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J) } /* USTORE elimination. */ -TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J) +TRef lj_opt_dse_ustore(jit_State *J) { IRRef xref = fins->op1; /* xREF reference. */ IRRef val = fins->op2; /* Stored value reference. */ @@ -516,7 +512,7 @@ static AliasRet aa_fref(jit_State *J, IRIns *refa, IRIns *refb) } /* Only the loads for mutable fields end up here (see FOLD). */ -TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J) +TRef lj_opt_fwd_fload(jit_State *J) { IRRef oref = fins->op1; /* Object reference. */ IRRef fid = fins->op2; /* Field ID. */ @@ -548,7 +544,7 @@ TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J) } /* FSTORE elimination. */ -TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J) +TRef lj_opt_dse_fstore(jit_State *J) { IRRef fref = fins->op1; /* FREF reference. */ IRRef val = fins->op2; /* Stored value reference. 
*/ @@ -631,13 +627,13 @@ static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *xa, IRIns *xb) if (refa->o == IR_ADD && irref_isk(refa->op2)) { IRIns *irk = IR(refa->op2); basea = IR(refa->op1); - ofsa = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : + ofsa = (irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; } if (refb->o == IR_ADD && irref_isk(refb->op2)) { IRIns *irk = IR(refb->op2); baseb = IR(refb->op1); - ofsb = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : + ofsb = (irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; } /* Treat constified pointers like base vs. base+offset. */ @@ -689,7 +685,7 @@ static IRRef reassoc_xref(jit_State *J, IRIns *ir) ptrdiff_t ofs = 0; if (ir->o == IR_ADD && irref_isk(ir->op2)) { /* Get constant offset. */ IRIns *irk = IR(ir->op2); - ofs = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : + ofs = (irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 : (ptrdiff_t)irk->i; ir = IR(ir->op1); } @@ -730,7 +726,7 @@ static IRRef reassoc_xref(jit_State *J, IRIns *ir) } /* XLOAD forwarding. */ -TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J) +TRef lj_opt_fwd_xload(jit_State *J) { IRRef xref = fins->op1; IRIns *xr = IR(xref); @@ -798,7 +794,7 @@ TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J) } /* XSTORE elimination. */ -TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J) +TRef lj_opt_dse_xstore(jit_State *J) { IRRef xref = fins->op1; IRIns *xr = IR(xref); @@ -847,7 +843,7 @@ TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J) /* -- Forwarding of lj_tab_len -------------------------------------------- */ /* This is rather simplistic right now, but better than nothing. */ -TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J) +TRef lj_opt_fwd_tab_len(jit_State *J) { IRRef tab = fins->op1; /* Table reference. */ IRRef lim = tab; /* Search limit. 
*/ @@ -932,4 +928,3 @@ int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref) #undef fleft #undef fright -#endif diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index cd96ca4b4f..5609de0786 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c @@ -9,7 +9,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_bc.h" #include "lj_ir.h" @@ -403,7 +402,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) } /* Narrow a type conversion of an arithmetic operation. */ -TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J) +TRef lj_opt_narrow_convert(jit_State *J) { if ((J->flags & JIT_F_OPT_NARROW)) { NarrowConv nc; @@ -442,14 +441,14 @@ static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode) ((mode & IRCONV_DSTMASK) >> IRCONV_DSH)), op1, op2); narrow_bpc_set(J, ref, tref_ref(tr), mode); } - } else if (LJ_64 && (mode & IRCONV_SEXT) && !irt_is64(ir->t)) { + } else if ((mode & IRCONV_SEXT) && !irt_is64(ir->t)) { tr = emitir(IRT(IR_CONV, IRT_INTP), tr, mode); } return tr; } /* Narrow array index. */ -TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) +TRef lj_opt_narrow_index(jit_State *J, TRef tr) { IRIns *ir; lua_assert(tref_isnumber(tr)); @@ -464,7 +463,7 @@ TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) } /* Narrow conversion to integer operand (overflow undefined). */ -TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr) +TRef lj_opt_narrow_toint(jit_State *J, TRef tr) { if (tref_isstr(tr)) tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); @@ -480,7 +479,7 @@ TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr) } /* Narrow conversion to bitop operand (overflow wrapped). 
*/ -TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr) +TRef lj_opt_narrow_tobit(jit_State *J, TRef tr) { if (tref_isstr(tr)) tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); @@ -495,19 +494,15 @@ TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr) return narrow_stripov(J, tr, IR_SUBOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT); } -#if LJ_HASFFI /* Narrow C array index (overflow undefined). */ -TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) +TRef lj_opt_narrow_cindex(jit_State *J, TRef tr) { lua_assert(tref_isnumber(tr)); if (tref_isnum(tr)) return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY); /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ - return narrow_stripov(J, tr, IR_MULOV, - LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) : - ((IRT_INTP<<5)|IRT_INT|IRCONV_TOBIT)); + return narrow_stripov(J, tr, IR_MULOV, ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT)); } -#endif /* -- Narrowing of arithmetic operators ----------------------------------- */ @@ -535,8 +530,8 @@ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, { rb = conv_str_tonum(J, rb, vb); rc = conv_str_tonum(J, rc, vc); - /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */ - if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) && + /* Must not narrow MUL, because it loses -0. */ + if ((op >= IR_ADD && op <= IR_SUB) && tref_isinteger(rb) && tref_isinteger(rc) && numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc), (int)op - (int)IR_ADD))) @@ -564,9 +559,9 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) TRef tmp; rb = conv_str_tonum(J, rb, vb); rc = conv_str_tonum(J, rc, vc); - if ((LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) && + if (((J->flags & JIT_F_OPT_NARROW)) && tref_isinteger(rb) && tref_isinteger(rc) && - (tvisint(vc) ? 
intV(vc) != 0 : !tviszero(vc))) { + !tviszero(vc)) { emitir(IRTGI(IR_NE), rc, lj_ir_kint(J, 0)); return emitir(IRTI(IR_MOD), rb, rc); } @@ -586,7 +581,7 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */ rc = conv_str_tonum(J, rc, vc); /* Narrowing must be unconditional to preserve (-x)^i semantics. */ - if (tvisint(vc) || numisint(numV(vc))) { + if (numisint(numV(vc))) { int checkrange = 0; /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { @@ -622,8 +617,7 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) /* Narrow a single runtime value. */ static int narrow_forl(jit_State *J, cTValue *o) { - if (tvisint(o)) return 1; - if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o)); + if (J->flags & JIT_F_OPT_NARROW) return numisint(numV(o)); return 0; } @@ -651,4 +645,3 @@ IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) #undef emitir #undef emitir_raw -#endif diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c index 929ccb6187..aa64078a26 100644 --- a/src/lj_opt_sink.c +++ b/src/lj_opt_sink.c @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_ir.h" #include "lj_jit.h" @@ -93,21 +92,14 @@ static void sink_mark_ins(jit_State *J) irt_setmark(IR(ir->op2)->t); /* Mark stored value. */ break; } -#if LJ_HASFFI case IR_CNEWI: - if (irt_isphi(ir->t) && - (!sink_checkphi(J, ir, ir->op2) || - (LJ_32 && ir+1 < irlast && (ir+1)->o == IR_HIOP && - !sink_checkphi(J, ir, (ir+1)->op2)))) + if (irt_isphi(ir->t) && !sink_checkphi(J, ir, ir->op2)) irt_setmark(ir->t); /* Mark ineligible allocation. */ /* fallthrough */ -#endif case IR_USTORE: irt_setmark(IR(ir->op2)->t); /* Mark stored value. */ break; -#if LJ_HASFFI case IR_CALLXS: -#endif case IR_CALLS: irt_setmark(IR(ir->op1)->t); /* Mark (potentially) stored values. 
*/ break; @@ -116,7 +108,7 @@ static void sink_mark_ins(jit_State *J) irl->prev = irr->prev = 0; /* Clear PHI value counts. */ if (irl->o == irr->o && (irl->o == IR_TNEW || irl->o == IR_TDUP || - (LJ_HASFFI && (irl->o == IR_CNEW || irl->o == IR_CNEWI)))) + (irl->o == IR_CNEW || irl->o == IR_CNEWI))) break; irt_setmark(irl->t); irt_setmark(irr->t); @@ -186,9 +178,7 @@ static void sink_sweep_ins(jit_State *J) ir->prev = REGSP_INIT; } break; -#if LJ_HASFFI case IR_CNEW: case IR_CNEWI: -#endif case IR_TNEW: case IR_TDUP: if (!irt_ismarked(ir->t)) { ir->t.irt &= ~IRT_GUARD; @@ -203,7 +193,7 @@ static void sink_sweep_ins(jit_State *J) IRIns *ira = IR(ir->op2); if (!irt_ismarked(ira->t) && (ira->o == IR_TNEW || ira->o == IR_TDUP || - (LJ_HASFFI && (ira->o == IR_CNEW || ira->o == IR_CNEWI)))) { + (ira->o == IR_CNEW || ira->o == IR_CNEWI))) { ir->prev = REGSP(RID_SINK, 0); } else { ir->prev = REGSP_INIT; @@ -219,6 +209,7 @@ static void sink_sweep_ins(jit_State *J) for (ir = IR(J->cur.nk); ir < irbase; ir++) { irt_clearmark(ir->t); ir->prev = REGSP_INIT; + /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. 
*/ if (irt_is64(ir->t) && ir->o != IR_KNULL) ir++; } @@ -235,7 +226,7 @@ void lj_opt_sink(jit_State *J) JIT_F_OPT_DCE|JIT_F_OPT_CSE|JIT_F_OPT_FOLD); if ((J->flags & need) == need && (J->chain[IR_TNEW] || J->chain[IR_TDUP] || - (LJ_HASFFI && (J->chain[IR_CNEW] || J->chain[IR_CNEWI])))) { + (J->chain[IR_CNEW] || J->chain[IR_CNEWI]))) { if (!J->loopref) sink_mark_snap(J, &J->cur.snap[J->cur.nsnap-1]); sink_mark_ins(J); @@ -247,4 +238,3 @@ void lj_opt_sink(jit_State *J) #undef IR -#endif diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index fc9352042e..1e065bc2fc 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c @@ -8,863 +8,3 @@ #include "lj_obj.h" -#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) - -#include "lj_err.h" -#include "lj_buf.h" -#include "lj_ir.h" -#include "lj_jit.h" -#include "lj_ircall.h" -#include "lj_iropt.h" -#include "lj_dispatch.h" -#include "lj_vm.h" - -/* SPLIT pass: -** -** This pass splits up 64 bit IR instructions into multiple 32 bit IR -** instructions. It's only active for soft-float targets or for 32 bit CPUs -** which lack native 64 bit integer operations (the FFI is currently the -** only emitter for 64 bit integer instructions). -** -** Splitting the IR in a separate pass keeps each 32 bit IR assembler -** backend simple. Only a small amount of extra functionality needs to be -** implemented. This is much easier than adding support for allocating -** register pairs to each backend (believe me, I tried). A few simple, but -** important optimizations can be performed by the SPLIT pass, which would -** be tedious to do in the backend. -** -** The basic idea is to replace each 64 bit IR instruction with its 32 bit -** equivalent plus an extra HIOP instruction. The splitted IR is not passed -** through FOLD or any other optimizations, so each HIOP is guaranteed to -** immediately follow it's counterpart. The actual functionality of HIOP is -** inferred from the previous instruction. 
-** -** The operands of HIOP hold the hiword input references. The output of HIOP -** is the hiword output reference, which is also used to hold the hiword -** register or spill slot information. The register allocator treats this -** instruction independently of any other instruction, which improves code -** quality compared to using fixed register pairs. -** -** It's easier to split up some instructions into two regular 32 bit -** instructions. E.g. XLOAD is split up into two XLOADs with two different -** addresses. Obviously 64 bit constants need to be split up into two 32 bit -** constants, too. Some hiword instructions can be entirely omitted, e.g. -** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls -** are split up into two 32 bit arguments each. -** -** On soft-float targets, floating-point instructions are directly converted -** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX). -** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump). -** -** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with -** two int64_t fields: -** -** 0100 p32 ADD base +8 -** 0101 i64 XLOAD 0100 -** 0102 i64 ADD 0101 +1 -** 0103 p32 ADD base +16 -** 0104 i64 XSTORE 0103 0102 -** -** mov rax, [esi+0x8] -** add rax, +0x01 -** mov [esi+0x10], rax -** -** Here's the transformed IR and the x86 machine code after the SPLIT pass: -** -** 0100 p32 ADD base +8 -** 0101 int XLOAD 0100 -** 0102 p32 ADD base +12 -** 0103 int XLOAD 0102 -** 0104 int ADD 0101 +1 -** 0105 int HIOP 0103 +0 -** 0106 p32 ADD base +16 -** 0107 int XSTORE 0106 0104 -** 0108 int HIOP 0106 0105 -** -** mov eax, [esi+0x8] -** mov ecx, [esi+0xc] -** add eax, +0x01 -** adc ecx, +0x00 -** mov [esi+0x10], eax -** mov [esi+0x14], ecx -** -** You may notice the reassociated hiword address computation, which is -** later fused into the mov operands by the assembler. -*/ - -/* Some local macros to save typing. Undef'd at the end. 
*/ -#define IR(ref) (&J->cur.ir[(ref)]) - -/* Directly emit the transformed IR without updating chains etc. */ -static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2) -{ - IRRef nref = lj_ir_nextins(J); - IRIns *ir = IR(nref); - ir->ot = ot; - ir->op1 = op1; - ir->op2 = op2; - return nref; -} - -#if LJ_SOFTFP -/* Emit a (checked) number to integer conversion. */ -static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check) -{ - IRRef tmp, res; -#if LJ_LE - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi); -#else - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo); -#endif - res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i); - if (check) { - tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d); - split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); - split_emit(J, IRTGI(IR_EQ), tmp, lo); - split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi); - } - return res; -} - -/* Emit a CALLN with one split 64 bit argument. */ -static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir, - IRIns *ir, IRCallID id) -{ - IRRef tmp, op1 = ir->op1; - J->cur.nins--; -#if LJ_LE - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); -#else - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); -#endif - ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); - return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); -} -#endif - -/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. 
*/ -static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, - IRIns *ir, IRCallID id) -{ - IRRef tmp, op1 = ir->op1, op2 = ir->op2; - J->cur.nins--; -#if LJ_LE - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); -#else - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); -#endif - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); - ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); - return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); -} - -/* Emit a CALLN with two split 64 bit arguments. */ -static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, - IRIns *ir, IRCallID id) -{ - IRRef tmp, op1 = ir->op1, op2 = ir->op2; - J->cur.nins--; -#if LJ_LE - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]); -#else - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]); - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); -#endif - ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); - return split_emit(J, - IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT), - tmp, tmp); -} - -/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */ -static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref) -{ - IRRef nref = oir[ref].prev; - IRIns *ir = IR(nref); - int32_t ofs = 4; - if (ir->o == IR_KPTR) - return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs); - if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) { - /* Reassociate address. 
*/ - ofs += IR(ir->op2)->i; - nref = ir->op1; - if (ofs == 0) return nref; - } - return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs)); -} - -#if LJ_HASFFI -static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst, - IRIns *oir, IRIns *nir, IRIns *ir) -{ - IROp op = ir->o; - IRRef kref = nir->op2; - if (irref_isk(kref)) { /* Optimize constant shifts. */ - int32_t k = (IR(kref)->i & 63); - IRRef lo = nir->op1, hi = hisubst[ir->op1]; - if (op == IR_BROL || op == IR_BROR) { - if (op == IR_BROR) k = (-k & 63); - if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; } - if (k == 0) { - passthrough: - J->cur.nins--; - ir->prev = lo; - return hi; - } else { - TRef k1, k2; - IRRef t1, t2, t3, t4; - J->cur.nins--; - k1 = lj_ir_kint(J, k); - k2 = lj_ir_kint(J, (-k & 31)); - t1 = split_emit(J, IRTI(IR_BSHL), lo, k1); - t2 = split_emit(J, IRTI(IR_BSHL), hi, k1); - t3 = split_emit(J, IRTI(IR_BSHR), lo, k2); - t4 = split_emit(J, IRTI(IR_BSHR), hi, k2); - ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4); - return split_emit(J, IRTI(IR_BOR), t2, t3); - } - } else if (k == 0) { - goto passthrough; - } else if (k < 32) { - if (op == IR_BSHL) { - IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref); - IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31))); - return split_emit(J, IRTI(IR_BOR), t1, t2); - } else { - IRRef t1 = ir->prev, t2; - lua_assert(op == IR_BSHR || op == IR_BSAR); - nir->o = IR_BSHR; - t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31))); - ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2); - return split_emit(J, IRTI(op), hi, kref); - } - } else { - if (op == IR_BSHL) { - if (k == 32) - J->cur.nins--; - else - lo = ir->prev; - ir->prev = lj_ir_kint(J, 0); - return lo; - } else { - lua_assert(op == IR_BSHR || op == IR_BSAR); - if (k == 32) { - J->cur.nins--; - ir->prev = hi; - } else { - nir->op1 = hi; - } - if (op == IR_BSHR) - return lj_ir_kint(J, 0); - else - return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31)); - } - } - } 
- return split_call_li(J, hisubst, oir, ir, - op - IR_BSHL + IRCALL_lj_carith_shl64); -} - -static IRRef split_bitop(jit_State *J, IRRef1 *hisubst, - IRIns *nir, IRIns *ir) -{ - IROp op = ir->o; - IRRef hi, kref = nir->op2; - if (irref_isk(kref)) { /* Optimize bit operations with lo constant. */ - int32_t k = IR(kref)->i; - if (k == 0 || k == -1) { - if (op == IR_BAND) k = ~k; - if (k == 0) { - J->cur.nins--; - ir->prev = nir->op1; - } else if (op == IR_BXOR) { - nir->o = IR_BNOT; - nir->op2 = 0; - } else { - J->cur.nins--; - ir->prev = kref; - } - } - } - hi = hisubst[ir->op1]; - kref = hisubst[ir->op2]; - if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */ - int32_t k = IR(kref)->i; - if (k == 0 || k == -1) { - if (op == IR_BAND) k = ~k; - if (k == 0) { - return hi; - } else if (op == IR_BXOR) { - return split_emit(J, IRTI(IR_BNOT), hi, 0); - } else { - return kref; - } - } - } - return split_emit(J, IRTI(op), hi, kref); -} -#endif - -/* Substitute references of a snapshot. */ -static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) -{ - SnapEntry *map = &J->cur.snapmap[snap->mapofs]; - MSize n, nent = snap->nent; - for (n = 0; n < nent; n++) { - SnapEntry sn = map[n]; - IRIns *ir = &oir[snap_ref(sn)]; - if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn)))) - map[n] = ((sn & 0xffff0000) | ir->prev); - } -} - -/* Transform the old IR to the new IR. */ -static void split_ir(jit_State *J) -{ - IRRef nins = J->cur.nins, nk = J->cur.nk; - MSize irlen = nins - nk; - MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); - IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need); - IRRef1 *hisubst; - IRRef ref, snref; - SnapShot *snap; - - /* Copy old IR to buffer. */ - memcpy(oir, IR(nk), irlen*sizeof(IRIns)); - /* Bias hiword substitution table and old IR. Loword kept in field prev. */ - hisubst = (IRRef1 *)&oir[irlen] - nk; - oir -= nk; - - /* Remove all IR instructions, but retain IR constants. 
*/ - J->cur.nins = REF_FIRST; - J->loopref = 0; - - /* Process constants and fixed references. */ - for (ref = nk; ref <= REF_BASE; ref++) { - IRIns *ir = &oir[ref]; - if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) { - /* Split up 64 bit constant. */ - TValue tv = *ir_k64(ir); - ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); - hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); - } else { - ir->prev = ref; /* Identity substitution for loword. */ - hisubst[ref] = 0; - } - if (irt_is64(ir->t) && ir->o != IR_KNULL) - ref++; - } - - /* Process old IR instructions. */ - snap = J->cur.snap; - snref = snap->ref; - for (ref = REF_FIRST; ref < nins; ref++) { - IRIns *ir = &oir[ref]; - IRRef nref = lj_ir_nextins(J); - IRIns *nir = IR(nref); - IRRef hi = 0; - - if (ref >= snref) { - snap->ref = nref; - split_subst_snap(J, snap++, oir); - snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0; - } - - /* Copy-substitute old instruction to new instruction. */ - nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; - nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev; - ir->prev = nref; /* Loword substitution. */ - nir->o = ir->o; - nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); - hisubst[ref] = 0; - - /* Split 64 bit instructions. */ -#if LJ_SOFTFP - if (irt_isnum(ir->t)) { - nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ - /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */ - switch (ir->o) { - case IR_ADD: - hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add); - break; - case IR_SUB: - hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub); - break; - case IR_MUL: - hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul); - break; - case IR_DIV: - hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div); - break; - case IR_POW: - hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); - break; - case IR_FPMATH: - /* Try to rejoin pow from EXP2, MUL and LOG2. 
*/ - if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) { - IRIns *irp = IR(nir->op1); - if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) { - IRIns *irm4 = IR(irp->op1); - IRIns *irm3 = IR(irm4->op1); - IRIns *irm12 = IR(irm3->op1); - IRIns *irl1 = IR(irm12->op1); - if (irm12->op1 > J->loopref && irl1->o == IR_CALLN && - irl1->op2 == IRCALL_lj_vm_log2) { - IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */ - IRRef arg3 = irm3->op2, arg4 = irm4->op2; - J->cur.nins--; - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3); - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4); - ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow); - hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); - break; - } - } - } - hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); - break; - case IR_ATAN2: - hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2); - break; - case IR_LDEXP: - hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); - break; - case IR_NEG: case IR_ABS: - nir->o = IR_CONV; /* Pass through loword. */ - nir->op2 = (IRT_INT << 5) | IRT_INT; - hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), - hisubst[ir->op1], - lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG)))); - break; - case IR_SLOAD: - if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ - nir->op2 &= ~IRSLOAD_CONVERT; - ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref, - IRCALL_softfp_i2d); - hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); - break; - } - /* fallthrough */ - case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - case IR_STRTO: - hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); - break; - case IR_FLOAD: - lua_assert(ir->op1 == REF_NIL); - hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4)); - nir->op2 += LJ_BE*4; - break; - case IR_XLOAD: { - IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. 
*/ - J->cur.nins--; - hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ -#if LJ_BE - hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2); - inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD); -#endif - nref = lj_ir_nextins(J); - nir = IR(nref); - *nir = inslo; /* Re-emit lo XLOAD. */ -#if LJ_LE - hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); - ir->prev = nref; -#else - ir->prev = hi; hi = nref; -#endif - break; - } - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE: - split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]); - break; - case IR_CONV: { /* Conversion to number. Others handled below. */ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); - UNUSED(st); -#if LJ_32 && LJ_HASFFI - if (st == IRT_I64 || st == IRT_U64) { - hi = split_call_l(J, hisubst, oir, ir, - st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d); - break; - } -#endif - lua_assert(st == IRT_INT || - (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); - nir->o = IR_CALLN; -#if LJ_32 && LJ_HASFFI - nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : - st == IRT_FLOAT ? IRCALL_softfp_f2d : - IRCALL_softfp_ui2d; -#else - nir->op2 = IRCALL_softfp_i2d; -#endif - hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); - break; - } - case IR_CALLN: - case IR_CALLL: - case IR_CALLS: - case IR_CALLXS: - goto split_call; - case IR_PHI: - if (nir->op1 == nir->op2) - J->cur.nins--; /* Drop useless PHIs. */ - if (hisubst[ir->op1] != hisubst[ir->op2]) - split_emit(J, IRT(IR_PHI, IRT_SOFTFP), - hisubst[ir->op1], hisubst[ir->op2]); - break; - case IR_HIOP: - J->cur.nins--; /* Drop joining HIOP. 
*/ - ir->prev = nir->op1; - hi = nir->op2; - break; - default: - lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX); - hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), - hisubst[ir->op1], hisubst[ir->op2]); - break; - } - } else -#endif -#if LJ_32 && LJ_HASFFI - if (irt_isint64(ir->t)) { - IRRef hiref = hisubst[ir->op1]; - nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ - switch (ir->o) { - case IR_ADD: - case IR_SUB: - /* Use plain op for hiword if loword cannot produce a carry/borrow. */ - if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { - ir->prev = nir->op1; /* Pass through loword. */ - nir->op1 = hiref; nir->op2 = hisubst[ir->op2]; - hi = nref; - break; - } - /* fallthrough */ - case IR_NEG: - hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); - break; - case IR_MUL: - hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); - break; - case IR_DIV: - hi = split_call_ll(J, hisubst, oir, ir, - irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - break; - case IR_MOD: - hi = split_call_ll(J, hisubst, oir, ir, - irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - break; - case IR_POW: - hi = split_call_ll(J, hisubst, oir, ir, - irt_isi64(ir->t) ? 
IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - break; - case IR_BNOT: - hi = split_emit(J, IRTI(IR_BNOT), hiref, 0); - break; - case IR_BSWAP: - ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0); - hi = nref; - break; - case IR_BAND: case IR_BOR: case IR_BXOR: - hi = split_bitop(J, hisubst, nir, ir); - break; - case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: - hi = split_bitshift(J, hisubst, oir, nir, ir); - break; - case IR_FLOAD: - lua_assert(ir->op2 == IRFL_CDATA_INT64); - hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); -#if LJ_BE - ir->prev = hi; hi = nref; -#endif - break; - case IR_XLOAD: - hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2); -#if LJ_BE - ir->prev = hi; hi = nref; -#endif - break; - case IR_XSTORE: - split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]); - break; - case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); -#if LJ_SOFTFP - if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ - hi = split_call_l(J, hisubst, oir, ir, - irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul); - } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */ - nir->o = IR_CALLN; - nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul; - hi = split_emit(J, IRTI(IR_HIOP), nref, nref); - } -#else - if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ - hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); - } -#endif - else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ - /* Drop cast, since assembler doesn't care. But fwd both parts. */ - hi = hiref; - goto fwdlo; - } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ - IRRef k31 = lj_ir_kint(J, 31); - nir = IR(nref); /* May have been reallocated. */ - ir->prev = nir->op1; /* Pass through loword. */ - nir->o = IR_BSAR; /* hi = bsar(lo, 31). */ - nir->op2 = k31; - hi = nref; - } else { /* Zero-extend to 64 bit. 
*/ - hi = lj_ir_kint(J, 0); - goto fwdlo; - } - break; - } - case IR_CALLXS: - goto split_call; - case IR_PHI: { - IRRef hiref2; - if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || - nir->op1 == nir->op2) - J->cur.nins--; /* Drop useless PHIs. */ - hiref2 = hisubst[ir->op2]; - if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2)) - split_emit(J, IRTI(IR_PHI), hiref, hiref2); - break; - } - case IR_HIOP: - J->cur.nins--; /* Drop joining HIOP. */ - ir->prev = nir->op1; - hi = nir->op2; - break; - default: - lua_assert(ir->o <= IR_NE); /* Comparisons. */ - split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); - break; - } - } else -#endif -#if LJ_SOFTFP - if (ir->o == IR_SLOAD) { - if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from number to int. */ - nir->op2 &= ~IRSLOAD_CONVERT; - if (!(nir->op2 & IRSLOAD_TYPECHECK)) - nir->t.irt = IRT_INT; /* Drop guard. */ - split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); - ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t)); - } - } else if (ir->o == IR_TOBIT) { - IRRef tmp, op1 = ir->op1; - J->cur.nins--; -#if LJ_LE - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); -#else - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); -#endif - ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); - } else if (ir->o == IR_TOSTR) { - if (hisubst[ir->op1]) { - if (irref_isk(ir->op1)) - nir->op1 = ir->op1; - else - split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref); - } - } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) { - if (irref_isk(ir->op2) && hisubst[ir->op2]) - nir->op2 = ir->op2; - } else -#endif - if (ir->o == IR_CONV) { /* See above, too. */ - IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); -#if LJ_32 && LJ_HASFFI - if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ -#if LJ_SOFTFP - if (irt_isfloat(ir->t)) { - split_call_l(J, hisubst, oir, ir, - st == IRT_I64 ? 
IRCALL_fp64_l2f : IRCALL_fp64_ul2f); - J->cur.nins--; /* Drop unused HIOP. */ - } -#else - if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ - ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), - hisubst[ir->op1], nref); - } -#endif - else { /* Truncate to lower 32 bits. */ - fwdlo: - ir->prev = nir->op1; /* Forward loword. */ - /* Replace with NOP to avoid messing up the snapshot logic. */ - nir->ot = IRT(IR_NOP, IRT_NIL); - nir->op1 = nir->op2 = 0; - } - } -#endif -#if LJ_SOFTFP && LJ_32 && LJ_HASFFI - else if (irt_isfloat(ir->t)) { - if (st == IRT_NUM) { - split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f); - J->cur.nins--; /* Drop unused HIOP. */ - } else { - nir->o = IR_CALLN; - nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; - } - } else if (st == IRT_FLOAT) { - nir->o = IR_CALLN; - nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui; - } else -#endif -#if LJ_SOFTFP - if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { - if (irt_isguard(ir->t)) { - lua_assert(st == IRT_NUM && irt_isint(ir->t)); - J->cur.nins--; - ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); - } else { - split_call_l(J, hisubst, oir, ir, -#if LJ_32 && LJ_HASFFI - st == IRT_NUM ? - (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : - (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui) -#else - IRCALL_softfp_d2i -#endif - ); - J->cur.nins--; /* Drop unused HIOP. */ - } - } -#endif - } else if (ir->o == IR_CALLXS) { - IRRef hiref; - split_call: - hiref = hisubst[ir->op1]; - if (hiref) { - IROpT ot = nir->ot; - IRRef op2 = nir->op2; - nir->ot = IRT(IR_CARG, IRT_NIL); -#if LJ_LE - nir->op2 = hiref; -#else - nir->op2 = nir->op1; nir->op1 = hiref; -#endif - ir->prev = nref = split_emit(J, ot, nref, op2); - } - if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t)) - hi = split_emit(J, - IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? 
IRT_SOFTFP : IRT_INT), - nref, nref); - } else if (ir->o == IR_CARG) { - IRRef hiref = hisubst[ir->op1]; - if (hiref) { - IRRef op2 = nir->op2; -#if LJ_LE - nir->op2 = hiref; -#else - nir->op2 = nir->op1; nir->op1 = hiref; -#endif - ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); - nir = IR(nref); - } - hiref = hisubst[ir->op2]; - if (hiref) { -#if !LJ_TARGET_X86 - int carg = 0; - IRIns *cir; - for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1)) - carg++; - if ((carg & 1) == 0) { /* Align 64 bit arguments. */ - IRRef op2 = nir->op2; - nir->op2 = REF_NIL; - nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); - nir = IR(nref); - } -#endif -#if LJ_BE - { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; } -#endif - ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref); - } - } else if (ir->o == IR_CNEWI) { - if (hisubst[ir->op2]) - split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]); - } else if (ir->o == IR_LOOP) { - J->loopref = nref; /* Needed by assembler. */ - } - hisubst[ref] = hi; /* Store hiword substitution. */ - } - if (snref == nins) { /* Substitution for last snapshot. */ - snap->ref = J->cur.nins; - split_subst_snap(J, snap, oir); - } - - /* Add PHI marks. */ - for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) { - IRIns *ir = IR(ref); - if (ir->o != IR_PHI) break; - if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t); - if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t); - } -} - -/* Protected callback for split pass. */ -static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud) -{ - jit_State *J = (jit_State *)ud; - split_ir(J); - UNUSED(L); UNUSED(dummy); - return NULL; -} - -#if defined(LUA_USE_ASSERT) || LJ_SOFTFP -/* Slow, but sure way to check whether a SPLIT pass is needed. */ -static int split_needsplit(jit_State *J) -{ - IRIns *ir, *irend; - IRRef ref; - for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++) - if (LJ_SOFTFP ? 
irt_is64orfp(ir->t) : irt_isint64(ir->t)) - return 1; - if (LJ_SOFTFP) { - for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev) - if ((IR(ref)->op2 & IRSLOAD_CONVERT)) - return 1; - if (J->chain[IR_TOBIT]) - return 1; - } - for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) { - IRType st = (IR(ref)->op2 & IRCONV_SRCMASK); - if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) || - st == IRT_I64 || st == IRT_U64) - return 1; - } - return 0; /* Nope. */ -} -#endif - -/* SPLIT pass. */ -void lj_opt_split(jit_State *J) -{ -#if LJ_SOFTFP - if (!J->needsplit) - J->needsplit = split_needsplit(J); -#else - lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */ -#endif - if (J->needsplit) { - int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); - if (errcode) { - /* Completely reset the trace to avoid inconsistent dump on abort. */ - J->cur.nins = J->cur.nk = REF_BASE; - J->cur.nsnap = 0; - lj_err_throw(J->L, errcode); /* Propagate errors. */ - } - } -} - -#undef IR - -#endif diff --git a/src/lj_parse.c b/src/lj_parse.c index 08f7cfa6ac..d9c3f235f5 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -19,14 +19,12 @@ #include "lj_func.h" #include "lj_state.h" #include "lj_bc.h" -#if LJ_HASFFI #include "lj_ctype.h" -#endif #include "lj_strfmt.h" #include "lj_lex.h" #include "lj_parse.h" #include "lj_vm.h" -#include "lj_vmevent.h" +#include "lj_auditlog.h" /* -- Parser structures and definitions ----------------------------------- */ @@ -91,7 +89,7 @@ static LJ_AINLINE void expr_init(ExpDesc *e, ExpKind k, uint32_t info) static int expr_numiszero(ExpDesc *e) { TValue *o = expr_numtv(e); - return tvisint(o) ? (intV(o) == 0) : tviszero(o); + return tviszero(o); } /* Per-function linked list of scope blocks. */ @@ -241,7 +239,6 @@ GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t len) return s; } -#if LJ_HASFFI /* Anchor cdata to avoid GC. 
*/ void lj_parse_keepcdata(LexState *ls, TValue *tv, GCcdata *cd) { @@ -250,7 +247,6 @@ void lj_parse_keepcdata(LexState *ls, TValue *tv, GCcdata *cd) setcdataV(L, tv, cd); setboolV(lj_tab_set(L, ls->fs->kt, tv), 1); } -#endif /* -- Jump list handling -------------------------------------------------- */ @@ -510,25 +506,16 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg) if (e->k == VKSTR) { ins = BCINS_AD(BC_KSTR, reg, const_str(fs, e)); } else if (e->k == VKNUM) { -#if LJ_DUALNUM - cTValue *tv = expr_numtv(e); - if (tvisint(tv) && checki16(intV(tv))) - ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)intV(tv)); - else -#else lua_Number n = expr_numberV(e); int32_t k = lj_num2int(n); if (checki16(k) && n == (lua_Number)k) ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k); else -#endif ins = BCINS_AD(BC_KNUM, reg, const_num(fs, e)); -#if LJ_HASFFI } else if (e->k == VKCDATA) { fs->flags |= PROTO_FFI; ins = BCINS_AD(BC_KCDATA, reg, const_gc(fs, obj2gco(cdataV(&e->u.nval)), LJ_TCDATA)); -#endif } else if (e->k == VRELOCABLE) { setbc_a(bcptr(fs, e), reg); goto noins; @@ -772,13 +759,6 @@ static int foldarith(BinOpr opr, ExpDesc *e1, ExpDesc *e2) n = lj_vm_foldarith(expr_numberV(e1), expr_numberV(e2), (int)opr-OPR_ADD); setnumV(&o, n); if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. 
*/ - if (LJ_DUALNUM) { - int32_t k = lj_num2int(n); - if ((lua_Number)k == n) { - setintV(&e1->u.nval, k); - return 1; - } - } setnumV(&e1->u.nval, n); return 1; } @@ -930,7 +910,7 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e) if (e->k == VKNIL || e->k == VKFALSE) { e->k = VKTRUE; return; - } else if (expr_isk(e) || (LJ_HASFFI && e->k == VKCDATA)) { + } else if (expr_isk(e) || e->k == VKCDATA) { e->k = VKFALSE; return; } else if (e->k == VJMP) { @@ -947,7 +927,6 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e) } else { lua_assert(op == BC_UNM || op == BC_LEN); if (op == BC_UNM && !expr_hasjump(e)) { /* Constant-fold negations. */ -#if LJ_HASFFI if (e->k == VKCDATA) { /* Fold in-place since cdata is not interned. */ GCcdata *cd = cdataV(&e->u.nval); int64_t *p = (int64_t *)cdataptr(cd); @@ -957,20 +936,10 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e) *p = -*p; return; } else -#endif if (expr_isnumk(e) && !expr_numiszero(e)) { /* Avoid folding to -0. 
*/ TValue *o = expr_numtv(e); - if (tvisint(o)) { - int32_t k = intV(o); - if (k == -k) - setnumV(o, -(lua_Number)k); - else - setintV(o, -k); - return; - } else { - o->u64 ^= U64x(80000000,00000000); - return; - } + o->u64 ^= U64x(80000000,00000000); + return; } } expr_toanyreg(fs, e); @@ -1350,10 +1319,7 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr) for (i = 0; i < kt->asize; i++) if (tvhaskslot(&array[i])) { TValue *tv = &((TValue *)kptr)[tvkslot(&array[i])]; - if (LJ_DUALNUM) - setintV(tv, (int32_t)i); - else - setnumV(tv, (lua_Number)i); + setnumV(tv, (lua_Number)i); } node = noderef(kt->node); hmask = kt->hmask; @@ -1361,20 +1327,9 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr) Node *n = &node[i]; if (tvhaskslot(&n->val)) { ptrdiff_t kidx = (ptrdiff_t)tvkslot(&n->val); - lua_assert(!tvisint(&n->key)); if (tvisnum(&n->key)) { TValue *tv = &((TValue *)kptr)[kidx]; - if (LJ_DUALNUM) { - lua_Number nn = numV(&n->key); - int32_t k = lj_num2int(nn); - lua_assert(!tvismzero(&n->key)); - if ((lua_Number)k == nn) - setintV(tv, k); - else - *tv = n->key; - } else { - *tv = n->key; - } + *tv = n->key; } else { GCobj *o = gcV(&n->key); setgcref(((GCRef *)kptr)[~kidx], o); @@ -1394,49 +1349,26 @@ static void fs_fixup_uv1(FuncState *fs, GCproto *pt, uint16_t *uv) memcpy(uv, fs->uvtmp, fs->nuv*sizeof(VarIndex)); } -#ifndef LUAJIT_DISABLE_DEBUGINFO /* Prepare lineinfo for prototype. */ static size_t fs_prep_line(FuncState *fs, BCLine numline) { - return (fs->pc-1) << (numline < 256 ? 0 : numline < 65536 ? 1 : 2); + return (fs->pc+1) * sizeof(BCLine); } /* Fixup lineinfo for prototype. 
*/ static void fs_fixup_line(FuncState *fs, GCproto *pt, - void *lineinfo, BCLine numline) + uint32_t *lineinfo, BCLine numline) { - BCInsLine *base = fs->bcbase + 1; - BCLine first = fs->linedefined; - MSize i = 0, n = fs->pc-1; + int i; pt->firstline = fs->linedefined; pt->numline = numline; setmref(pt->lineinfo, lineinfo); - if (LJ_LIKELY(numline < 256)) { - uint8_t *li = (uint8_t *)lineinfo; - do { - BCLine delta = base[i].line - first; - lua_assert(delta >= 0 && delta < 256); - li[i] = (uint8_t)delta; - } while (++i < n); - } else if (LJ_LIKELY(numline < 65536)) { - uint16_t *li = (uint16_t *)lineinfo; - do { - BCLine delta = base[i].line - first; - lua_assert(delta >= 0 && delta < 65536); - li[i] = (uint16_t)delta; - } while (++i < n); - } else { - uint32_t *li = (uint32_t *)lineinfo; - do { - BCLine delta = base[i].line - first; - lua_assert(delta >= 0); - li[i] = (uint32_t)delta; - } while (++i < n); - } + for (i = 0; i <= fs->pc; i++) lineinfo[i] = fs->bcbase[i].line; } /* Prepare variable info for prototype. */ -static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) +static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar, + size_t *ofsdeclname, const char *declname) { VarInfo *vs =ls->vstack, *ve; MSize i, n; @@ -1474,27 +1406,26 @@ static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) } } lj_buf_putb(&ls->sb, '\0'); /* Terminator for varinfo. */ + /* Store function declaration name. */ + *ofsdeclname = sbuflen(&ls->sb); + { + char *p; + int len = strlen(declname) + 1; + p = lj_buf_more(&ls->sb, len); + p = lj_buf_wmem(p, declname, len); + setsbufP(&ls->sb, p); + } return sbuflen(&ls->sb); } /* Fixup variable info for prototype. 
*/ -static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar) +static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar, size_t ofsdeclname) { setmref(pt->uvinfo, p); setmref(pt->varinfo, (char *)p + ofsvar); + setmref(pt->declname, (char*)p + ofsdeclname); memcpy(p, sbufB(&ls->sb), sbuflen(&ls->sb)); /* Copy from temp. buffer. */ } -#else - -/* Initialize with empty debug info, if disabled. */ -#define fs_prep_line(fs, numline) (UNUSED(numline), 0) -#define fs_fixup_line(fs, pt, li, numline) \ - pt->firstline = pt->numline = 0, setmref((pt)->lineinfo, NULL) -#define fs_prep_var(ls, fs, ofsvar) (UNUSED(ofsvar), 0) -#define fs_fixup_var(ls, pt, p, ofsvar) \ - setmref((pt)->uvinfo, NULL), setmref((pt)->varinfo, NULL) - -#endif /* Check if bytecode op returns. */ static int bcopisret(BCOp op) @@ -1547,12 +1478,12 @@ static void fs_fixup_ret(FuncState *fs) } /* Finish a FuncState and return the new prototype. */ -static GCproto *fs_finish(LexState *ls, BCLine line) +static GCproto *fs_finish(LexState *ls, BCLine line, char *declname) { lua_State *L = ls->L; FuncState *fs = ls->fs; BCLine numline = line - fs->linedefined; - size_t sizept, ofsk, ofsuv, ofsli, ofsdbg, ofsvar; + size_t sizept, ofsk, ofsuv, ofsli, ofsdbg, ofsvar, ofsdeclname; GCproto *pt; /* Apply final fixups. */ @@ -1564,7 +1495,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line) ofsk = sizept; sizept += fs->nkn*sizeof(TValue); ofsuv = sizept; sizept += ((fs->nuv+1)&~1)*2; ofsli = sizept; sizept += fs_prep_line(fs, numline); - ofsdbg = sizept; sizept += fs_prep_var(ls, fs, &ofsvar); + ofsdbg = sizept; sizept += fs_prep_var(ls, fs, &ofsvar, &ofsdeclname, declname); /* Allocate prototype and initialize its fields. 
*/ pt = (GCproto *)lj_mem_newgco(L, (MSize)sizept); @@ -1582,16 +1513,13 @@ static GCproto *fs_finish(LexState *ls, BCLine line) fs_fixup_k(fs, pt, (void *)((char *)pt + ofsk)); fs_fixup_uv1(fs, pt, (uint16_t *)((char *)pt + ofsuv)); fs_fixup_line(fs, pt, (void *)((char *)pt + ofsli), numline); - fs_fixup_var(ls, pt, (uint8_t *)((char *)pt + ofsdbg), ofsvar); - - lj_vmevent_send(L, BC, - setprotoV(L, L->top++, pt); - ); + fs_fixup_var(ls, pt, (uint8_t *)((char *)pt + ofsdbg), ofsvar, ofsdeclname); L->top--; /* Pop table of constants. */ ls->vtop = fs->vbase; /* Reset variable stack. */ ls->fs = fs->prev; lua_assert(ls->fs != NULL || ls->tok == TK_eof); + lj_auditlog_new_prototype(pt); return pt; } @@ -1638,22 +1566,12 @@ static void expr_index(FuncState *fs, ExpDesc *t, ExpDesc *e) /* Already called: expr_toval(fs, e). */ t->k = VINDEXED; if (expr_isnumk(e)) { -#if LJ_DUALNUM - if (tvisint(expr_numtv(e))) { - int32_t k = intV(expr_numtv(e)); - if (checku8(k)) { - t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */ - return; - } - } -#else lua_Number n = expr_numberV(e); int32_t k = lj_num2int(n); if (checku8(k) && n == (lua_Number)k) { t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */ return; } -#endif } else if (expr_isstrk(e)) { BCReg idx = const_str(fs, e); if (idx <= BCMAX_C) { @@ -1837,7 +1755,8 @@ static BCReg parse_params(LexState *ls, int needself) static void parse_chunk(LexState *ls); /* Parse body of a function. */ -static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line) +static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line, + char *declname) { FuncState fs, *pfs = ls->fs; FuncScope bl; @@ -1852,15 +1771,13 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line) bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. 
*/ parse_chunk(ls); if (ls->tok != TK_end) lex_match(ls, TK_end, TK_function, line); - pt = fs_finish(ls, (ls->lastline = ls->linenumber)); + pt = fs_finish(ls, (ls->lastline = ls->linenumber), declname); pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */ pfs->bclim = (BCPos)(ls->sizebcstack - oldbase); /* Store new prototype in the constant array of the parent. */ expr_init(e, VRELOCABLE, bcemit_AD(pfs, BC_FNEW, 0, const_gc(pfs, obj2gco(pt), LJ_TPROTO))); -#if LJ_HASFFI pfs->flags |= (fs.flags & PROTO_FFI); -#endif if (!(pfs->flags & PROTO_CHILD)) { if (pfs->flags & PROTO_HAS_RETURN) pfs->flags |= PROTO_FIXUP_RETURN; @@ -1974,7 +1891,7 @@ static void expr_simple(LexState *ls, ExpDesc *v) { switch (ls->tok) { case TK_number: - expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokval)) ? VKCDATA : VKNUM, 0); + expr_init(v, tviscdata(&ls->tokval) ? VKCDATA : VKNUM, 0); copyTV(ls->L, &v->u.nval, &ls->tokval); break; case TK_string: @@ -2005,7 +1922,7 @@ static void expr_simple(LexState *ls, ExpDesc *v) return; case TK_function: lj_lex_next(ls); - parse_body(ls, v, 0, ls->linenumber); + parse_body(ls, v, 0, ls->linenumber, ""); return; default: expr_primary(ls, v); @@ -2234,9 +2151,24 @@ static void parse_call_assign(LexState *ls) } } +/* Convert the logged input into a canonical function declaration name. Truncates log in place and returns it; a log without '(' (including an empty one) is returned unchanged. */ +static char *log_declname(char *log) +{ + char *end = log + strlen(log); /* Start at the NUL so an empty log is never read out of bounds. */ + /* Strip off trailing chars e.g. change "myfunc (X" to "myfunc". */ + while (end > log && *end != '(') end--; /* Rewind to '('. */ + if (*end != '(') return log; /* No '(' present: keep the log as is. */ + while (end > log+1 && end[-1] == ' ') end--; /* Rewind over spaces before '('. */ + *end = '\0'; /* Truncate at '(' or at the first of the spaces preceding it. */ + return log; +} + /* Parse 'local' statement. */ static void parse_local(LexState *ls) { + char log[128]; + memset(&log[0], 0, sizeof(log)); + lj_lex_log(ls, &log[0], sizeof(log)-1); if (lex_opt(ls, TK_function)) { /* Local function declaration. 
*/ ExpDesc v, b; FuncState *fs = ls->fs; @@ -2245,7 +2177,8 @@ static void parse_local(LexState *ls) v.u.s.aux = fs->varmap[fs->freereg]; bcreg_reserve(fs, 1); var_add(ls, 1); - parse_body(ls, &b, 0, ls->linenumber); + lj_lex_endlog(ls); /* Captured declared function name. */ + parse_body(ls, &b, 0, ls->linenumber, log_declname(log)); /* bcemit_store(fs, &v, &b) without setting VSTACK_VAR_RW. */ expr_free(fs, &b); expr_toreg(fs, &b, v.u.s.info); @@ -2254,6 +2187,7 @@ static void parse_local(LexState *ls) } else { /* Local variable declaration. */ ExpDesc e; BCReg nexps, nvars = 0; + lj_lex_endlog(ls); /* Not used for variables. */ do { /* Collect LHS. */ var_new(ls, nvars++, lex_str(ls)); } while (lex_opt(ls, ',')); @@ -2274,6 +2208,9 @@ static void parse_func(LexState *ls, BCLine line) FuncState *fs; ExpDesc v, b; int needself = 0; + char log[128]; + memset(log, 0, sizeof(log)); + lj_lex_log(ls, log, sizeof(log)-1); lj_lex_next(ls); /* Skip 'function'. */ /* Parse function name. */ var_lookup(ls, &v); @@ -2283,7 +2220,8 @@ static void parse_func(LexState *ls, BCLine line) needself = 1; expr_field(ls, &v); } - parse_body(ls, &b, needself, line); + lj_lex_endlog(ls); + parse_body(ls, &b, needself, line, log_declname(log)); fs = ls->fs; bcemit_store(fs, &v, &b); fs->bcbase[fs->pc - 1].line = line; /* Set line for the store. */ @@ -2697,11 +2635,7 @@ GCproto *lj_parse(LexState *ls) FuncScope bl; GCproto *pt; lua_State *L = ls->L; -#ifdef LUAJIT_DISABLE_DEBUGINFO - ls->chunkname = lj_str_newlit(L, "="); -#else ls->chunkname = lj_str_newz(L, ls->chunkarg); -#endif setstrV(L, L->top, ls->chunkname); /* Anchor chunkname string. */ incr_top(L); ls->level = 0; @@ -2717,7 +2651,7 @@ GCproto *lj_parse(LexState *ls) parse_chunk(ls); if (ls->tok != TK_eof) err_token(ls, TK_eof); - pt = fs_finish(ls, ls->linenumber); + pt = fs_finish(ls, ls->linenumber, ""); L->top--; /* Drop chunkname. 
*/ lua_assert(fs.prev == NULL); lua_assert(ls->fs == NULL); diff --git a/src/lj_parse.h b/src/lj_parse.h index ceeab6994f..acf3049a22 100644 --- a/src/lj_parse.h +++ b/src/lj_parse.h @@ -11,8 +11,6 @@ LJ_FUNC GCproto *lj_parse(LexState *ls); LJ_FUNC GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t l); -#if LJ_HASFFI LJ_FUNC void lj_parse_keepcdata(LexState *ls, TValue *tv, GCcdata *cd); -#endif #endif diff --git a/src/lj_profile.c b/src/lj_profile.c deleted file mode 100644 index 116998e1e8..0000000000 --- a/src/lj_profile.c +++ /dev/null @@ -1,368 +0,0 @@ -/* -** Low-overhead profiling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -#define lj_profile_c -#define LUA_CORE - -#include "lj_obj.h" - -#if LJ_HASPROFILE - -#include "lj_buf.h" -#include "lj_frame.h" -#include "lj_debug.h" -#include "lj_dispatch.h" -#if LJ_HASJIT -#include "lj_jit.h" -#include "lj_trace.h" -#endif -#include "lj_profile.h" - -#include "luajit.h" - -#if LJ_PROFILE_SIGPROF - -#include -#include -#define profile_lock(ps) UNUSED(ps) -#define profile_unlock(ps) UNUSED(ps) - -#elif LJ_PROFILE_PTHREAD - -#include -#include -#if LJ_TARGET_PS3 -#include -#endif -#define profile_lock(ps) pthread_mutex_lock(&ps->lock) -#define profile_unlock(ps) pthread_mutex_unlock(&ps->lock) - -#elif LJ_PROFILE_WTHREAD - -#define WIN32_LEAN_AND_MEAN -#if LJ_TARGET_XBOX360 -#include -#include -#else -#include -#endif -typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int); -#define profile_lock(ps) EnterCriticalSection(&ps->lock) -#define profile_unlock(ps) LeaveCriticalSection(&ps->lock) - -#endif - -/* Profiler state. */ -typedef struct ProfileState { - global_State *g; /* VM state that started the profiler. */ - luaJIT_profile_callback cb; /* Profiler callback. */ - void *data; /* Profiler callback data. */ - SBuf sb; /* String buffer for stack dumps. */ - int interval; /* Sample interval in milliseconds. */ - int samples; /* Number of samples for next callback. 
*/ - int vmstate; /* VM state when profile timer triggered. */ -#if LJ_PROFILE_SIGPROF - struct sigaction oldsa; /* Previous SIGPROF state. */ -#elif LJ_PROFILE_PTHREAD - pthread_mutex_t lock; /* g->hookmask update lock. */ - pthread_t thread; /* Timer thread. */ - int abort; /* Abort timer thread. */ -#elif LJ_PROFILE_WTHREAD -#if LJ_TARGET_WINDOWS - HINSTANCE wmm; /* WinMM library handle. */ - WMM_TPFUNC wmm_tbp; /* WinMM timeBeginPeriod function. */ - WMM_TPFUNC wmm_tep; /* WinMM timeEndPeriod function. */ -#endif - CRITICAL_SECTION lock; /* g->hookmask update lock. */ - HANDLE thread; /* Timer thread. */ - int abort; /* Abort timer thread. */ -#endif -} ProfileState; - -/* Sadly, we have to use a static profiler state. -** -** The SIGPROF variant needs a static pointer to the global state, anyway. -** And it would be hard to extend for multiple threads. You can still use -** multiple VMs in multiple threads, but only profile one at a time. -*/ -static ProfileState profile_state; - -/* Default sample interval in milliseconds. */ -#define LJ_PROFILE_INTERVAL_DEFAULT 10 - -/* -- Profiler/hook interaction ------------------------------------------- */ - -#if !LJ_PROFILE_SIGPROF -void LJ_FASTCALL lj_profile_hook_enter(global_State *g) -{ - ProfileState *ps = &profile_state; - if (ps->g) { - profile_lock(ps); - hook_enter(g); - profile_unlock(ps); - } else { - hook_enter(g); - } -} - -void LJ_FASTCALL lj_profile_hook_leave(global_State *g) -{ - ProfileState *ps = &profile_state; - if (ps->g) { - profile_lock(ps); - hook_leave(g); - profile_unlock(ps); - } else { - hook_leave(g); - } -} -#endif - -/* -- Profile callbacks --------------------------------------------------- */ - -/* Callback from profile hook (HOOK_PROFILE already cleared). 
*/ -void LJ_FASTCALL lj_profile_interpreter(lua_State *L) -{ - ProfileState *ps = &profile_state; - global_State *g = G(L); - uint8_t mask; - profile_lock(ps); - mask = (g->hookmask & ~HOOK_PROFILE); - if (!(mask & HOOK_VMEVENT)) { - int samples = ps->samples; - ps->samples = 0; - g->hookmask = HOOK_VMEVENT; - lj_dispatch_update(g); - profile_unlock(ps); - ps->cb(ps->data, L, samples, ps->vmstate); /* Invoke user callback. */ - profile_lock(ps); - mask |= (g->hookmask & HOOK_PROFILE); - } - g->hookmask = mask; - lj_dispatch_update(g); - profile_unlock(ps); -} - -/* Trigger profile hook. Asynchronous call from OS-specific profile timer. */ -static void profile_trigger(ProfileState *ps) -{ - global_State *g = ps->g; - uint8_t mask; - profile_lock(ps); - ps->samples++; /* Always increment number of samples. */ - mask = g->hookmask; - if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT))) { /* Set profile hook. */ - int st = g->vmstate; - ps->vmstate = st >= 0 ? 'N' : - st == ~LJ_VMST_INTERP ? 'I' : - st == ~LJ_VMST_C ? 'C' : - st == ~LJ_VMST_GC ? 'G' : 'J'; - g->hookmask = (mask | HOOK_PROFILE); - lj_dispatch_update(g); - } - profile_unlock(ps); -} - -/* -- OS-specific profile timer handling ---------------------------------- */ - -#if LJ_PROFILE_SIGPROF - -/* SIGPROF handler. */ -static void profile_signal(int sig) -{ - UNUSED(sig); - profile_trigger(&profile_state); -} - -/* Start profiling timer. */ -static void profile_timer_start(ProfileState *ps) -{ - int interval = ps->interval; - struct itimerval tm; - struct sigaction sa; - tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000; - tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000; - setitimer(ITIMER_PROF, &tm, NULL); - sa.sa_flags = SA_RESTART; - sa.sa_handler = profile_signal; - sigemptyset(&sa.sa_mask); - sigaction(SIGPROF, &sa, &ps->oldsa); -} - -/* Stop profiling timer. 
*/ -static void profile_timer_stop(ProfileState *ps) -{ - struct itimerval tm; - tm.it_value.tv_sec = tm.it_interval.tv_sec = 0; - tm.it_value.tv_usec = tm.it_interval.tv_usec = 0; - setitimer(ITIMER_PROF, &tm, NULL); - sigaction(SIGPROF, &ps->oldsa, NULL); -} - -#elif LJ_PROFILE_PTHREAD - -/* POSIX timer thread. */ -static void *profile_thread(ProfileState *ps) -{ - int interval = ps->interval; -#if !LJ_TARGET_PS3 - struct timespec ts; - ts.tv_sec = interval / 1000; - ts.tv_nsec = (interval % 1000) * 1000000; -#endif - while (1) { -#if LJ_TARGET_PS3 - sys_timer_usleep(interval * 1000); -#else - nanosleep(&ts, NULL); -#endif - if (ps->abort) break; - profile_trigger(ps); - } - return NULL; -} - -/* Start profiling timer thread. */ -static void profile_timer_start(ProfileState *ps) -{ - pthread_mutex_init(&ps->lock, 0); - ps->abort = 0; - pthread_create(&ps->thread, NULL, (void *(*)(void *))profile_thread, ps); -} - -/* Stop profiling timer thread. */ -static void profile_timer_stop(ProfileState *ps) -{ - ps->abort = 1; - pthread_join(ps->thread, NULL); - pthread_mutex_destroy(&ps->lock); -} - -#elif LJ_PROFILE_WTHREAD - -/* Windows timer thread. */ -static DWORD WINAPI profile_thread(void *psx) -{ - ProfileState *ps = (ProfileState *)psx; - int interval = ps->interval; -#if LJ_TARGET_WINDOWS - ps->wmm_tbp(interval); -#endif - while (1) { - Sleep(interval); - if (ps->abort) break; - profile_trigger(ps); - } -#if LJ_TARGET_WINDOWS - ps->wmm_tep(interval); -#endif - return 0; -} - -/* Start profiling timer thread. */ -static void profile_timer_start(ProfileState *ps) -{ -#if LJ_TARGET_WINDOWS - if (!ps->wmm) { /* Load WinMM library on-demand. 
*/ - ps->wmm = LoadLibraryExA("winmm.dll", NULL, 0); - if (ps->wmm) { - ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod"); - ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod"); - if (!ps->wmm_tbp || !ps->wmm_tep) { - ps->wmm = NULL; - return; - } - } - } -#endif - InitializeCriticalSection(&ps->lock); - ps->abort = 0; - ps->thread = CreateThread(NULL, 0, profile_thread, ps, 0, NULL); -} - -/* Stop profiling timer thread. */ -static void profile_timer_stop(ProfileState *ps) -{ - ps->abort = 1; - WaitForSingleObject(ps->thread, INFINITE); - DeleteCriticalSection(&ps->lock); -} - -#endif - -/* -- Public profiling API ------------------------------------------------ */ - -/* Start profiling. */ -LUA_API void luaJIT_profile_start(lua_State *L, const char *mode, - luaJIT_profile_callback cb, void *data) -{ - ProfileState *ps = &profile_state; - int interval = LJ_PROFILE_INTERVAL_DEFAULT; - while (*mode) { - int m = *mode++; - switch (m) { - case 'i': - interval = 0; - while (*mode >= '0' && *mode <= '9') - interval = interval * 10 + (*mode++ - '0'); - if (interval <= 0) interval = 1; - break; -#if LJ_HASJIT - case 'l': case 'f': - L2J(L)->prof_mode = m; - lj_trace_flushall(L); - break; -#endif - default: /* Ignore unknown mode chars. */ - break; - } - } - if (ps->g) { - luaJIT_profile_stop(L); - if (ps->g) return; /* Profiler in use by another VM. */ - } - ps->g = G(L); - ps->interval = interval; - ps->cb = cb; - ps->data = data; - ps->samples = 0; - lj_buf_init(L, &ps->sb); - profile_timer_start(ps); -} - -/* Stop profiling. */ -LUA_API void luaJIT_profile_stop(lua_State *L) -{ - ProfileState *ps = &profile_state; - global_State *g = ps->g; - if (G(L) == g) { /* Only stop profiler if started by this VM. 
*/ - profile_timer_stop(ps); - g->hookmask &= ~HOOK_PROFILE; - lj_dispatch_update(g); -#if LJ_HASJIT - G2J(g)->prof_mode = 0; - lj_trace_flushall(L); -#endif - lj_buf_free(g, &ps->sb); - setmref(ps->sb.b, NULL); - setmref(ps->sb.e, NULL); - ps->g = NULL; - } -} - -/* Return a compact stack dump. */ -LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt, - int depth, size_t *len) -{ - ProfileState *ps = &profile_state; - SBuf *sb = &ps->sb; - setsbufL(sb, L); - lj_buf_reset(sb); - lj_debug_dumpstack(L, sb, fmt, depth); - *len = (size_t)sbuflen(sb); - return sbufB(sb); -} - -#endif diff --git a/src/lj_profile.h b/src/lj_profile.h deleted file mode 100644 index 0cccfd78cd..0000000000 --- a/src/lj_profile.h +++ /dev/null @@ -1,21 +0,0 @@ -/* -** Low-overhead profiling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -#ifndef _LJ_PROFILE_H -#define _LJ_PROFILE_H - -#include "lj_obj.h" - -#if LJ_HASPROFILE - -LJ_FUNC void LJ_FASTCALL lj_profile_interpreter(lua_State *L); -#if !LJ_PROFILE_SIGPROF -LJ_FUNC void LJ_FASTCALL lj_profile_hook_enter(global_State *g); -LJ_FUNC void LJ_FASTCALL lj_profile_hook_leave(global_State *g); -#endif - -#endif - -#endif diff --git a/src/lj_record.c b/src/lj_record.c index 9d0469c425..98e47afe85 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -8,21 +8,15 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_err.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_meta.h" #include "lj_frame.h" -#if LJ_HASFFI #include "lj_ctype.h" -#endif #include "lj_bc.h" #include "lj_ff.h" -#if LJ_HASPROFILE -#include "lj_debug.h" -#endif #include "lj_ir.h" #include "lj_jit.h" #include "lj_ircall.h" @@ -87,9 +81,9 @@ static void rec_check_slots(jit_State *J) BCReg s, nslots = J->baseslot + J->maxslot; int32_t depth = 0; cTValue *base = J->L->base - J->baseslot; - lua_assert(J->baseslot >= 1+LJ_FR2 && J->baseslot < LJ_MAX_JSLOTS); + lua_assert(J->baseslot >= 1+LJ_FR2); lua_assert(J->baseslot == 
1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME)); - lua_assert(nslots < LJ_MAX_JSLOTS); + lua_assert(nslots <= LJ_MAX_JSLOTS); for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; if (tr) { @@ -103,31 +97,23 @@ static void rec_check_slots(jit_State *J) } if (s == 0) { lua_assert(tref_isfunc(tr)); -#if LJ_FR2 } else if (s == 1) { lua_assert((tr & ~TREF_FRAME) == 0); -#endif } else if ((tr & TREF_FRAME)) { GCfunc *fn = gco2func(frame_gc(tv)); BCReg delta = (BCReg)(tv - frame_prev(tv)); -#if LJ_FR2 if (ref) lua_assert(ir_knum(ir)->u64 == tv->u64); tr = J->slot[s-1]; ir = IR(tref_ref(tr)); -#endif lua_assert(tref_isfunc(tr)); if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME) : (s == delta + LJ_FR2)); depth++; } else if ((tr & TREF_CONT)) { -#if LJ_FR2 if (ref) lua_assert(ir_knum(ir)->u64 == tv->u64); -#else - lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); -#endif lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME)); depth++; } else { @@ -219,8 +205,6 @@ TRef lj_record_constify(jit_State *J, cTValue *o) { if (tvisgcv(o)) return lj_ir_kgc(J, gcV(o), itype2irt(o)); - else if (tvisint(o)) - return lj_ir_kint(J, intV(o)); else if (tvisnum(o)) return lj_ir_knumint(J, numV(o)); else if (tvisbool(o)) @@ -242,7 +226,6 @@ typedef enum { static void canonicalize_slots(jit_State *J) { BCReg s; - if (LJ_DUALNUM) return; for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { TRef tr = J->slot[s]; if (tref_isinteger(tr)) { @@ -310,7 +293,7 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t) cTValue *tv = proto_knumtv(J->pt, bc_d(ins)); if (t == IRT_INT) { int32_t k = numberVint(tv); - if (tvisint(tv) || numV(tv) == (lua_Number)k) /* -0 is ok here. */ + if (numV(tv) == (lua_Number)k) /* -0 is ok here. */ return lj_ir_kint(J, k); return 0; /* Type mismatch. 
*/ } else { @@ -327,7 +310,7 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t) /* Load and optionally convert a FORI argument from a slot. */ static TRef fori_load(jit_State *J, BCReg slot, IRType t, int mode) { - int conv = (tvisint(&J->L->base[slot]) != (t==IRT_INT)) ? IRSLOAD_CONVERT : 0; + int conv = (0 != (t==IRT_INT)) ? IRSLOAD_CONVERT : 0; return sloadt(J, (int32_t)slot, t + (((mode & IRSLOAD_TYPECHECK) || (conv && t == IRT_INT && !(mode >> 16))) ? @@ -353,7 +336,7 @@ static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot, */ static int rec_for_direction(cTValue *o) { - return (tvisint(o) ? intV(o) : (int32_t)o->u32.hi) >= 0; + return ((int32_t)o->u32.hi) >= 0; } /* Simulate the runtime behavior of the FOR loop iterator. */ @@ -421,12 +404,11 @@ static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev, cTValue *tv = &J->L->base[ra]; TRef idx = J->base[ra+FORL_IDX]; IRType t = idx ? tref_type(idx) : - (init || LJ_DUALNUM) ? lj_opt_narrow_forl(J, tv) : IRT_NUM; - int mode = IRSLOAD_INHERIT + - ((!LJ_DUALNUM || tvisint(tv) == (t == IRT_INT)) ? IRSLOAD_READONLY : 0); + init ? lj_opt_narrow_forl(J, tv) : IRT_NUM; + int mode = IRSLOAD_INHERIT + IRSLOAD_READONLY; TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode); TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode); - int tc, dir = rec_for_direction(&tv[FORL_STEP]); + int dir = rec_for_direction(&tv[FORL_STEP]); lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); scev->t.irt = t; scev->dir = dir; @@ -434,17 +416,9 @@ static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev, scev->step = tref_ref(step); rec_for_check(J, t, dir, stop, step, init); scev->start = tref_ref(find_kinit(J, fori, ra+FORL_IDX, IRT_INT)); - tc = (LJ_DUALNUM && - !(scev->start && irref_isk(scev->stop) && irref_isk(scev->step) && - tvisint(&tv[FORL_IDX]) == (t == IRT_INT))) ? 
- IRSLOAD_TYPECHECK : 0; - if (tc) { - J->base[ra+FORL_STOP] = stop; - J->base[ra+FORL_STEP] = step; - } if (!idx) idx = fori_load(J, ra+FORL_IDX, t, - IRSLOAD_INHERIT + tc + (J->scev.start << 16)); + IRSLOAD_INHERIT + (J->scev.start << 16)); if (!init) J->base[ra+FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step); J->base[ra+FORL_EXT] = idx; @@ -479,8 +453,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) } else { /* Handle FORI/JFORI opcodes. */ BCReg i; lj_meta_for(J->L, tv); - t = (LJ_DUALNUM || tref_isint(tr[FORL_IDX])) ? lj_opt_narrow_forl(J, tv) : - IRT_NUM; + t = (tref_isint(tr[FORL_IDX])) ? lj_opt_narrow_forl(J, tv) : IRT_NUM; for (i = FORL_IDX; i <= FORL_STEP; i++) { if (!tr[i]) sload(J, ra+i); lua_assert(tref_isnumber_str(tr[i])); @@ -595,7 +568,8 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) /* Handle the case when an already compiled loop op is hit. */ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) { - if (J->parent == 0 && J->exitno == 0) { /* Root trace hit an inner loop. */ + /* Root trace hit an inner loop. */ + if (J->parent == 0 && J->exitno == 0 && !innerloopleft(J, J->startpc)) { /* Better let the inner loop spawn a side trace back here. */ lj_trace_err(J, LJ_TRERR_LINNER); } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ @@ -607,52 +581,6 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) } /* Side trace continues across a loop that's left or not entered. */ } -/* -- Record profiler hook checks ----------------------------------------- */ - -#if LJ_HASPROFILE - -/* Need to insert profiler hook check? 
*/ -static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc) -{ - GCproto *ppt; - lua_assert(J->prof_mode == 'f' || J->prof_mode == 'l'); - if (!pt) - return 0; - ppt = J->prev_pt; - J->prev_pt = pt; - if (pt != ppt && ppt) { - J->prev_line = -1; - return 1; - } - if (J->prof_mode == 'l') { - BCLine line = lj_debug_line(pt, proto_bcpos(pt, pc)); - BCLine pline = J->prev_line; - J->prev_line = line; - if (pline != line) - return 1; - } - return 0; -} - -static void rec_profile_ins(jit_State *J, const BCIns *pc) -{ - if (J->prof_mode && rec_profile_need(J, J->pt, pc)) { - emitir(IRTG(IR_PROF, IRT_NIL), 0, 0); - lj_snap_add(J); - } -} - -static void rec_profile_ret(jit_State *J) -{ - if (J->prof_mode == 'f') { - emitir(IRTG(IR_PROF, IRT_NIL), 0, 0); - J->prev_pt = NULL; - lj_snap_add(J); - } -} - -#endif - /* -- Record calls and returns -------------------------------------------- */ /* Specialize to the runtime value of the called function or its prototype. */ @@ -707,19 +635,13 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) lj_trace_err(J, LJ_TRERR_NOMM); for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */ fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1]; -#if LJ_FR2 fbase[2] = fbase[0]; -#endif fbase[0] = ix.mobj; /* Replace function. */ functv = &ix.mobjv; } kfunc = rec_call_specialize(J, funcV(functv), fbase[0]); -#if LJ_FR2 fbase[0] = kfunc; fbase[1] = TREF_FRAME; -#else - fbase[0] = kfunc | TREF_FRAME; -#endif J->maxslot = (BCReg)nargs; } @@ -731,6 +653,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) J->framedepth++; J->base += func+1+LJ_FR2; J->baseslot += func+1+LJ_FR2; + if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) + lj_trace_err(J, LJ_TRERR_STACKOV); } /* Record tail call. */ @@ -919,12 +843,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) { BCReg s, top = cont == lj_cont_cat ? 
J->maxslot : curr_proto(J->L)->framesize; -#if LJ_FR2 J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); J->base[top+1] = TREF_CONT; -#else - J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; -#endif J->framedepth++; for (s = J->maxslot; s < top; s++) J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ @@ -945,7 +865,7 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) /* The metatables of special userdata objects are treated as immutable. */ if (udtype != UDTYPE_USERDATA) { cTValue *mo; - if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) { + if (udtype == UDTYPE_FFI_CLIB) { /* Specialize to the C library namespace object. */ emitir(IRTG(IR_EQ, IRT_PGC), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); } else { @@ -975,14 +895,10 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) return 0; /* No metamethod. */ } /* The cdata metatable is treated as immutable. */ - if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; -#if LJ_GC64 + if (tref_iscdata(ix->tab)) goto immutable_mt; /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB, GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)])); -#else - ix->mt = mix.tab = lj_ir_ktab(J, mt); -#endif goto nocheck; } ix->mt = mt ? mix.tab : TREF_NIL; @@ -1026,9 +942,7 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) } ok: base[0] = ix->mobj; -#if LJ_FR2 base[1] = 0; -#endif copyTV(J->L, basev+0, &ix->mobjv); lj_record_call(J, func, 2); return 0; /* No result yet. */ @@ -1156,7 +1070,6 @@ static void rec_mm_comp(jit_State *J, RecordIndex *ix, int op) } } -#if LJ_HASFFI /* Setup call to cdata comparison metamethod. 
*/ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm) { @@ -1172,7 +1085,6 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm) lj_record_mm_lookup(J, ix, mm); rec_mm_callcomp(J, ix, op); } -#endif /* -- Indexed access ------------------------------------------------------ */ @@ -1294,7 +1206,7 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref, key = ix->key; if (tref_isnumber(key)) { int32_t k = numberVint(&ix->keyv); - if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k) + if (numV(&ix->keyv) != (lua_Number)k) k = LJ_MAX_ASIZE; if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */ TRef ikey = lj_opt_narrow_index(J, key); @@ -1497,7 +1409,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) keybarrier = 0; /* Previous non-nil value kept the key alive. */ } /* Convert int to number before storing. */ - if (!LJ_DUALNUM && tref_isinteger(ix->val)) + if (tref_isinteger(ix->val)) ix->val = emitir(IRTN(IR_CONV), ix->val, IRCONV_NUM_INT); emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val); if (keybarrier || tref_isgcv(ix->val)) @@ -1545,7 +1457,6 @@ static int rec_upvalue_constify(jit_State *J, GCupval *uvp) if (uvp->immutable) { cTValue *o = uvval(uvp); /* Don't constify objects that may retain large amounts of memory. 
*/ -#if LJ_HASFFI if (tviscdata(o)) { GCcdata *cd = cdataV(o); if (!cdataisv(cd) && !(cd->marked & LJ_GC_CDATA_FIN)) { @@ -1555,9 +1466,6 @@ static int rec_upvalue_constify(jit_State *J, GCupval *uvp) } return 0; } -#else - UNUSED(J); -#endif if (!(tvistab(o) || tvisudata(o) || tvisthread(o))) return 1; } @@ -1579,11 +1487,7 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) goto noconstify; kfunc = lj_ir_kfunc(J, J->fn); emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); -#if LJ_FR2 J->base[-2] = kfunc; -#else - J->base[-1] = kfunc | TREF_FRAME; -#endif fn = kfunc; } tr = lj_record_constify(J, uvval(uvp)); @@ -1628,7 +1532,7 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) return res; } else { /* Upvalue store. */ /* Convert int to number before storing. */ - if (!LJ_DUALNUM && tref_isinteger(val)) + if (tref_isinteger(val)) val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); emitir(IRT(IR_USTORE, tref_type(val)), uref, val); if (needbarrier && tref_isgcv(val)) @@ -1699,9 +1603,7 @@ static void rec_func_vararg(jit_State *J) if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */ -#if LJ_FR2 J->base[vframe-1] = TREF_FRAME; -#endif /* Copy fixarg slots up and set their original slots to nil. */ fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; for (s = 0; s < fixargs; s++) { @@ -1943,15 +1845,11 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond) const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; /* Set PC to opposite target to avoid re-recording the comp. in side trace. 
*/ -#if LJ_FR2 SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent]; uint64_t pcbase; memcpy(&pcbase, flink, sizeof(uint64_t)); pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8); memcpy(flink, &pcbase, sizeof(uint64_t)); -#else - J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); -#endif J->needsnap = 1; if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); lj_snap_shrink(J); /* Shrink last snapshot if possible. */ @@ -1967,6 +1865,13 @@ void lj_record_ins(jit_State *J) BCOp op; TRef ra, rb, rc; + if (J->nbclog < J->maxbclog) { + BCRecLog *log = &J->bclog[J->nbclog++]; + log->pt = J->pt; + log->pos = J->pt ? proto_bcpos(J->pt, J->pc) : -1; + log->framedepth = J->framedepth; + } + /* Perform post-processing action before recording the next instruction. */ if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) { switch (J->postproc) { @@ -2039,9 +1944,6 @@ void lj_record_ins(jit_State *J) rec_check_ir(J); #endif -#if LJ_HASPROFILE - rec_profile_ins(J, pc); -#endif /* Keep a copy of the runtime values of var/num/str operands. */ #define rav (&ix.valv) @@ -2071,8 +1973,7 @@ void lj_record_ins(jit_State *J) copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; case BCMpri: setpriV(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); - copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : - lj_ir_knumint(J, numV(tv)); } break; + copyTV(J->L, rcv, tv); ix.key = rc = lj_ir_knumint(J, numV(tv)); } break; case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc)); setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break; default: break; /* Handled later. */ @@ -2083,12 +1984,10 @@ void lj_record_ins(jit_State *J) /* -- Comparison ops ---------------------------------------------------- */ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: -#if LJ_HASFFI if (tref_iscdata(ra) || tref_iscdata(rc)) { rec_mm_comp_cdata(J, &ix, op, ((int)op & 2) ? 
MM_le : MM_lt); break; } -#endif /* Emit nothing for two numeric or string consts. */ if (!(tref_isk2(ra,rc) && tref_isnumber_str(ra) && tref_isnumber_str(rc))) { IRType ta = tref_isinteger(ra) ? IRT_INT : tref_type(ra); @@ -2135,12 +2034,10 @@ void lj_record_ins(jit_State *J) case BC_ISEQS: case BC_ISNES: case BC_ISEQN: case BC_ISNEN: case BC_ISEQP: case BC_ISNEP: -#if LJ_HASFFI if (tref_iscdata(ra) || tref_iscdata(rc)) { rec_mm_comp_cdata(J, &ix, op, MM_eq); break; } -#endif /* Emit nothing for two non-table, non-udata consts. */ if (!(tref_isk2(ra, rc) && !(tref_istab(ra) || tref_isudata(ra)))) { int diff; @@ -2166,9 +2063,7 @@ void lj_record_ins(jit_State *J) case BC_ISTYPE: case BC_ISNUM: /* These coercions need to correspond with lj_meta_istype(). */ - if (LJ_DUALNUM && rc == ~LJ_TNUMX+1) - ra = lj_opt_narrow_toint(J, ra); - else if (rc == ~LJ_TNUMX+2) + if (rc == ~LJ_TNUMX+2) ra = lj_ir_tonum(J, ra); else if (rc == ~LJ_TSTR+1) ra = lj_ir_tostr(J, ra); @@ -2250,11 +2145,7 @@ void lj_record_ins(jit_State *J) case BC_MOV: /* Clear gap of method call to avoid resurrecting previous refs. 
*/ if (ra > J->maxslot) { -#if LJ_FR2 memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef)); -#else - J->base[ra-1] = 0; -#endif } break; case BC_KSTR: case BC_KNUM: case BC_KPRI: @@ -2269,11 +2160,9 @@ void lj_record_ins(jit_State *J) J->base[ra++] = TREF_NIL; if (rc >= J->maxslot) J->maxslot = rc+1; break; -#if LJ_HASFFI case BC_KCDATA: rc = lj_ir_kgc(J, proto_kgc(J->pt, ~(ptrdiff_t)rc), IRT_CDATA); break; -#endif /* -- Upvalue and function ops ------------------------------------------ */ @@ -2364,9 +2253,6 @@ void lj_record_ins(jit_State *J) rc = (BCReg)(J->L->top - J->L->base) - ra + 1; /* fallthrough */ case BC_RET: case BC_RET0: case BC_RET1: -#if LJ_HASPROFILE - rec_profile_ret(J); -#endif lj_record_ret(J, ra, (ptrdiff_t)rc-1); break; @@ -2406,8 +2292,6 @@ void lj_record_ins(jit_State *J) case BC_IFORL: case BC_IITERL: case BC_ILOOP: - case BC_IFUNCF: - case BC_IFUNCV: lj_trace_err(J, LJ_TRERR_BLACKL); break; @@ -2419,6 +2303,7 @@ void lj_record_ins(jit_State *J) /* -- Function headers -------------------------------------------------- */ case BC_FUNCF: + case BC_IFUNCF: rec_func_lua(J); break; case BC_JFUNCF: @@ -2426,6 +2311,7 @@ void lj_record_ins(jit_State *J) break; case BC_FUNCV: + case BC_IFUNCV: rec_func_vararg(J); rec_func_lua(J); break; @@ -2457,9 +2343,7 @@ void lj_record_ins(jit_State *J) if (bcmode_a(op) == BCMdst && rc) { J->base[ra] = rc; if (ra >= J->maxslot) { -#if LJ_FR2 if (ra > J->maxslot) J->base[ra-1] = 0; -#endif J->maxslot = ra+1; } } @@ -2560,6 +2444,8 @@ void lj_record_setup(jit_State *J) J->bc_min = NULL; /* Means no limit. */ J->bc_extent = ~(MSize)0; + J->nbclog = 0; + /* Emit instructions for fixed references. Also triggers initial IR alloc. 
*/ emitir_raw(IRT(IR_BASE, IRT_PGC), J->parent, J->exitno); for (i = 0; i <= 2; i++) { @@ -2612,10 +2498,6 @@ void lj_record_setup(jit_State *J) if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); } -#if LJ_HASPROFILE - J->prev_pt = NULL; - J->prev_line = -1; -#endif #ifdef LUAJIT_ENABLE_CHECKHOOK /* Regularly check for instruction/line hooks from compiled code and ** exit to the interpreter if the hooks are set. @@ -2643,4 +2525,3 @@ void lj_record_setup(jit_State *J) #undef emitir_raw #undef emitir -#endif diff --git a/src/lj_record.h b/src/lj_record.h index 93d374d249..bffa165493 100644 --- a/src/lj_record.h +++ b/src/lj_record.h @@ -9,7 +9,6 @@ #include "lj_obj.h" #include "lj_jit.h" -#if LJ_HASJIT /* Context for recording an indexed load/store. */ typedef struct RecordIndex { TValue tabv; /* Runtime value of table (or indexed object). */ @@ -40,6 +39,5 @@ LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix); LJ_FUNC void lj_record_ins(jit_State *J); LJ_FUNC void lj_record_setup(jit_State *J); -#endif #endif diff --git a/src/lj_snap.c b/src/lj_snap.c index bb063c2b65..5287405d44 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_gc.h" #include "lj_tab.h" @@ -21,10 +20,8 @@ #include "lj_trace.h" #include "lj_snap.h" #include "lj_target.h" -#if LJ_HASFFI #include "lj_ctype.h" #include "lj_cdata.h" -#endif /* Pass IR on to next optimization in chain (FOLD). */ #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) @@ -32,31 +29,6 @@ /* Emit raw IR without passing through optimizations. */ #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) -/* -- Snapshot buffer allocation ------------------------------------------ */ - -/* Grow snapshot buffer. 
*/ -void lj_snap_grow_buf_(jit_State *J, MSize need) -{ - MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; - if (need > maxsnap) - lj_trace_err(J, LJ_TRERR_SNAPOV); - lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); - J->cur.snap = J->snapbuf; -} - -/* Grow snapshot map buffer. */ -void lj_snap_grow_map_(jit_State *J, MSize need) -{ - if (need < 2*J->sizesnapmap) - need = 2*J->sizesnapmap; - else if (need < 64) - need = 64; - J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, - J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry)); - J->cur.snapmap = J->snapmapbuf; - J->sizesnapmap = need; -} - /* -- Snapshot generation ------------------------------------------------- */ /* Add all modified slots to the snapshot. */ @@ -68,7 +40,6 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; IRRef ref = tref_ref(tr); -#if LJ_FR2 if (s == 1) { /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */ if ((tr & TREF_FRAME)) map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL); @@ -79,7 +50,6 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64); ref = tref_ref(tr); } -#endif if (ref) { SnapEntry sn = SNAP_TR(s, tr); IRIns *ir = &J->cur.ir[ref]; @@ -89,9 +59,8 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) if (!(ir->op2 & IRSLOAD_INHERIT)) continue; /* No need to restore readonly slots and unmodified non-parent slots. 
*/ - if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) && - (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) - sn |= SNAP_NORESTORE; + if ((ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) + sn |= SNAP_NORESTORE; } if (LJ_SOFTFP && irt_isnum(ir->t)) sn |= SNAP_SOFTFPNUM; @@ -108,31 +77,16 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) cTValue *lim = J->L->base - J->baseslot + LJ_FR2; GCfunc *fn = frame_func(frame); cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; -#if LJ_FR2 uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2); lua_assert(2 <= J->baseslot && J->baseslot <= 257); memcpy(map, &pcbase, sizeof(uint64_t)); -#else - MSize f = 0; - map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ -#endif while (frame > lim) { /* Backwards traversal of all frames above base. */ if (frame_islua(frame)) { -#if !LJ_FR2 - map[f++] = SNAP_MKPC(frame_pc(frame)); -#endif frame = frame_prevl(frame); } else if (frame_iscont(frame)) { -#if !LJ_FR2 - map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); - map[f++] = SNAP_MKPC(frame_contpc(frame)); -#endif frame = frame_prevd(frame); } else { lua_assert(!frame_isc(frame)); -#if !LJ_FR2 - map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); -#endif frame = frame_prevd(frame); continue; } @@ -140,13 +94,8 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) ftop = frame + funcproto(frame_func(frame))->framesize; } *topslot = (uint8_t)(ftop - lim); -#if LJ_FR2 lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t)); return 2; -#else - lua_assert(f == (MSize)(1 + J->framedepth)); - return f; -#endif } /* Take a snapshot of the current stack. */ @@ -156,7 +105,6 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) MSize nent; SnapEntry *p; /* Conservative estimate. 
*/ - lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1)); p = &J->cur.snapmap[nsnapmap]; nent = snapshot_slots(J, p, nslots); snap->nent = (uint8_t)nent; @@ -183,7 +131,6 @@ void lj_snap_add(jit_State *J) nsnapmap = J->cur.snap[--nsnap].mapofs; } else { nomerge: - lj_snap_grow_buf(J, nsnap+1); J->cur.nsnap = (uint16_t)(nsnap+1); } J->mergesnap = 0; @@ -511,16 +458,13 @@ void lj_snap_replay(jit_State *J, GCtrace *T) ir->o == IR_CNEW || ir->o == IR_CNEWI); if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); - if (LJ_HASFFI && ir->o == IR_CNEWI) { - if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) - snap_pref(J, T, map, nent, seen, (ir+1)->op2); - } else { + if (ir->o != IR_CNEWI) { IRIns *irs; for (irs = ir+1; irs < irlast; irs++) if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); - else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && + else if (LJ_SOFTFP && irs+1 < irlast && (irs+1)->o == IR_HIOP) snap_pref(J, T, map, nent, seen, (irs+1)->op2); } @@ -545,12 +489,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1); op2 = ir->op2; if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2); - if (LJ_HASFFI && ir->o == IR_CNEWI) { - if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) { - lj_needsplit(J); /* Emit joining HIOP. 
*/ - op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2, - snap_pref(J, T, map, nent, seen, (ir+1)->op2)); - } + if (ir->o == IR_CNEWI) { J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2); } else { IRIns *irs; @@ -579,7 +518,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); val = snap_pref(J, T, map, nent, seen, irc->op1); val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); - } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && + } else if (LJ_SOFTFP && irs+1 < irlast && (irs+1)->o == IR_HIOP) { IRType t = IRT_I64; if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) @@ -597,7 +536,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) continue; } tmp = emitir(irs->ot, tmp, val); - } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) { + } else if (irs->o == IR_XBAR && ir->o == IR_CNEW) { emitir(IRT(IR_XBAR, IRT_NIL), 0, 0); } } @@ -635,15 +574,8 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, int32_t *sps = &ex->spill[regsp_spill(rs)]; if (irt_isinteger(t)) { setintV(o, *sps); -#if !LJ_SOFTFP } else if (irt_isnum(t)) { o->u64 = *(uint64_t *)sps; -#endif -#if LJ_64 && !LJ_GC64 - } else if (irt_islightud(t)) { - /* 64 bit lightuserdata which may escape already has the tag bits. */ - o->u64 = *(uint64_t *)sps; -#endif } else { lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. 
*/ setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); @@ -653,19 +585,11 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, if (ra_noreg(r)) { lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o); - if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o)); return; } else if (irt_isinteger(t)) { setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]); -#if !LJ_SOFTFP } else if (irt_isnum(t)) { setnumV(o, ex->fpr[r-RID_MIN_FPR]); -#endif -#if LJ_64 && !LJ_GC64 - } else if (irt_is64(t)) { - /* 64 bit values that already have the tag bits. */ - o->u64 = ex->gpr[r-RID_MIN_GPR]; -#endif } else if (irt_ispri(t)) { setpriV(o, irt_toitype(t)); } else { @@ -674,7 +598,6 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, } } -#if LJ_HASFFI /* Restore raw data from the trace exit state. */ static void snap_restoredata(GCtrace *T, ExitState *ex, SnapNo snapno, BloomFilter rfilt, @@ -685,7 +608,7 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, int32_t *src; uint64_t tmp; if (irref_isk(ref)) { - if (ir->o == IR_KNUM || ir->o == IR_KINT64) { + if (ir_isk64(ir)) { src = (int32_t *)&ir[1]; } else if (sz == 8) { tmp = (uint64_t)(uint32_t)ir->i; @@ -712,20 +635,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, return; } src = (int32_t *)&ex->gpr[r-RID_MIN_GPR]; -#if !LJ_SOFTFP if (r >= RID_MAX_GPR) { src = (int32_t *)&ex->fpr[r-RID_MIN_FPR]; -#if LJ_TARGET_PPC - if (sz == 4) { /* PPC FPRs are always doubles. */ - *(float *)dst = (float)*(double *)src; - return; - } -#else - if (LJ_BE && sz == 4) src++; -#endif - } else -#endif - if (LJ_64 && LJ_BE && sz == 4) src++; + } } } lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); @@ -734,7 +646,6 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, else if (sz == 1) *(int8_t *)dst = (int8_t)*src; else *(int16_t *)dst = (int16_t)*src; } -#endif /* Unsink allocation from the trace exit state. Unsink sunk stores. 
*/ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, @@ -743,7 +654,6 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, { lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW || ir->o == IR_CNEWI); -#if LJ_HASFFI if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { CTState *cts = ctype_cts(J->L); CTypeID id = (CTypeID)T->ir[ir->op1].i; @@ -754,11 +664,6 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, if (ir->o == IR_CNEWI) { uint8_t *p = (uint8_t *)cdataptr(cd); lua_assert(sz == 4 || sz == 8); - if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) { - snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4); - if (LJ_BE) p += 4; - sz = 4; - } snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz); } else { IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; @@ -773,22 +678,16 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1; else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2; else szs = 4; - if (LJ_64 && iro->o == IR_KINT64) + if (iro->o == IR_KINT64) p += (int64_t)ir_k64(iro)->u64; else p += iro->i; lua_assert(p >= (uint8_t *)cdataptr(cd) && p + szs <= (uint8_t *)cdataptr(cd) + sz); - if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { - lua_assert(szs == 4); - snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4); - if (LJ_BE) p += 4; - } snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs); } } } else -#endif { IRIns *irs, *irlast; GCtab *t = ir->o == IR_TNEW ? 
lj_tab_new(J->L, ir->op1, ir->op2) : @@ -833,9 +732,6 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) SnapEntry *map = &T->snapmap[snap->mapofs]; #if !LJ_FR2 || defined(LUA_USE_ASSERT) SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2]; -#endif -#if !LJ_FR2 - ptrdiff_t ftsz0; #endif TValue *frame; BloomFilter rfilt = snap_renamefilter(T, snapno); @@ -853,9 +749,6 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) /* Fill stack slots with data from the registers and spill slots. */ frame = L->base-1-LJ_FR2; -#if !LJ_FR2 - ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ -#endif for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; if (!(sn & SNAP_NORESTORE)) { @@ -874,22 +767,9 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) continue; } snap_restoreval(J, T, ex, snapno, rfilt, ref, o); - if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { - TValue tmp; - snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); - o->u32.hi = tmp.u32.lo; -#if !LJ_FR2 - } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { - /* Overwrite tag with frame link. */ - setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); - L->base = o+1; -#endif - } } } -#if LJ_FR2 - L->base += (map[nent+LJ_BE] & 0xff); -#endif + L->base += (map[nent] & 0xff); lua_assert(map + nent == flinks); /* Compute current stack top. 
*/ @@ -910,4 +790,3 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) #undef emitir_raw #undef emitir -#endif diff --git a/src/lj_snap.h b/src/lj_snap.h index 2c9ae3d643..11c8b669e2 100644 --- a/src/lj_snap.h +++ b/src/lj_snap.h @@ -9,26 +9,11 @@ #include "lj_obj.h" #include "lj_jit.h" -#if LJ_HASJIT LJ_FUNC void lj_snap_add(jit_State *J); LJ_FUNC void lj_snap_purge(jit_State *J); LJ_FUNC void lj_snap_shrink(jit_State *J); LJ_FUNC IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir); LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T); LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr); -LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); -LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need); - -static LJ_AINLINE void lj_snap_grow_buf(jit_State *J, MSize need) -{ - if (LJ_UNLIKELY(need > J->sizesnap)) lj_snap_grow_buf_(J, need); -} - -static LJ_AINLINE void lj_snap_grow_map(jit_State *J, MSize need) -{ - if (LJ_UNLIKELY(need > J->sizesnapmap)) lj_snap_grow_map_(J, need); -} - -#endif #endif diff --git a/src/lj_state.c b/src/lj_state.c index 3cc0fea5c1..b0a7feadf8 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -19,14 +19,11 @@ #include "lj_meta.h" #include "lj_state.h" #include "lj_frame.h" -#if LJ_HASFFI #include "lj_ctype.h" -#endif #include "lj_trace.h" #include "lj_dispatch.h" #include "lj_vm.h" #include "lj_lex.h" -#include "lj_alloc.h" #include "luajit.h" /* -- Stack handling ------------------------------------------------------ */ @@ -98,7 +95,7 @@ void lj_state_shrinkstack(lua_State *L, MSize used) } /* Try to grow stack. */ -void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need) +void lj_state_growstack(lua_State *L, MSize need) { MSize n; if (L->stacksize > LJ_STACK_MAXEX) /* Overflow while handling overflow? 
*/ @@ -116,7 +113,7 @@ void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need) lj_err_msg(L, LJ_ERR_STKOV); } -void LJ_FASTCALL lj_state_growstack1(lua_State *L) +void lj_state_growstack1(lua_State *L) { lj_state_growstack(L, 1); } @@ -160,35 +157,31 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud) static void close_state(lua_State *L) { global_State *g = G(L); + jit_State *J = L2J(L); lj_func_closeuv(L, tvref(L->stack)); lj_gc_freeall(g); lua_assert(gcref(g->gc.root) == obj2gco(L)); lua_assert(g->strnum == 0); lj_trace_freestate(g); -#if LJ_HASFFI lj_ctype_freestate(g); -#endif lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); lj_buf_free(g, &g->tmpbuf); lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); + lj_mem_free(g, J->bclog, sizeof(BCRecLog)*J->maxbclog); + lj_mem_free(g, J->snapmapbuf, sizeof(SnapEntry)*65536); + lj_mem_free(g, J->snapbuf, sizeof(SnapShot)*65536); + lj_mem_free(g, J->irbuf, 65536*sizeof(IRIns)); + lj_mem_free(g, J->trace, TRACE_MAX * sizeof(GCRef *)); lua_assert(g->gc.total == sizeof(GG_State)); -#ifndef LUAJIT_USE_SYSMALLOC - if (g->allocf == lj_alloc_f) - lj_alloc_destroy(g->allocd); - else -#endif - g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); + g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); } -#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) -lua_State *lj_state_newstate(lua_Alloc f, void *ud) -#else LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) -#endif { GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State)); lua_State *L = &GG->L; global_State *g = &GG->g; + jit_State *J = &GG->J; if (GG == NULL || !checkptrGC(GG)) return NULL; memset(GG, 0, sizeof(GG_State)); L->gct = ~LJ_TTHREAD; @@ -207,9 +200,6 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) setnilV(registry(L)); setnilV(&g->nilnode.val); setnilV(&g->nilnode.key); -#if !LJ_GC64 - setmref(g->nilnode.freetop, &g->nilnode); -#endif lj_buf_init(NULL, &g->tmpbuf); 
g->gc.state = GCSpause; setgcref(g->gc.root, obj2gco(L)); @@ -217,6 +207,19 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) g->gc.total = sizeof(GG_State); g->gc.pause = LUAI_GCPAUSE; g->gc.stepmul = LUAI_GCMUL; + /* Statically allocate generous JIT scratch buffers. */ + J->sizesnap = sizeof(SnapShot)*65536; + J->sizesnapmap = sizeof(SnapEntry)*65536; + J->snapbuf = (SnapShot *)lj_mem_new(L, J->sizesnap); + J->snapmapbuf = (SnapEntry *)lj_mem_new(L, J->sizesnapmap); + J->maxbclog = 65536; + J->bclog = (BCRecLog *)lj_mem_new(L, sizeof(BCRecLog)*J->maxbclog); + J->nbclog = 0; + J->irbuf = (IRIns *)lj_mem_new(L, sizeof(IRIns)*65536); + J->trace = (GCRef *)lj_mem_new(L, TRACE_MAX * sizeof(GCRef *)); + if (!(J->irbuf && J->snapbuf && J->bclog && J->snapmapbuf && J->trace)) + return NULL; + memset(J->trace, 0, TRACE_MAX * sizeof(GCRef *)); lj_dispatch_init((GG_State *)L); L->status = LUA_ERRERR+1; /* Avoid touching the stack upon memory error. */ if (lj_vm_cpcall(L, NULL, NULL, cpluaopen) != 0) { @@ -224,7 +227,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) close_state(L); return NULL; } - L->status = 0; + L->status = LUA_OK; return L; } @@ -243,23 +246,18 @@ LUA_API void lua_close(lua_State *L) global_State *g = G(L); int i; L = mainthread(g); /* Only the main thread can be closed. */ -#if LJ_HASPROFILE - luaJIT_profile_stop(L); -#endif setgcrefnull(g->cur_L); lj_func_closeuv(L, tvref(L->stack)); lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */ -#if LJ_HASJIT G2J(g)->flags &= ~JIT_F_ON; G2J(g)->state = LJ_TRACE_IDLE; lj_dispatch_update(g); -#endif for (i = 0;;) { hook_enter(g); - L->status = 0; + L->status = LUA_OK; L->base = L->top = tvref(L->stack) + 1 + LJ_FR2; L->cframe = NULL; - if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) { + if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == LUA_OK) { if (++i >= 10) break; lj_gc_separateudata(g, 1); /* Separate udata again. 
*/ if (gcref(g->gc.mmudata) == NULL) /* Until nothing is left to do. */ @@ -274,7 +272,7 @@ lua_State *lj_state_new(lua_State *L) lua_State *L1 = lj_mem_newobj(L, lua_State); L1->gct = ~LJ_TTHREAD; L1->dummy_ffid = FF_C; - L1->status = 0; + L1->status = LUA_OK; L1->stacksize = 0; setmref(L1->stack, NULL); L1->cframe = NULL; @@ -287,7 +285,7 @@ lua_State *lj_state_new(lua_State *L) return L1; } -void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) +void lj_state_free(global_State *g, lua_State *L) { lua_assert(L != mainthread(g)); if (obj2gco(L) == gcref(g->cur_L)) diff --git a/src/lj_state.h b/src/lj_state.h index 02a0eafa33..86ad33ea88 100644 --- a/src/lj_state.h +++ b/src/lj_state.h @@ -16,8 +16,8 @@ LJ_FUNC void lj_state_relimitstack(lua_State *L); LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used); -LJ_FUNCA void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need); -LJ_FUNC void LJ_FASTCALL lj_state_growstack1(lua_State *L); +LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need); +LJ_FUNC void lj_state_growstack1(lua_State *L); static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) { @@ -27,9 +27,6 @@ static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) } LJ_FUNC lua_State *lj_state_new(lua_State *L); -LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); -#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) -LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud); -#endif +LJ_FUNC void lj_state_free(global_State *g, lua_State *L); #endif diff --git a/src/lj_str.c b/src/lj_str.c index 264dedc16e..583f67f3f6 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -15,7 +15,7 @@ /* -- String helpers ------------------------------------------------------ */ /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ -int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) +int32_t lj_str_cmp(GCstr *a, GCstr *b) { MSize i, n = a->len > b->len ? 
b->len : a->len; for (i = 0; i < n; i += 4) { @@ -23,9 +23,7 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) uint32_t va = *(const uint32_t *)(strdata(a)+i); uint32_t vb = *(const uint32_t *)(strdata(b)+i); if (va != vb) { -#if LJ_LE va = lj_bswap(va); vb = lj_bswap(vb); -#endif i -= n; if ((int32_t)i >= -3) { va >>= 32+(i<<3); vb >>= 32+(i<<3); @@ -37,27 +35,6 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) return (int32_t)(a->len - b->len); } -/* Fast string data comparison. Caveat: unaligned access to 1st string! */ -static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) -{ - MSize i = 0; - lua_assert(len > 0); - lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4); - do { /* Note: innocuous access up to end of string + 3. */ - uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i); - if (v) { - i -= len; -#if LJ_LE - return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1; -#else - return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1; -#endif - } - i += 4; - } while (i < len); - return 0; -} - /* Find fixed string p inside string s. */ const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen) { @@ -149,26 +126,14 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) h ^= b; h -= lj_rol(b, 16); /* Check if the string has already been interned. */ o = gcref(g->strhash[h & g->strmask]); - if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { - while (o != NULL) { - GCstr *sx = gco2str(o); - if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) { - /* Resurrect if dead. Can only happen with fixstring() (keywords). */ - if (isdead(g, o)) flipwhite(o); - return sx; /* Return existing string. */ - } - o = gcnext(o); - } - } else { /* Slow path: end of string is too close to a page boundary. */ - while (o != NULL) { - GCstr *sx = gco2str(o); - if (sx->len == len && memcmp(str, strdata(sx), len) == 0) { - /* Resurrect if dead. 
Can only happen with fixstring() (keywords). */ - if (isdead(g, o)) flipwhite(o); - return sx; /* Return existing string. */ - } - o = gcnext(o); + while (o != NULL) { + GCstr *sx = gco2str(o); + if (sx->hash == h && sx->len == len && memcmp(str, strdata(sx), len) == 0) { + /* Resurrect if dead. Can only happen with fixstring() (keywords). */ + if (isdead(g, o)) flipwhite(o); + return sx; /* Return existing string. */ } + o = gcnext(o); } /* Nope, create a new string. */ s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); @@ -189,7 +154,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) return s; /* Return newly interned string. */ } -void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) +void lj_str_free(global_State *g, GCstr *s) { g->strnum--; lj_mem_free(g, s, sizestring(s)); diff --git a/src/lj_str.h b/src/lj_str.h index 85c1e405bd..c6e438293b 100644 --- a/src/lj_str.h +++ b/src/lj_str.h @@ -11,7 +11,7 @@ #include "lj_obj.h" /* String helpers. */ -LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); +LJ_FUNC int32_t lj_str_cmp(GCstr *a, GCstr *b); LJ_FUNC const char *lj_str_find(const char *s, const char *f, MSize slen, MSize flen); LJ_FUNC int lj_str_haspattern(GCstr *s); @@ -19,7 +19,7 @@ LJ_FUNC int lj_str_haspattern(GCstr *s); /* String interning. 
*/ LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); -LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); +LJ_FUNC void lj_str_free(global_State *g, GCstr *s); #define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c index d7893ce981..7224f89946 100644 --- a/src/lj_strfmt.c +++ b/src/lj_strfmt.c @@ -25,7 +25,7 @@ static const uint8_t strfmt_map[('x'-'A')+1] = { 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X }; -SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs) +SFormat lj_strfmt_parse(FormatState *fs) { const uint8_t *p = fs->p, *e = fs->e; fs->str = (const char *)p; @@ -93,7 +93,7 @@ SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs) { uint32_t d = (x*(((1<>sh; x -= d*sc; *p++ = (char)('0'+d); } /* Write integer to buffer. */ -char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k) +char * lj_strfmt_wint(char *p, int32_t k) { uint32_t u = (uint32_t)k; if (k < 0) { u = (uint32_t)-k; *p++ = '-'; } @@ -126,7 +126,7 @@ char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k) #undef WINT_R /* Write pointer to buffer. */ -char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v) +char * lj_strfmt_wptr(char *p, const void *v) { ptrdiff_t x = (ptrdiff_t)v; MSize i, n = STRFMT_MAXBUF_PTR; @@ -134,10 +134,8 @@ char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v) *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L'; return p; } -#if LJ_64 /* Shorten output for 64 bit pointers. */ n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0); -#endif p[0] = '0'; p[1] = 'x'; for (i = n-1; i >= 2; i--, x >>= 4) @@ -146,7 +144,7 @@ char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v) } /* Write ULEB128 to buffer. 
*/ -char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v) +char * lj_strfmt_wuleb128(char *p, uint32_t v) { for (; v >= 0x80; v >>= 7) *p++ = (char)((v & 0x7f) | 0x80); @@ -161,8 +159,6 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) if (tvisstr(o)) { *lenp = strV(o)->len; return strVdata(o); - } else if (tvisint(o)) { - sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); } else if (tvisnum(o)) { sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n); } else { @@ -175,28 +171,26 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) /* -- Unformatted conversions to buffer ----------------------------------- */ /* Add integer to buffer. */ -SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k) +SBuf * lj_strfmt_putint(SBuf *sb, int32_t k) { setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k)); return sb; } -#if LJ_HASJIT /* Add number to buffer. */ -SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o) +SBuf * lj_strfmt_putnum(SBuf *sb, cTValue *o) { return lj_strfmt_putfnum(sb, STRFMT_G14, o->n); } -#endif -SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v) +SBuf * lj_strfmt_putptr(SBuf *sb, const void *v) { setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v)); return sb; } /* Add quoted string to buffer. */ -SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str) +SBuf * lj_strfmt_putquoted(SBuf *sb, GCstr *str) { const char *s = strdata(str); MSize len = str->len; @@ -349,7 +343,7 @@ SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) /* -- Conversions to strings ---------------------------------------------- */ /* Convert integer to string. */ -GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k) +GCstr * lj_strfmt_int(lua_State *L, int32_t k) { char buf[STRFMT_MAXBUF_INT]; MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf); @@ -357,23 +351,21 @@ GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k) } /* Convert integer or number to string. 
*/ -GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o) +GCstr * lj_strfmt_number(lua_State *L, cTValue *o) { - return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o); + return lj_strfmt_num(L, o); } -#if LJ_HASJIT /* Convert char value to string. */ -GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c) +GCstr * lj_strfmt_char(lua_State *L, int c) { char buf[1]; buf[0] = c; return lj_str_new(L, buf, 1); } -#endif /* Raw conversion of object to string. */ -GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o) +GCstr * lj_strfmt_obj(lua_State *L, cTValue *o) { if (tvisstr(o)) { return strV(o); diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h index 6e1d9017e6..cb9f224a7e 100644 --- a/src/lj_strfmt.h +++ b/src/lj_strfmt.h @@ -73,7 +73,7 @@ typedef enum FormatType { #define STRFMT_MAXBUF_PTR (2+2*sizeof(ptrdiff_t)) /* "0x" + hex ptr. */ /* Format parser. */ -LJ_FUNC SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs); +LJ_FUNC SFormat lj_strfmt_parse(FormatState *fs); static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len) { @@ -83,18 +83,16 @@ static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len) } /* Raw conversions. */ -LJ_FUNC char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k); -LJ_FUNC char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v); -LJ_FUNC char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v); +LJ_FUNC char * lj_strfmt_wint(char *p, int32_t k); +LJ_FUNC char * lj_strfmt_wptr(char *p, const void *v); +LJ_FUNC char * lj_strfmt_wuleb128(char *p, uint32_t v); LJ_FUNC const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp); /* Unformatted conversions to buffer. 
*/ -LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k); -#if LJ_HASJIT -LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o); -#endif -LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v); -LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str); +LJ_FUNC SBuf * lj_strfmt_putint(SBuf *sb, int32_t k); +LJ_FUNC SBuf * lj_strfmt_putnum(SBuf *sb, cTValue *o); +LJ_FUNC SBuf * lj_strfmt_putptr(SBuf *sb, const void *v); +LJ_FUNC SBuf * lj_strfmt_putquoted(SBuf *sb, GCstr *str); /* Formatted conversions to buffer. */ LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k); @@ -105,13 +103,11 @@ LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c); LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str); /* Conversions to strings. */ -LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k); -LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o); -LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o); -#if LJ_HASJIT -LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c); -#endif -LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o); +LJ_FUNC GCstr * lj_strfmt_int(lua_State *L, int32_t k); +LJ_FUNCA GCstr * lj_strfmt_num(lua_State *L, cTValue *o); +LJ_FUNCA GCstr * lj_strfmt_number(lua_State *L, cTValue *o); +LJ_FUNC GCstr * lj_strfmt_char(lua_State *L, int c); +LJ_FUNC GCstr * lj_strfmt_obj(lua_State *L, cTValue *o); /* Internal string formatting. */ LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c index 9271f68a11..49a0ed4421 100644 --- a/src/lj_strfmt_num.c +++ b/src/lj_strfmt_num.c @@ -112,13 +112,8 @@ static char *lj_strfmt_wuint9(char *p, uint32_t u) ** enough digits to make both %.99e and %.99f do the right thing. 
*/ -#if LJ_64 #define ND_MUL2K_MAX_SHIFT 29 #define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) / 1000000000)) -#else -#define ND_MUL2K_MAX_SHIFT 11 -#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) >> 9) / 1953125) -#endif /* Multiply nd by 2^k and add carry_in (ndlo is assumed to be zero). */ static uint32_t nd_mul2k(uint32_t* nd, uint32_t ndhi, uint32_t k, @@ -583,7 +578,7 @@ SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n) /* -- Conversions to strings ---------------------------------------------- */ /* Convert number to string. */ -GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o) +GCstr * lj_strfmt_num(lua_State *L, cTValue *o) { char buf[STRFMT_MAXBUF_NUM]; MSize len = (MSize)(lj_strfmt_wfnum(NULL, STRFMT_G14, o->n, buf) - buf); diff --git a/src/lj_strscan.c b/src/lj_strscan.c index f5f35c9602..dfa7668de3 100644 --- a/src/lj_strscan.c +++ b/src/lj_strscan.c @@ -79,7 +79,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg) /* Avoid double rounding for denormals. */ if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) { /* NYI: all of this generates way too much code on 32 bit CPUs. */ -#if defined(__GNUC__) && LJ_64 +#if defined(__GNUC__) int32_t b = (int32_t)(__builtin_clzll(x)^63); #else int32_t b = (x>>32) ? 
32+(int32_t)lj_fls((uint32_t)(x>>32)) : @@ -522,7 +522,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) } } -int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o) +int lj_strscan_num(GCstr *str, TValue *o) { StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, STRSCAN_OPT_TONUM); @@ -530,16 +530,6 @@ int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o) return (fmt != STRSCAN_ERROR); } -#if LJ_DUALNUM -int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o) -{ - StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, - STRSCAN_OPT_TOINT); - lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT); - if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM); - return (fmt != STRSCAN_ERROR); -} -#endif #undef DNEXT #undef DPREV diff --git a/src/lj_strscan.h b/src/lj_strscan.h index 6fb0dda08c..dda28ff15d 100644 --- a/src/lj_strscan.h +++ b/src/lj_strscan.h @@ -23,12 +23,8 @@ typedef enum { } StrScanFmt; LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt); -LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o); -#if LJ_DUALNUM -LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o); -#else +LJ_FUNC int lj_strscan_num(GCstr *str, TValue *o); #define lj_strscan_number(s, o) lj_strscan_num((s), (o)) -#endif /* Check for number or convert string to number/int in-place (!). 
*/ static LJ_AINLINE int lj_strscan_numberobj(TValue *o) diff --git a/src/lj_tab.c b/src/lj_tab.c index 47c0cfd34a..c181cd25a6 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c @@ -28,17 +28,12 @@ static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) #define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) #define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) -#if LJ_GC64 #define hashgcref(t, r) \ hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32)) -#else -#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) -#endif /* Hash an arbitrary key and return its anchor position in the hash table. */ static Node *hashkey(const GCtab *t, cTValue *key) { - lua_assert(!tvisint(key)); if (tvisstr(key)) return hashstr(t, strV(key)); else if (tvisnum(key)) @@ -114,9 +109,7 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) t->hmask = 0; nilnode = &G(L)->nilnode; setmref(t->node, nilnode); -#if LJ_GC64 setmref(t->freetop, nilnode); -#endif } else { /* Otherwise separately allocate the array part. */ Node *nilnode; t = lj_mem_newobj(L, GCtab); @@ -129,9 +122,7 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) t->hmask = 0; nilnode = &G(L)->nilnode; setmref(t->node, nilnode); -#if LJ_GC64 setmref(t->freetop, nilnode); -#endif if (asize > 0) { if (asize > LJ_MAX_ASIZE) lj_err_msg(L, LJ_ERR_TABOV); @@ -169,18 +160,16 @@ GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h) return lj_tab_new(L, (uint32_t)(a > 0 ? a+1 : 0), hsize2hbits(h)); } -#if LJ_HASJIT -GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) +GCtab * lj_tab_new1(lua_State *L, uint32_t ahsize) { GCtab *t = newtab(L, ahsize & 0xffffff, ahsize >> 24); clearapart(t); if (t->hmask > 0) clearhpart(t); return t; } -#endif /* Duplicate a table. 
*/ -GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) +GCtab * lj_tab_dup(lua_State *L, const GCtab *kt) { GCtab *t; uint32_t asize, hmask; @@ -219,7 +208,7 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) } /* Clear a table. */ -void LJ_FASTCALL lj_tab_clear(GCtab *t) +void lj_tab_clear(GCtab *t) { clearapart(t); if (t->hmask > 0) { @@ -230,7 +219,7 @@ void LJ_FASTCALL lj_tab_clear(GCtab *t) } /* Free a table. */ -void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) +void lj_tab_free(global_State *g, GCtab *t) { if (t->hmask > 0) lj_mem_freevec(g, noderef(t->node), t->hmask+1, Node); @@ -278,9 +267,7 @@ void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) } else { global_State *g = G(L); setmref(t->node, &g->nilnode); -#if LJ_GC64 setmref(t->freetop, &g->nilnode); -#endif t->hmask = 0; } if (asize < oldasize) { /* Array part shrinks? */ @@ -310,7 +297,6 @@ void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) static uint32_t countint(cTValue *key, uint32_t *bins) { - lua_assert(!tvisint(key)); if (tvisnum(key)) { lua_Number nk = numV(key); int32_t k = lj_num2int(nk); @@ -385,12 +371,10 @@ static void rehashtab(lua_State *L, GCtab *t, cTValue *ek) lj_tab_resize(L, t, asize, hsize2hbits(total)); } -#if LJ_HASFFI void lj_tab_rehash(lua_State *L, GCtab *t) { rehashtab(L, t, niltv(L)); } -#endif void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize) { @@ -399,7 +383,7 @@ void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize) /* -- Table getters ------------------------------------------------------- */ -cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key) +cTValue * lj_tab_getinth(GCtab *t, int32_t key) { TValue k; Node *n; @@ -428,10 +412,6 @@ cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key) cTValue *tv = lj_tab_getstr(t, strV(key)); if (tv) return tv; - } else if (tvisint(key)) { - cTValue *tv = lj_tab_getint(t, intV(key)); - if (tv) - return tv; } else if 
(tvisnum(key)) { lua_Number nk = numV(key); int32_t k = lj_num2int(nk); @@ -540,8 +520,6 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key) t->nomm = 0; /* Invalidate negative metamethod cache. */ if (tvisstr(key)) { return lj_tab_setstr(L, t, strV(key)); - } else if (tvisint(key)) { - return lj_tab_setint(L, t, intV(key)); } else if (tvisnum(key)) { lua_Number nk = numV(key); int32_t k = lj_num2int(nk); @@ -566,14 +544,7 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key) /* Get the traversal index of a key. */ static uint32_t keyindex(lua_State *L, GCtab *t, cTValue *key) { - TValue tmp; - if (tvisint(key)) { - int32_t k = intV(key); - if ((uint32_t)k < t->asize) - return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */ - setnumV(&tmp, (lua_Number)k); - key = &tmp; - } else if (tvisnum(key)) { + if (tvisnum(key)) { lua_Number nk = numV(key); int32_t k = lj_num2int(nk); if ((uint32_t)k < t->asize && nk == (lua_Number)k) @@ -646,7 +617,7 @@ static MSize unbound_search(GCtab *t, MSize j) ** Try to find a boundary in table `t'. A `boundary' is an integer index ** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). */ -MSize LJ_FASTCALL lj_tab_len(GCtab *t) +MSize lj_tab_len(GCtab *t) { MSize j = (MSize)t->asize; if (j > 1 && tvisnil(arrayslot(t, j-1))) { diff --git a/src/lj_tab.h b/src/lj_tab.h index 71e34945e8..8d46263b64 100644 --- a/src/lj_tab.h +++ b/src/lj_tab.h @@ -17,17 +17,10 @@ /* Scramble the bits of numbers and pointers. */ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi) { -#if LJ_TARGET_X86ORX64 /* Prefer variant that compiles well for a 2-operand CPU. 
*/ lo ^= hi; hi = lj_rol(hi, HASH_ROT1); lo -= hi; hi = lj_rol(hi, HASH_ROT2); hi ^= lo; hi -= lj_rol(lo, HASH_ROT3); -#else - lo ^= hi; - lo = lo - lj_rol(hi, HASH_ROT1); - hi = lo ^ lj_rol(hi, HASH_ROT1 + HASH_ROT2); - hi = hi - lj_rol(lo, HASH_ROT3); -#endif return hi; } @@ -35,21 +28,17 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi) LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); LJ_FUNC GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h); -#if LJ_HASJIT -LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); -#endif -LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); -LJ_FUNC void LJ_FASTCALL lj_tab_clear(GCtab *t); -LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); -#if LJ_HASFFI +LJ_FUNC GCtab * lj_tab_new1(lua_State *L, uint32_t ahsize); +LJ_FUNCA GCtab * lj_tab_dup(lua_State *L, const GCtab *kt); +LJ_FUNC void lj_tab_clear(GCtab *t); +LJ_FUNC void lj_tab_free(global_State *g, GCtab *t); LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t); -#endif LJ_FUNC void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits); LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); /* Caveat: all getters except lj_tab_get() can return NULL! */ -LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key); +LJ_FUNCA cTValue * lj_tab_getinth(GCtab *t, int32_t key); LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key); LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key); @@ -68,6 +57,6 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); (inarray((t), (key)) ? 
arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key))) LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); -LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); +LJ_FUNCA MSize lj_tab_len(GCtab *t); #endif diff --git a/src/lj_target.h b/src/lj_target.h index 8dcae957f0..14bd9ca3bb 100644 --- a/src/lj_target.h +++ b/src/lj_target.h @@ -55,11 +55,7 @@ typedef uint32_t RegSP; /* Bitset for registers. 32 registers suffice for most architectures. ** Note that one set holds bits for both GPRs and FPRs. */ -#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 -typedef uint64_t RegSet; -#else typedef uint32_t RegSet; -#endif #define RID2RSET(r) (((RegSet)1) << (r)) #define RSET_EMPTY ((RegSet)0) @@ -69,13 +65,8 @@ typedef uint32_t RegSet; #define rset_set(rs, r) (rs |= RID2RSET(r)) #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) -#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 -#define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) -#define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) -#else #define rset_picktop(rs) ((Reg)lj_fls(rs)) #define rset_pickbot(rs) ((Reg)lj_ffs(rs)) -#endif /* -- Register allocation cost -------------------------------------------- */ @@ -134,19 +125,7 @@ typedef uint32_t RegCost; /* -- Target-specific definitions ----------------------------------------- */ -#if LJ_TARGET_X86ORX64 #include "lj_target_x86.h" -#elif LJ_TARGET_ARM -#include "lj_target_arm.h" -#elif LJ_TARGET_ARM64 -#include "lj_target_arm64.h" -#elif LJ_TARGET_PPC -#include "lj_target_ppc.h" -#elif LJ_TARGET_MIPS -#include "lj_target_mips.h" -#else -#error "Missing include for target CPU" -#endif #ifdef EXITSTUBS_PER_GROUP /* Return the address of an exit stub. */ diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h deleted file mode 100644 index 5551b1f1ce..0000000000 --- a/src/lj_target_arm.h +++ /dev/null @@ -1,270 +0,0 @@ -/* -** Definitions for ARM CPUs. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -*/ - -#ifndef _LJ_TARGET_ARM_H -#define _LJ_TARGET_ARM_H - -/* -- Registers IDs ------------------------------------------------------- */ - -#define GPRDEF(_) \ - _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ - _(R8) _(R9) _(R10) _(R11) _(R12) _(SP) _(LR) _(PC) -#if LJ_SOFTFP -#define FPRDEF(_) -#else -#define FPRDEF(_) \ - _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \ - _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) -#endif -#define VRIDDEF(_) - -#define RIDENUM(name) RID_##name, - -enum { - GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ - FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ - RID_MAX, - RID_TMP = RID_LR, - - /* Calling conventions. */ - RID_RET = RID_R0, - RID_RETLO = RID_R0, - RID_RETHI = RID_R1, -#if LJ_SOFTFP - RID_FPRET = RID_R0, -#else - RID_FPRET = RID_D0, -#endif - - /* These definitions must match with the *.dasc file(s): */ - RID_BASE = RID_R9, /* Interpreter BASE. */ - RID_LPC = RID_R6, /* Interpreter PC. */ - RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */ - RID_LREG = RID_R8, /* Interpreter L. */ - - /* Register ranges [min, max) and number of registers. */ - RID_MIN_GPR = RID_R0, - RID_MAX_GPR = RID_PC+1, - RID_MIN_FPR = RID_MAX_GPR, -#if LJ_SOFTFP - RID_MAX_FPR = RID_MIN_FPR, -#else - RID_MAX_FPR = RID_D15+1, -#endif - RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, - RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR -}; - -#define RID_NUM_KREF RID_NUM_GPR -#define RID_MIN_KREF RID_R0 - -/* -- Register sets ------------------------------------------------------- */ - -/* Make use of all registers, except sp, lr and pc. 
*/ -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1)) -#define RSET_GPREVEN \ - (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \ - RID2RSET(RID_R8)|RID2RSET(RID_R10)) -#define RSET_GPRODD \ - (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \ - RID2RSET(RID_R9)|RID2RSET(RID_R11)) -#if LJ_SOFTFP -#define RSET_FPR 0 -#else -#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) -#endif -#define RSET_ALL (RSET_GPR|RSET_FPR) -#define RSET_INIT RSET_ALL - -/* ABI-specific register sets. lr is an implicit scratch register. */ -#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12)) -#ifdef __APPLE__ -#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9)) -#else -#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_ -#endif -#if LJ_SOFTFP -#define RSET_SCRATCH_FPR 0 -#else -#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1)) -#endif -#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) -#define REGARG_FIRSTGPR RID_R0 -#define REGARG_LASTGPR RID_R3 -#define REGARG_NUMGPR 4 -#if LJ_ABI_SOFTFP -#define REGARG_FIRSTFPR 0 -#define REGARG_LASTFPR 0 -#define REGARG_NUMFPR 0 -#else -#define REGARG_FIRSTFPR RID_D0 -#define REGARG_LASTFPR RID_D7 -#define REGARG_NUMFPR 8 -#endif - -/* -- Spill slots --------------------------------------------------------- */ - -/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. -** -** SPS_FIXED: Available fixed spill slots in interpreter frame. -** This definition must match with the *.dasc file(s). -** -** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. -*/ -#define SPS_FIXED 2 -#define SPS_FIRST 2 - -#define SPOFS_TMP 0 - -#define sps_scale(slot) (4 * (int32_t)(slot)) -#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) - -/* -- Exit state ---------------------------------------------------------- */ - -/* This definition must match with the *.dasc file(s). 
*/ -typedef struct { -#if !LJ_SOFTFP - lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ -#endif - int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ - int32_t spill[256]; /* Spill slots. */ -} ExitState; - -/* PC after instruction that caused an exit. Used to find the trace number. */ -#define EXITSTATE_PCREG RID_PC -/* Highest exit + 1 indicates stack check. */ -#define EXITSTATE_CHECKEXIT 1 - -#define EXITSTUB_SPACING 4 -#define EXITSTUBS_PER_GROUP 32 - -/* -- Instructions -------------------------------------------------------- */ - -/* Instruction fields. */ -#define ARMF_CC(ai, cc) (((ai) ^ ARMI_CCAL) | ((cc) << 28)) -#define ARMF_N(r) ((r) << 16) -#define ARMF_D(r) ((r) << 12) -#define ARMF_S(r) ((r) << 8) -#define ARMF_M(r) (r) -#define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7)) -#define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r)) - -typedef enum ARMIns { - ARMI_CCAL = 0xe0000000, - ARMI_S = 0x000100000, - ARMI_K12 = 0x02000000, - ARMI_KNEG = 0x00200000, - ARMI_LS_W = 0x00200000, - ARMI_LS_U = 0x00800000, - ARMI_LS_P = 0x01000000, - ARMI_LS_R = 0x02000000, - ARMI_LSX_I = 0x00400000, - - ARMI_AND = 0xe0000000, - ARMI_EOR = 0xe0200000, - ARMI_SUB = 0xe0400000, - ARMI_RSB = 0xe0600000, - ARMI_ADD = 0xe0800000, - ARMI_ADC = 0xe0a00000, - ARMI_SBC = 0xe0c00000, - ARMI_RSC = 0xe0e00000, - ARMI_TST = 0xe1100000, - ARMI_TEQ = 0xe1300000, - ARMI_CMP = 0xe1500000, - ARMI_CMN = 0xe1700000, - ARMI_ORR = 0xe1800000, - ARMI_MOV = 0xe1a00000, - ARMI_BIC = 0xe1c00000, - ARMI_MVN = 0xe1e00000, - - ARMI_NOP = 0xe1a00000, - - ARMI_MUL = 0xe0000090, - ARMI_SMULL = 0xe0c00090, - - ARMI_LDR = 0xe4100000, - ARMI_LDRB = 0xe4500000, - ARMI_LDRH = 0xe01000b0, - ARMI_LDRSB = 0xe01000d0, - ARMI_LDRSH = 0xe01000f0, - ARMI_LDRD = 0xe00000d0, - ARMI_STR = 0xe4000000, - ARMI_STRB = 0xe4400000, - ARMI_STRH = 0xe00000b0, - ARMI_STRD = 0xe00000f0, - ARMI_PUSH = 0xe92d0000, - - ARMI_B = 0xea000000, - ARMI_BL = 0xeb000000, - ARMI_BLX = 0xfa000000, - ARMI_BLXr = 
0xe12fff30, - - /* ARMv6 */ - ARMI_REV = 0xe6bf0f30, - ARMI_SXTB = 0xe6af0070, - ARMI_SXTH = 0xe6bf0070, - ARMI_UXTB = 0xe6ef0070, - ARMI_UXTH = 0xe6ff0070, - - /* ARMv6T2 */ - ARMI_MOVW = 0xe3000000, - ARMI_MOVT = 0xe3400000, - - /* VFP */ - ARMI_VMOV_D = 0xeeb00b40, - ARMI_VMOV_S = 0xeeb00a40, - ARMI_VMOVI_D = 0xeeb00b00, - - ARMI_VMOV_R_S = 0xee100a10, - ARMI_VMOV_S_R = 0xee000a10, - ARMI_VMOV_RR_D = 0xec500b10, - ARMI_VMOV_D_RR = 0xec400b10, - - ARMI_VADD_D = 0xee300b00, - ARMI_VSUB_D = 0xee300b40, - ARMI_VMUL_D = 0xee200b00, - ARMI_VMLA_D = 0xee000b00, - ARMI_VMLS_D = 0xee000b40, - ARMI_VNMLS_D = 0xee100b00, - ARMI_VDIV_D = 0xee800b00, - - ARMI_VABS_D = 0xeeb00bc0, - ARMI_VNEG_D = 0xeeb10b40, - ARMI_VSQRT_D = 0xeeb10bc0, - - ARMI_VCMP_D = 0xeeb40b40, - ARMI_VCMPZ_D = 0xeeb50b40, - - ARMI_VMRS = 0xeef1fa10, - - ARMI_VCVT_S32_F32 = 0xeebd0ac0, - ARMI_VCVT_S32_F64 = 0xeebd0bc0, - ARMI_VCVT_U32_F32 = 0xeebc0ac0, - ARMI_VCVT_U32_F64 = 0xeebc0bc0, - ARMI_VCVT_F32_S32 = 0xeeb80ac0, - ARMI_VCVT_F64_S32 = 0xeeb80bc0, - ARMI_VCVT_F32_U32 = 0xeeb80a40, - ARMI_VCVT_F64_U32 = 0xeeb80b40, - ARMI_VCVT_F32_F64 = 0xeeb70bc0, - ARMI_VCVT_F64_F32 = 0xeeb70ac0, - - ARMI_VLDR_S = 0xed100a00, - ARMI_VLDR_D = 0xed100b00, - ARMI_VSTR_S = 0xed000a00, - ARMI_VSTR_D = 0xed000b00, -} ARMIns; - -typedef enum ARMShift { - ARMSH_LSL, ARMSH_LSR, ARMSH_ASR, ARMSH_ROR -} ARMShift; - -/* ARM condition codes. */ -typedef enum ARMCC { - CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, - CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, - CC_HS = CC_CS, CC_LO = CC_CC -} ARMCC; - -#endif diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h deleted file mode 100644 index 3f6bb39be2..0000000000 --- a/src/lj_target_arm64.h +++ /dev/null @@ -1,322 +0,0 @@ -/* -** Definitions for ARM64 CPUs. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -*/ - -#ifndef _LJ_TARGET_ARM64_H -#define _LJ_TARGET_ARM64_H - -/* -- Registers IDs ------------------------------------------------------- */ - -#define GPRDEF(_) \ - _(X0) _(X1) _(X2) _(X3) _(X4) _(X5) _(X6) _(X7) \ - _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \ - _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \ - _(X24) _(X25) _(X26) _(X27) _(X28) _(FP) _(LR) _(SP) -#define FPRDEF(_) \ - _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \ - _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) \ - _(D16) _(D17) _(D18) _(D19) _(D20) _(D21) _(D22) _(D23) \ - _(D24) _(D25) _(D26) _(D27) _(D28) _(D29) _(D30) _(D31) -#define VRIDDEF(_) - -#define RIDENUM(name) RID_##name, - -enum { - GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ - FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ - RID_MAX, - RID_TMP = RID_LR, - RID_ZERO = RID_SP, - - /* Calling conventions. */ - RID_RET = RID_X0, - RID_FPRET = RID_D0, - - /* These definitions must match with the *.dasc file(s): */ - RID_BASE = RID_X19, /* Interpreter BASE. */ - RID_LPC = RID_X21, /* Interpreter PC. */ - RID_GL = RID_X22, /* Interpreter GL. */ - RID_LREG = RID_X23, /* Interpreter L. */ - - /* Register ranges [min, max) and number of registers. */ - RID_MIN_GPR = RID_X0, - RID_MAX_GPR = RID_SP+1, - RID_MIN_FPR = RID_MAX_GPR, - RID_MAX_FPR = RID_D31+1, - RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, - RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR -}; - -#define RID_NUM_KREF RID_NUM_GPR -#define RID_MIN_KREF RID_X0 - -/* -- Register sets ------------------------------------------------------- */ - -/* Make use of all registers, except for x18, fp, lr and sp. 
*/ -#define RSET_FIXED \ - (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\ - RID2RSET(RID_GL)) -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) -#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) -#define RSET_ALL (RSET_GPR|RSET_FPR) -#define RSET_INIT RSET_ALL - -/* lr is an implicit scratch register. */ -#define RSET_SCRATCH_GPR (RSET_RANGE(RID_X0, RID_X17+1)) -#define RSET_SCRATCH_FPR \ - (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1)) -#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) -#define REGARG_FIRSTGPR RID_X0 -#define REGARG_LASTGPR RID_X7 -#define REGARG_NUMGPR 8 -#define REGARG_FIRSTFPR RID_D0 -#define REGARG_LASTFPR RID_D7 -#define REGARG_NUMFPR 8 - -/* -- Spill slots --------------------------------------------------------- */ - -/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. -** -** SPS_FIXED: Available fixed spill slots in interpreter frame. -** This definition must match with the vm_arm64.dasc file. -** Pre-allocate some slots to avoid sp adjust in every root trace. -** -** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. -*/ -#define SPS_FIXED 4 -#define SPS_FIRST 2 - -#define SPOFS_TMP 0 - -#define sps_scale(slot) (4 * (int32_t)(slot)) -#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) - -/* -- Exit state ---------------------------------------------------------- */ - -/* This definition must match with the *.dasc file(s). */ -typedef struct { - lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ - intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ - int32_t spill[256]; /* Spill slots. */ -} ExitState; - -/* Highest exit + 1 indicates stack check. */ -#define EXITSTATE_CHECKEXIT 1 - -/* Return the address of a per-trace exit stub. */ -static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) -{ - while (*p == 0xd503201f) p++; /* Skip A64I_NOP. 
*/ - return p + 3 + exitno; -} -/* Avoid dependence on lj_jit.h if only including lj_target.h. */ -#define exitstub_trace_addr(T, exitno) \ - exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) - -/* -- Instructions -------------------------------------------------------- */ - -/* Instruction fields. */ -#define A64F_D(r) (r) -#define A64F_N(r) ((r) << 5) -#define A64F_A(r) ((r) << 10) -#define A64F_M(r) ((r) << 16) -#define A64F_IMMS(x) ((x) << 10) -#define A64F_IMMR(x) ((x) << 16) -#define A64F_U16(x) ((x) << 5) -#define A64F_U12(x) ((x) << 10) -#define A64F_S26(x) (x) -#define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5) -#define A64F_S14(x) ((x) << 5) -#define A64F_S9(x) ((x) << 12) -#define A64F_BIT(x) ((x) << 19) -#define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10)) -#define A64F_EX(ex) (A64I_EX | ((ex) << 13)) -#define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10)) -#define A64F_FP8(x) ((x) << 13) -#define A64F_CC(cc) ((cc) << 12) -#define A64F_LSL16(x) (((x) / 16) << 21) -#define A64F_BSH(sh) ((sh) << 10) - -typedef enum A64Ins { - A64I_S = 0x20000000, - A64I_X = 0x80000000, - A64I_EX = 0x00200000, - A64I_ON = 0x00200000, - A64I_K12 = 0x1a000000, - A64I_K13 = 0x18000000, - A64I_LS_U = 0x01000000, - A64I_LS_S = 0x00800000, - A64I_LS_R = 0x01200800, - A64I_LS_SH = 0x00001000, - A64I_LS_UXTWx = 0x00004000, - A64I_LS_SXTWx = 0x0000c000, - A64I_LS_SXTXx = 0x0000e000, - A64I_LS_LSLx = 0x00006000, - - A64I_ADDw = 0x0b000000, - A64I_ADDx = 0x8b000000, - A64I_ADDSw = 0x2b000000, - A64I_ADDSx = 0xab000000, - A64I_NEGw = 0x4b0003e0, - A64I_NEGx = 0xcb0003e0, - A64I_SUBw = 0x4b000000, - A64I_SUBx = 0xcb000000, - A64I_SUBSw = 0x6b000000, - A64I_SUBSx = 0xeb000000, - - A64I_MULw = 0x1b007c00, - A64I_MULx = 0x9b007c00, - A64I_SMULL = 0x9b207c00, - - A64I_ANDw = 0x0a000000, - A64I_ANDx = 0x8a000000, - A64I_ANDSw = 0x6a000000, - A64I_ANDSx = 0xea000000, - A64I_EORw = 0x4a000000, - A64I_EORx = 0xca000000, - A64I_ORRw = 0x2a000000, - A64I_ORRx 
= 0xaa000000, - A64I_TSTw = 0x6a00001f, - A64I_TSTx = 0xea00001f, - - A64I_CMPw = 0x6b00001f, - A64I_CMPx = 0xeb00001f, - A64I_CMNw = 0x2b00001f, - A64I_CMNx = 0xab00001f, - A64I_CCMPw = 0x7a400000, - A64I_CCMPx = 0xfa400000, - A64I_CSELw = 0x1a800000, - A64I_CSELx = 0x9a800000, - - A64I_ASRw = 0x13007c00, - A64I_ASRx = 0x9340fc00, - A64I_LSLx = 0xd3400000, - A64I_LSRx = 0xd340fc00, - A64I_SHRw = 0x1ac02000, - A64I_SHRx = 0x9ac02000, /* lsl/lsr/asr/ror x0, x0, x0 */ - A64I_REVw = 0x5ac00800, - A64I_REVx = 0xdac00c00, - - A64I_EXTRw = 0x13800000, - A64I_EXTRx = 0x93c00000, - A64I_SBFMw = 0x13000000, - A64I_SBFMx = 0x93400000, - A64I_SXTBw = 0x13001c00, - A64I_SXTHw = 0x13003c00, - A64I_SXTW = 0x93407c00, - A64I_UBFMw = 0x53000000, - A64I_UBFMx = 0xd3400000, - A64I_UXTBw = 0x53001c00, - A64I_UXTHw = 0x53003c00, - - A64I_MOVw = 0x2a0003e0, - A64I_MOVx = 0xaa0003e0, - A64I_MVNw = 0x2a2003e0, - A64I_MVNx = 0xaa2003e0, - A64I_MOVKw = 0x72800000, - A64I_MOVKx = 0xf2800000, - A64I_MOVZw = 0x52800000, - A64I_MOVZx = 0xd2800000, - A64I_MOVNw = 0x12800000, - A64I_MOVNx = 0x92800000, - - A64I_LDRB = 0x39400000, - A64I_LDRH = 0x79400000, - A64I_LDRw = 0xb9400000, - A64I_LDRx = 0xf9400000, - A64I_LDRLw = 0x18000000, - A64I_LDRLx = 0x58000000, - A64I_STRB = 0x39000000, - A64I_STRH = 0x79000000, - A64I_STRw = 0xb9000000, - A64I_STRx = 0xf9000000, - A64I_STPw = 0x29000000, - A64I_STPx = 0xa9000000, - A64I_LDPw = 0x29400000, - A64I_LDPx = 0xa9400000, - - A64I_B = 0x14000000, - A64I_BCC = 0x54000000, - A64I_BL = 0x94000000, - A64I_BR = 0xd61f0000, - A64I_BLR = 0xd63f0000, - A64I_TBZ = 0x36000000, - A64I_TBNZ = 0x37000000, - A64I_CBZ = 0x34000000, - A64I_CBNZ = 0x35000000, - - A64I_NOP = 0xd503201f, - - /* FP */ - A64I_FADDd = 0x1e602800, - A64I_FSUBd = 0x1e603800, - A64I_FMADDd = 0x1f400000, - A64I_FMSUBd = 0x1f408000, - A64I_FNMADDd = 0x1f600000, - A64I_FNMSUBd = 0x1f608000, - A64I_FMULd = 0x1e600800, - A64I_FDIVd = 0x1e601800, - A64I_FNEGd = 0x1e614000, - A64I_FABS = 0x1e60c000, - 
A64I_FSQRTd = 0x1e61c000, - A64I_LDRs = 0xbd400000, - A64I_LDRd = 0xfd400000, - A64I_STRs = 0xbd000000, - A64I_STRd = 0xfd000000, - A64I_LDPs = 0x2d400000, - A64I_LDPd = 0x6d400000, - A64I_STPs = 0x2d000000, - A64I_STPd = 0x6d000000, - A64I_FCMPd = 0x1e602000, - A64I_FCMPZd = 0x1e602008, - A64I_FCSELd = 0x1e600c00, - A64I_FRINTMd = 0x1e654000, - A64I_FRINTPd = 0x1e64c000, - A64I_FRINTZd = 0x1e65c000, - - A64I_FCVT_F32_F64 = 0x1e624000, - A64I_FCVT_F64_F32 = 0x1e22c000, - A64I_FCVT_F32_S32 = 0x1e220000, - A64I_FCVT_F64_S32 = 0x1e620000, - A64I_FCVT_F32_U32 = 0x1e230000, - A64I_FCVT_F64_U32 = 0x1e630000, - A64I_FCVT_F32_S64 = 0x9e220000, - A64I_FCVT_F64_S64 = 0x9e620000, - A64I_FCVT_F32_U64 = 0x9e230000, - A64I_FCVT_F64_U64 = 0x9e630000, - A64I_FCVT_S32_F64 = 0x1e780000, - A64I_FCVT_S32_F32 = 0x1e380000, - A64I_FCVT_U32_F64 = 0x1e790000, - A64I_FCVT_U32_F32 = 0x1e390000, - A64I_FCVT_S64_F64 = 0x9e780000, - A64I_FCVT_S64_F32 = 0x9e380000, - A64I_FCVT_U64_F64 = 0x9e790000, - A64I_FCVT_U64_F32 = 0x9e390000, - - A64I_FMOV_S = 0x1e204000, - A64I_FMOV_D = 0x1e604000, - A64I_FMOV_R_S = 0x1e260000, - A64I_FMOV_S_R = 0x1e270000, - A64I_FMOV_R_D = 0x9e660000, - A64I_FMOV_D_R = 0x9e670000, - A64I_FMOV_DI = 0x1e601000, -} A64Ins; - -typedef enum A64Shift { - A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR -} A64Shift; - -typedef enum A64Extend { - A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX, - A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX, -} A64Extend; - -/* ARM condition codes. */ -typedef enum A64CC { - CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, - CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, - CC_HS = CC_CS, CC_LO = CC_CC -} A64CC; - -#endif diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h deleted file mode 100644 index 740687b355..0000000000 --- a/src/lj_target_mips.h +++ /dev/null @@ -1,377 +0,0 @@ -/* -** Definitions for MIPS CPUs. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -*/ - -#ifndef _LJ_TARGET_MIPS_H -#define _LJ_TARGET_MIPS_H - -/* -- Registers IDs ------------------------------------------------------- */ - -#define GPRDEF(_) \ - _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ - _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ - _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ - _(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA) -#if LJ_SOFTFP -#define FPRDEF(_) -#else -#define FPRDEF(_) \ - _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ - _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ - _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ - _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) -#endif -#define VRIDDEF(_) - -#define RIDENUM(name) RID_##name, - -enum { - GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ - FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ - RID_MAX, - RID_ZERO = RID_R0, - RID_TMP = RID_RA, - RID_GP = RID_R28, - - /* Calling conventions. */ - RID_RET = RID_R2, -#if LJ_LE - RID_RETHI = RID_R3, - RID_RETLO = RID_R2, -#else - RID_RETHI = RID_R2, - RID_RETLO = RID_R3, -#endif -#if LJ_SOFTFP - RID_FPRET = RID_R2, -#else - RID_FPRET = RID_F0, -#endif - RID_CFUNCADDR = RID_R25, - - /* These definitions must match with the *.dasc file(s): */ - RID_BASE = RID_R16, /* Interpreter BASE. */ - RID_LPC = RID_R18, /* Interpreter PC. */ - RID_DISPATCH = RID_R19, /* Interpreter DISPATCH table. */ - RID_LREG = RID_R20, /* Interpreter L. */ - RID_JGL = RID_R30, /* On-trace: global_State + 32768. */ - - /* Register ranges [min, max) and number of registers. */ - RID_MIN_GPR = RID_R0, - RID_MAX_GPR = RID_RA+1, - RID_MIN_FPR = RID_MAX_GPR, -#if LJ_SOFTFP - RID_MAX_FPR = RID_MIN_FPR, -#else - RID_MAX_FPR = RID_F31+1, -#endif - RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, - RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. 
*/ -}; - -#define RID_NUM_KREF RID_NUM_GPR -#define RID_MIN_KREF RID_R0 - -/* -- Register sets ------------------------------------------------------- */ - -/* Make use of all registers, except ZERO, TMP, SP, SYS1, SYS2, JGL and GP. */ -#define RSET_FIXED \ - (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ - RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP)) -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) -#if LJ_SOFTFP -#define RSET_FPR 0 -#else -#if LJ_32 -#define RSET_FPR \ - (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ - RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ - RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ - RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) -#else -#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) -#endif -#endif -#define RSET_ALL (RSET_GPR|RSET_FPR) -#define RSET_INIT RSET_ALL - -#define RSET_SCRATCH_GPR \ - (RSET_RANGE(RID_R1, RID_R15+1)|\ - RID2RSET(RID_R24)|RID2RSET(RID_R25)) -#if LJ_SOFTFP -#define RSET_SCRATCH_FPR 0 -#else -#if LJ_32 -#define RSET_SCRATCH_FPR \ - (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ - RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ - RID2RSET(RID_F16)|RID2RSET(RID_F18)) -#else -#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F24) -#endif -#endif -#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) -#define REGARG_FIRSTGPR RID_R4 -#if LJ_32 -#define REGARG_LASTGPR RID_R7 -#define REGARG_NUMGPR 4 -#else -#define REGARG_LASTGPR RID_R11 -#define REGARG_NUMGPR 8 -#endif -#if LJ_ABI_SOFTFP -#define REGARG_FIRSTFPR 0 -#define REGARG_LASTFPR 0 -#define REGARG_NUMFPR 0 -#else -#define REGARG_FIRSTFPR RID_F12 -#if LJ_32 -#define REGARG_LASTFPR RID_F14 -#define REGARG_NUMFPR 2 -#else -#define REGARG_LASTFPR RID_F19 -#define REGARG_NUMFPR 8 -#endif -#endif - -/* -- Spill slots 
--------------------------------------------------------- */ - -/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. -** -** SPS_FIXED: Available fixed spill slots in interpreter frame. -** This definition must match with the *.dasc file(s). -** -** SPS_FIRST: First spill slot for general use. -*/ -#if LJ_32 -#define SPS_FIXED 5 -#else -#define SPS_FIXED 4 -#endif -#define SPS_FIRST 4 - -#define SPOFS_TMP 0 - -#define sps_scale(slot) (4 * (int32_t)(slot)) -#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) - -/* -- Exit state ---------------------------------------------------------- */ - -/* This definition must match with the *.dasc file(s). */ -typedef struct { -#if !LJ_SOFTFP - lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ -#endif - intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ - int32_t spill[256]; /* Spill slots. */ -} ExitState; - -/* Highest exit + 1 indicates stack check. */ -#define EXITSTATE_CHECKEXIT 1 - -/* Return the address of a per-trace exit stub. */ -static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p) -{ - while (*p == 0x00000000) p++; /* Skip MIPSI_NOP. */ - return p; -} -/* Avoid dependence on lj_jit.h if only including lj_target.h. */ -#define exitstub_trace_addr(T, exitno) \ - exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode)) - -/* -- Instructions -------------------------------------------------------- */ - -/* Instruction fields. */ -#define MIPSF_S(r) ((r) << 21) -#define MIPSF_T(r) ((r) << 16) -#define MIPSF_D(r) ((r) << 11) -#define MIPSF_R(r) ((r) << 21) -#define MIPSF_H(r) ((r) << 16) -#define MIPSF_G(r) ((r) << 11) -#define MIPSF_F(r) ((r) << 6) -#define MIPSF_A(n) ((n) << 6) -#define MIPSF_M(n) ((n) << 11) -#define MIPSF_L(n) ((n) << 6) - -typedef enum MIPSIns { - MIPSI_D = 0x38, - MIPSI_DV = 0x10, - MIPSI_D32 = 0x3c, - /* Integer instructions. 
*/ - MIPSI_MOVE = 0x00000025, - MIPSI_NOP = 0x00000000, - - MIPSI_LI = 0x24000000, - MIPSI_LU = 0x34000000, - MIPSI_LUI = 0x3c000000, - - MIPSI_AND = 0x00000024, - MIPSI_ANDI = 0x30000000, - MIPSI_OR = 0x00000025, - MIPSI_ORI = 0x34000000, - MIPSI_XOR = 0x00000026, - MIPSI_XORI = 0x38000000, - MIPSI_NOR = 0x00000027, - - MIPSI_SLT = 0x0000002a, - MIPSI_SLTU = 0x0000002b, - MIPSI_SLTI = 0x28000000, - MIPSI_SLTIU = 0x2c000000, - - MIPSI_ADDU = 0x00000021, - MIPSI_ADDIU = 0x24000000, - MIPSI_SUB = 0x00000022, - MIPSI_SUBU = 0x00000023, - MIPSI_MUL = 0x70000002, - MIPSI_DIV = 0x0000001a, - MIPSI_DIVU = 0x0000001b, - - MIPSI_MOVZ = 0x0000000a, - MIPSI_MOVN = 0x0000000b, - MIPSI_MFHI = 0x00000010, - MIPSI_MFLO = 0x00000012, - MIPSI_MULT = 0x00000018, - - MIPSI_SLL = 0x00000000, - MIPSI_SRL = 0x00000002, - MIPSI_SRA = 0x00000003, - MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */ - MIPSI_DROTR = 0x0020003a, - MIPSI_DROTR32 = 0x0020003e, - MIPSI_SLLV = 0x00000004, - MIPSI_SRLV = 0x00000006, - MIPSI_SRAV = 0x00000007, - MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */ - MIPSI_DROTRV = 0x00000056, - - MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */ - MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */ - MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */ - MIPSI_DSBH = 0x7c0000a4, - - MIPSI_B = 0x10000000, - MIPSI_J = 0x08000000, - MIPSI_JAL = 0x0c000000, - MIPSI_JALX = 0x74000000, - MIPSI_JR = 0x00000008, - MIPSI_JALR = 0x0000f809, - - MIPSI_BEQ = 0x10000000, - MIPSI_BNE = 0x14000000, - MIPSI_BLEZ = 0x18000000, - MIPSI_BGTZ = 0x1c000000, - MIPSI_BLTZ = 0x04000000, - MIPSI_BGEZ = 0x04010000, - - /* Load/store instructions. */ - MIPSI_LW = 0x8c000000, - MIPSI_LD = 0xdc000000, - MIPSI_SW = 0xac000000, - MIPSI_SD = 0xfc000000, - MIPSI_LB = 0x80000000, - MIPSI_SB = 0xa0000000, - MIPSI_LH = 0x84000000, - MIPSI_SH = 0xa4000000, - MIPSI_LBU = 0x90000000, - MIPSI_LHU = 0x94000000, - MIPSI_LWC1 = 0xc4000000, - MIPSI_SWC1 = 0xe4000000, - MIPSI_LDC1 = 0xd4000000, - MIPSI_SDC1 = 0xf4000000, - - /* MIPS64 instructions. 
*/ - MIPSI_DADD = 0x0000002c, - MIPSI_DADDI = 0x60000000, - MIPSI_DADDU = 0x0000002d, - MIPSI_DADDIU = 0x64000000, - MIPSI_DSUB = 0x0000002e, - MIPSI_DSUBU = 0x0000002f, - MIPSI_DDIV = 0x0000001e, - MIPSI_DDIVU = 0x0000001f, - MIPSI_DMULT = 0x0000001c, - MIPSI_DMULTU = 0x0000001d, - - MIPSI_DSLL = 0x00000038, - MIPSI_DSRL = 0x0000003a, - MIPSI_DSLLV = 0x00000014, - MIPSI_DSRLV = 0x00000016, - MIPSI_DSRA = 0x0000003b, - MIPSI_DSRAV = 0x00000017, - MIPSI_DSRA32 = 0x0000003f, - MIPSI_DSLL32 = 0x0000003c, - MIPSI_DSRL32 = 0x0000003e, - MIPSI_DSHD = 0x7c000164, - - MIPSI_AADDU = LJ_32 ? MIPSI_ADDU : MIPSI_DADDU, - MIPSI_AADDIU = LJ_32 ? MIPSI_ADDIU : MIPSI_DADDIU, - MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU, - MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD, - MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD, - - /* Extract/insert instructions. */ - MIPSI_DEXTM = 0x7c000001, - MIPSI_DEXTU = 0x7c000002, - MIPSI_DEXT = 0x7c000003, - MIPSI_DINSM = 0x7c000005, - MIPSI_DINSU = 0x7c000006, - MIPSI_DINS = 0x7c000007, - - MIPSI_RINT_D = 0x4620001a, - MIPSI_RINT_S = 0x4600001a, - MIPSI_RINT = 0x4400001a, - MIPSI_FLOOR_D = 0x4620000b, - MIPSI_CEIL_D = 0x4620000a, - MIPSI_ROUND_D = 0x46200008, - - /* FP instructions. 
*/ - MIPSI_MOV_S = 0x46000006, - MIPSI_MOV_D = 0x46200006, - MIPSI_MOVT_D = 0x46210011, - MIPSI_MOVF_D = 0x46200011, - - MIPSI_ABS_D = 0x46200005, - MIPSI_NEG_D = 0x46200007, - - MIPSI_ADD_D = 0x46200000, - MIPSI_SUB_D = 0x46200001, - MIPSI_MUL_D = 0x46200002, - MIPSI_DIV_D = 0x46200003, - MIPSI_SQRT_D = 0x46200004, - - MIPSI_ADD_S = 0x46000000, - MIPSI_SUB_S = 0x46000001, - - MIPSI_CVT_D_S = 0x46000021, - MIPSI_CVT_W_S = 0x46000024, - MIPSI_CVT_S_D = 0x46200020, - MIPSI_CVT_W_D = 0x46200024, - MIPSI_CVT_S_W = 0x46800020, - MIPSI_CVT_D_W = 0x46800021, - MIPSI_CVT_S_L = 0x46a00020, - MIPSI_CVT_D_L = 0x46a00021, - - MIPSI_TRUNC_W_S = 0x4600000d, - MIPSI_TRUNC_W_D = 0x4620000d, - MIPSI_TRUNC_L_S = 0x46000009, - MIPSI_TRUNC_L_D = 0x46200009, - MIPSI_FLOOR_W_S = 0x4600000f, - MIPSI_FLOOR_W_D = 0x4620000f, - - MIPSI_MFC1 = 0x44000000, - MIPSI_MTC1 = 0x44800000, - MIPSI_DMTC1 = 0x44a00000, - MIPSI_DMFC1 = 0x44200000, - - MIPSI_BC1F = 0x45000000, - MIPSI_BC1T = 0x45010000, - - MIPSI_C_EQ_D = 0x46200032, - MIPSI_C_OLT_S = 0x46000034, - MIPSI_C_OLT_D = 0x46200034, - MIPSI_C_ULT_D = 0x46200035, - MIPSI_C_OLE_D = 0x46200036, - MIPSI_C_ULE_D = 0x46200037, -} MIPSIns; - -#endif diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h deleted file mode 100644 index c5c991a377..0000000000 --- a/src/lj_target_ppc.h +++ /dev/null @@ -1,280 +0,0 @@ -/* -** Definitions for PPC CPUs. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -*/ - -#ifndef _LJ_TARGET_PPC_H -#define _LJ_TARGET_PPC_H - -/* -- Registers IDs ------------------------------------------------------- */ - -#define GPRDEF(_) \ - _(R0) _(SP) _(SYS1) _(R3) _(R4) _(R5) _(R6) _(R7) \ - _(R8) _(R9) _(R10) _(R11) _(R12) _(SYS2) _(R14) _(R15) \ - _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ - _(R24) _(R25) _(R26) _(R27) _(R28) _(R29) _(R30) _(R31) -#define FPRDEF(_) \ - _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ - _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ - _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ - _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) -#define VRIDDEF(_) - -#define RIDENUM(name) RID_##name, - -enum { - GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ - FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ - RID_MAX, - RID_TMP = RID_R0, - - /* Calling conventions. */ - RID_RET = RID_R3, - RID_RETHI = RID_R3, - RID_RETLO = RID_R4, - RID_FPRET = RID_F1, - - /* These definitions must match with the *.dasc file(s): */ - RID_BASE = RID_R14, /* Interpreter BASE. */ - RID_LPC = RID_R16, /* Interpreter PC. */ - RID_DISPATCH = RID_R17, /* Interpreter DISPATCH table. */ - RID_LREG = RID_R18, /* Interpreter L. */ - RID_JGL = RID_R31, /* On-trace: global_State + 32768. */ - - /* Register ranges [min, max) and number of registers. */ - RID_MIN_GPR = RID_R0, - RID_MAX_GPR = RID_R31+1, - RID_MIN_FPR = RID_F0, - RID_MAX_FPR = RID_F31+1, - RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, - RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR -}; - -#define RID_NUM_KREF RID_NUM_GPR -#define RID_MIN_KREF RID_R0 - -/* -- Register sets ------------------------------------------------------- */ - -/* Make use of all registers, except TMP, SP, SYS1, SYS2 and JGL. 
*/ -#define RSET_FIXED \ - (RID2RSET(RID_TMP)|RID2RSET(RID_SP)|RID2RSET(RID_SYS1)|\ - RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)) -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) -#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) -#define RSET_ALL (RSET_GPR|RSET_FPR) -#define RSET_INIT RSET_ALL - -#define RSET_SCRATCH_GPR (RSET_RANGE(RID_R3, RID_R12+1)) -#define RSET_SCRATCH_FPR (RSET_RANGE(RID_F0, RID_F13+1)) -#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) -#define REGARG_FIRSTGPR RID_R3 -#define REGARG_LASTGPR RID_R10 -#define REGARG_NUMGPR 8 -#define REGARG_FIRSTFPR RID_F1 -#define REGARG_LASTFPR RID_F8 -#define REGARG_NUMFPR 8 - -/* -- Spill slots --------------------------------------------------------- */ - -/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. -** -** SPS_FIXED: Available fixed spill slots in interpreter frame. -** This definition must match with the *.dasc file(s). -** -** SPS_FIRST: First spill slot for general use. -** [sp+12] tmplo word \ -** [sp+ 8] tmphi word / tmp dword, parameter area for callee -** [sp+ 4] tmpw, LR of callee -** [sp+ 0] stack chain -*/ -#define SPS_FIXED 7 -#define SPS_FIRST 4 - -/* Stack offsets for temporary slots. Used for FP<->int conversions etc. */ -#define SPOFS_TMPW 4 -#define SPOFS_TMP 8 -#define SPOFS_TMPHI 8 -#define SPOFS_TMPLO 12 - -#define sps_scale(slot) (4 * (int32_t)(slot)) -#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) - -/* -- Exit state ---------------------------------------------------------- */ - -/* This definition must match with the *.dasc file(s). */ -typedef struct { - lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ - intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ - int32_t spill[256]; /* Spill slots. */ -} ExitState; - -/* Highest exit + 1 indicates stack check. */ -#define EXITSTATE_CHECKEXIT 1 - -/* Return the address of a per-trace exit stub. 
*/ -static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) -{ - while (*p == 0x60000000) p++; /* Skip PPCI_NOP. */ - return p + 3 + exitno; -} -/* Avoid dependence on lj_jit.h if only including lj_target.h. */ -#define exitstub_trace_addr(T, exitno) \ - exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) - -/* -- Instructions -------------------------------------------------------- */ - -/* Instruction fields. */ -#define PPCF_CC(cc) ((((cc) & 3) << 16) | (((cc) & 4) << 22)) -#define PPCF_T(r) ((r) << 21) -#define PPCF_A(r) ((r) << 16) -#define PPCF_B(r) ((r) << 11) -#define PPCF_C(r) ((r) << 6) -#define PPCF_MB(n) ((n) << 6) -#define PPCF_ME(n) ((n) << 1) -#define PPCF_Y 0x00200000 -#define PPCF_DOT 0x00000001 - -typedef enum PPCIns { - /* Integer instructions. */ - PPCI_MR = 0x7c000378, - PPCI_NOP = 0x60000000, - - PPCI_LI = 0x38000000, - PPCI_LIS = 0x3c000000, - - PPCI_ADD = 0x7c000214, - PPCI_ADDC = 0x7c000014, - PPCI_ADDO = 0x7c000614, - PPCI_ADDE = 0x7c000114, - PPCI_ADDZE = 0x7c000194, - PPCI_ADDME = 0x7c0001d4, - PPCI_ADDI = 0x38000000, - PPCI_ADDIS = 0x3c000000, - PPCI_ADDIC = 0x30000000, - PPCI_ADDICDOT = 0x34000000, - - PPCI_SUBF = 0x7c000050, - PPCI_SUBFC = 0x7c000010, - PPCI_SUBFO = 0x7c000450, - PPCI_SUBFE = 0x7c000110, - PPCI_SUBFZE = 0x7c000190, - PPCI_SUBFME = 0x7c0001d0, - PPCI_SUBFIC = 0x20000000, - - PPCI_NEG = 0x7c0000d0, - - PPCI_AND = 0x7c000038, - PPCI_ANDC = 0x7c000078, - PPCI_NAND = 0x7c0003b8, - PPCI_ANDIDOT = 0x70000000, - PPCI_ANDISDOT = 0x74000000, - - PPCI_OR = 0x7c000378, - PPCI_NOR = 0x7c0000f8, - PPCI_ORI = 0x60000000, - PPCI_ORIS = 0x64000000, - - PPCI_XOR = 0x7c000278, - PPCI_EQV = 0x7c000238, - PPCI_XORI = 0x68000000, - PPCI_XORIS = 0x6c000000, - - PPCI_CMPW = 0x7c000000, - PPCI_CMPLW = 0x7c000040, - PPCI_CMPWI = 0x2c000000, - PPCI_CMPLWI = 0x28000000, - - PPCI_MULLW = 0x7c0001d6, - PPCI_MULLI = 0x1c000000, - PPCI_MULLWO = 0x7c0005d6, - - PPCI_EXTSB = 0x7c000774, - PPCI_EXTSH = 
0x7c000734, - - PPCI_SLW = 0x7c000030, - PPCI_SRW = 0x7c000430, - PPCI_SRAW = 0x7c000630, - PPCI_SRAWI = 0x7c000670, - - PPCI_RLWNM = 0x5c000000, - PPCI_RLWINM = 0x54000000, - PPCI_RLWIMI = 0x50000000, - - PPCI_B = 0x48000000, - PPCI_BL = 0x48000001, - PPCI_BC = 0x40800000, - PPCI_BCL = 0x40800001, - PPCI_BCTR = 0x4e800420, - PPCI_BCTRL = 0x4e800421, - - PPCI_CRANDC = 0x4c000102, - PPCI_CRXOR = 0x4c000182, - PPCI_CRAND = 0x4c000202, - PPCI_CREQV = 0x4c000242, - PPCI_CRORC = 0x4c000342, - PPCI_CROR = 0x4c000382, - - PPCI_MFLR = 0x7c0802a6, - PPCI_MTCTR = 0x7c0903a6, - - PPCI_MCRXR = 0x7c000400, - - /* Load/store instructions. */ - PPCI_LWZ = 0x80000000, - PPCI_LBZ = 0x88000000, - PPCI_STW = 0x90000000, - PPCI_STB = 0x98000000, - PPCI_LHZ = 0xa0000000, - PPCI_LHA = 0xa8000000, - PPCI_STH = 0xb0000000, - - PPCI_STWU = 0x94000000, - - PPCI_LFS = 0xc0000000, - PPCI_LFD = 0xc8000000, - PPCI_STFS = 0xd0000000, - PPCI_STFD = 0xd8000000, - - PPCI_LWZX = 0x7c00002e, - PPCI_LBZX = 0x7c0000ae, - PPCI_STWX = 0x7c00012e, - PPCI_STBX = 0x7c0001ae, - PPCI_LHZX = 0x7c00022e, - PPCI_LHAX = 0x7c0002ae, - PPCI_STHX = 0x7c00032e, - - PPCI_LWBRX = 0x7c00042c, - PPCI_STWBRX = 0x7c00052c, - - PPCI_LFSX = 0x7c00042e, - PPCI_LFDX = 0x7c0004ae, - PPCI_STFSX = 0x7c00052e, - PPCI_STFDX = 0x7c0005ae, - - /* FP instructions. 
*/ - PPCI_FMR = 0xfc000090, - PPCI_FNEG = 0xfc000050, - PPCI_FABS = 0xfc000210, - - PPCI_FRSP = 0xfc000018, - PPCI_FCTIWZ = 0xfc00001e, - - PPCI_FADD = 0xfc00002a, - PPCI_FSUB = 0xfc000028, - PPCI_FMUL = 0xfc000032, - PPCI_FDIV = 0xfc000024, - PPCI_FSQRT = 0xfc00002c, - - PPCI_FMADD = 0xfc00003a, - PPCI_FMSUB = 0xfc000038, - PPCI_FNMSUB = 0xfc00003c, - - PPCI_FCMPU = 0xfc000000, - PPCI_FSEL = 0xfc00002e, -} PPCIns; - -typedef enum PPCCC { - CC_GE, CC_LE, CC_NE, CC_NS, CC_LT, CC_GT, CC_EQ, CC_SO -} PPCCC; - -#endif diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 356f792459..19fc3de386 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h @@ -8,19 +8,12 @@ /* -- Registers IDs ------------------------------------------------------- */ -#if LJ_64 #define GPRDEF(_) \ _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) \ _(R8D) _(R9D) _(R10D) _(R11D) _(R12D) _(R13D) _(R14D) _(R15D) #define FPRDEF(_) \ _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) \ _(XMM8) _(XMM9) _(XMM10) _(XMM11) _(XMM12) _(XMM13) _(XMM14) _(XMM15) -#else -#define GPRDEF(_) \ - _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) -#define FPRDEF(_) \ - _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) -#endif #define VRIDDEF(_) \ _(MRM) _(RIP) @@ -36,22 +29,12 @@ enum { /* Calling conventions. */ RID_SP = RID_ESP, RID_RET = RID_EAX, -#if LJ_64 RID_FPRET = RID_XMM0, -#else - RID_RETLO = RID_EAX, - RID_RETHI = RID_EDX, -#endif /* These definitions must match with the *.dasc file(s): */ RID_BASE = RID_EDX, /* Interpreter BASE. */ -#if LJ_64 && !LJ_ABI_WIN RID_LPC = RID_EBX, /* Interpreter PC. */ RID_DISPATCH = RID_R14D, /* Interpreter DISPATCH table. */ -#else - RID_LPC = RID_ESI, /* Interpreter PC. */ - RID_DISPATCH = RID_EBX, /* Interpreter DISPATCH table. */ -#endif /* Register ranges [min, max) and number of registers. 
*/ RID_MIN_GPR = RID_EAX, @@ -67,33 +50,16 @@ enum { /* Make use of all registers, except the stack pointer (and maybe DISPATCH). */ #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \ - RID2RSET(RID_ESP) \ - - LJ_GC64*RID2RSET(RID_DISPATCH)) + - RID2RSET(RID_DISPATCH)) #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) #define RSET_ALL (RSET_GPR|RSET_FPR) #define RSET_INIT RSET_ALL -#if LJ_64 /* Note: this requires the use of FORCE_REX! */ #define RSET_GPR8 RSET_GPR -#else -#define RSET_GPR8 (RSET_RANGE(RID_EAX, RID_EBX+1)) -#endif /* ABI-specific register sets. */ #define RSET_ACD (RID2RSET(RID_EAX)|RID2RSET(RID_ECX)|RID2RSET(RID_EDX)) -#if LJ_64 -#if LJ_ABI_WIN -/* Windows x64 ABI. */ -#define RSET_SCRATCH \ - (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1)) -#define REGARG_GPRS \ - (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5)) -#define REGARG_NUMGPR 4 -#define REGARG_NUMFPR 4 -#define REGARG_FIRSTFPR RID_XMM0 -#define REGARG_LASTFPR RID_XMM3 -#define STACKARG_OFS (4*8) -#else /* The rest of the civilized x64 world has a common ABI. */ #define RSET_SCRATCH \ (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR) @@ -105,21 +71,10 @@ enum { #define REGARG_FIRSTFPR RID_XMM0 #define REGARG_LASTFPR RID_XMM7 #define STACKARG_OFS 0 -#endif -#else -/* Common x86 ABI. */ -#define RSET_SCRATCH (RSET_ACD|RSET_FPR) -#define REGARG_GPRS (RID_ECX|(RID_EDX<<5)) /* Fastcall only. */ -#define REGARG_NUMGPR 2 /* Fastcall only. */ -#define REGARG_NUMFPR 0 -#define STACKARG_OFS 0 -#endif -#if LJ_64 /* Prefer the low 8 regs of each type to reduce REX prefixes. */ #undef rset_picktop #define rset_picktop(rs) (lj_fls(lj_bswap(rs)) ^ 0x18) -#endif /* -- Spill slots --------------------------------------------------------- */ @@ -130,22 +85,8 @@ enum { ** ** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. 
*/ -#if LJ_64 -#if LJ_ABI_WIN -#define SPS_FIXED (4*2) -#define SPS_FIRST (4*2) /* Don't use callee register save area. */ -#else -#if LJ_GC64 #define SPS_FIXED 2 -#else -#define SPS_FIXED 4 -#endif -#define SPS_FIRST 2 -#endif -#else -#define SPS_FIXED 6 #define SPS_FIRST 2 -#endif #define SPOFS_TMP 0 diff --git a/src/lj_trace.c b/src/lj_trace.c index 80a7f024af..57184c1d47 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -6,9 +6,10 @@ #define lj_trace_c #define LUA_CORE +#include + #include "lj_obj.h" -#if LJ_HASJIT #include "lj_gc.h" #include "lj_err.h" @@ -28,8 +29,8 @@ #include "lj_asm.h" #include "lj_dispatch.h" #include "lj_vm.h" -#include "lj_vmevent.h" #include "lj_target.h" +#include "lj_auditlog.h" /* -- Error handling ------------------------------------------------------ */ @@ -48,6 +49,45 @@ void lj_trace_err_info(jit_State *J, TraceError e) lj_err_throw(J->L, LUA_ERRRUN); } +/* -- Hotcount decay ------------------------------------------------------ */ + +/* We reset all hotcounts every second. This is a rough way to establish a +** relation with elapsed time so that hotcounts provide a measure of frequency. +** +** The concrete goal is to ensure that the JIT will trace code that becomes hot +** over a short duration, but not code that becomes hot over, say, the course +** of an hour. +** +** The "one second" constant is certainly tunable. +** */ + +static void trace_clearsnapcounts(jit_State *J); /* Forward decl. */ + +static inline uint64_t gettime_ns (void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_sec * 1000000000LL + ts.tv_nsec; +} + +/* Timestamp (ns) of last hotcount reset. */ +static uint64_t hotcount_decay_ts; + +/* Decay hotcounts every second. */ +int hotcount_decay (jit_State *J) +{ + uint64_t ts = gettime_ns(); + int decay = (ts - hotcount_decay_ts) > 1000000000LL; /* 1s elapsed? */ + if (decay) { + /* Reset hotcounts. 
*/ + lj_dispatch_init_hotcount(J2G(J)); + trace_clearsnapcounts(J); + hotcount_decay_ts = ts; + } + return decay; +} + + /* -- Trace management ---------------------------------------------------- */ /* The current trace is first assembled in J->cur. The variable length @@ -59,22 +99,14 @@ void lj_trace_err_info(jit_State *J, TraceError e) /* Find a free trace number. */ static TraceNo trace_findfree(jit_State *J) { - MSize osz, lim; if (J->freetrace == 0) J->freetrace = 1; - for (; J->freetrace < J->sizetrace; J->freetrace++) + /* Search for a free slot. */ + for (; J->freetrace < TRACE_MAX; J->freetrace++) if (traceref(J, J->freetrace) == NULL) return J->freetrace++; - /* Need to grow trace array. */ - lim = (MSize)J->param[JIT_P_maxtrace] + 1; - if (lim < 2) lim = 2; else if (lim > 65535) lim = 65535; - osz = J->sizetrace; - if (osz >= lim) - return 0; /* Too many traces. */ - lj_mem_growvec(J->L, J->trace, J->sizetrace, lim, GCRef); - for (; osz < J->sizetrace; osz++) - setgcrefnull(J->trace[osz]); - return J->freetrace; + /* No free slot in trace array. */ + return 0; } #define TRACE_APPENDVEC(field, szfield, tp) \ @@ -82,43 +114,8 @@ static TraceNo trace_findfree(jit_State *J) memcpy(p, J->cur.field, J->cur.szfield*sizeof(tp)); \ p += J->cur.szfield*sizeof(tp); -#ifdef LUAJIT_USE_PERFTOOLS -/* -** Create symbol table of JIT-compiled code. For use with Linux perf tools. 
-** Example usage: -** perf record -f -e cycles luajit test.lua -** perf report -s symbol -** rm perf.data /tmp/perf-*.map -*/ -#include -#include - -static void perftools_addtrace(GCtrace *T) -{ - static FILE *fp; - GCproto *pt = &gcref(T->startpt)->pt; - const BCIns *startpc = mref(T->startpc, const BCIns); - const char *name = proto_chunknamestr(pt); - BCLine lineno; - if (name[0] == '@' || name[0] == '=') - name++; - else - name = "(string)"; - lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); - lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); - if (!fp) { - char fname[40]; - sprintf(fname, "/tmp/perf-%d.map", getpid()); - if (!(fp = fopen(fname, "w"))) return; - setlinebuf(fp); - } - fprintf(fp, "%lx %x TRACE_%d::%s:%u\n", - (long)T->mcode, T->szmcode, T->traceno, name, lineno); -} -#endif - /* Allocate space for copy of T. */ -GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T) +GCtrace * lj_trace_alloc(lua_State *L, GCtrace *T) { size_t sztr = ((sizeof(GCtrace)+7)&~7); size_t szins = (T->nins-T->nk)*sizeof(IRIns); @@ -135,6 +132,12 @@ GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T) T2->nk = T->nk; T2->nsnap = T->nsnap; T2->nsnapmap = T->nsnapmap; + /* Set szirmcode into T2 allocated memory. May be unallocated in T. + ** +2 extra spaces for the last instruction and the trace header at [0]. 
+ */ + T2->nszirmcode = T->nins+2-REF_BIAS; + T2->szirmcode = lj_mem_newt(L, T2->nszirmcode*sizeof(uint16_t), uint16_t); + memset(T2->szirmcode, 0, T2->nszirmcode*sizeof(uint16_t)); memcpy(p, T->ir + T->nk, szins); return T2; } @@ -144,8 +147,12 @@ static void trace_save(jit_State *J, GCtrace *T) { size_t sztr = ((sizeof(GCtrace)+7)&~7); size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns); + size_t nszirmcode = T->nszirmcode; + uint16_t *szirmcode = T->szirmcode; char *p = (char *)T + sztr; memcpy(T, &J->cur, sizeof(GCtrace)); + T->parent = J->parent; + T->exitno = J->exitno; setgcrefr(T->nextgc, J2G(J)->gc.root); setgcrefp(J2G(J)->gc.root, T); newwhite(J2G(J), T); @@ -154,25 +161,26 @@ static void trace_save(jit_State *J, GCtrace *T) p += szins; TRACE_APPENDVEC(snap, nsnap, SnapShot) TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry) + /* Set szirmcode into T2 allocated memory. May be unallocated in T. */ + T->nszirmcode = nszirmcode; + T->szirmcode = szirmcode; J->cur.traceno = 0; J->curfinal = NULL; setgcrefp(J->trace[T->traceno], T); lj_gc_barriertrace(J2G(J), T->traceno); lj_gdbjit_addtrace(J, T); -#ifdef LUAJIT_USE_PERFTOOLS - perftools_addtrace(T); -#endif + lj_ctype_log(J->L); + lj_auditlog_trace_stop(J, T); } -void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T) +void lj_trace_free(global_State *g, GCtrace *T) { jit_State *J = G2J(g); if (T->traceno) { lj_gdbjit_deltrace(J, T); - if (T->traceno < J->freetrace) - J->freetrace = T->traceno; setgcrefnull(J->trace[T->traceno]); } + lj_mem_free(g, T->szirmcode, T->nszirmcode*sizeof(uint16_t)); lj_mem_free(g, T, ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) + T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry)); @@ -258,7 +266,7 @@ static void trace_flushroot(jit_State *J, GCtrace *T) /* Flush a trace. Only root traces are considered. 
*/ void lj_trace_flush(jit_State *J, TraceNo traceno) { - if (traceno > 0 && traceno < J->sizetrace) { + if (traceno > 0 && traceno < TRACE_MAX) { GCtrace *T = traceref(J, traceno); if (T && T->root == 0) trace_flushroot(J, T); @@ -276,29 +284,45 @@ void lj_trace_flushproto(global_State *g, GCproto *pt) int lj_trace_flushall(lua_State *L) { jit_State *J = L2J(L); + global_State *g = G(L); ptrdiff_t i; if ((J2G(J)->hookmask & HOOK_GC)) return 1; - for (i = (ptrdiff_t)J->sizetrace-1; i > 0; i--) { - GCtrace *T = traceref(J, i); - if (T) { - if (T->root == 0) - trace_flushroot(J, T); - lj_gdbjit_deltrace(J, T); - T->traceno = T->link = 0; /* Blacklist the link for cont_stitch. */ - setgcrefnull(J->trace[i]); + lj_auditlog_trace_flushall(J); + if (J->trace) { + for (i = (ptrdiff_t)TRACE_MAX-1; i > 0; i--) { + GCtrace *T = traceref(J, i); + if (T) { + if (T->root == 0) + trace_flushroot(J, T); + lj_gdbjit_deltrace(J, T); + T->traceno = T->link = 0; /* Blacklist the link for cont_stitch. */ + setgcrefnull(J->trace[i]); + } } } J->cur.traceno = 0; + J->ntraces = 0; J->freetrace = 0; + g->lasttrace = 0; + /* Unpatch blacklisted byte codes. */ + GCRef *p = &(G(L)->gc.root); + GCobj *o; + while ((o = gcref(*p)) != NULL) { + if (o->gch.gct == ~LJ_TPROTO) { + lj_trace_reenableproto(gco2pt(o)); + } + p = &o->gch.nextgc; + } /* Clear penalty cache. */ memset(J->penalty, 0, sizeof(J->penalty)); + /* Reset hotcounts. */ + lj_dispatch_init_hotcount(J2G(J)); + /* Initialize hotcount decay timestamp. */ + hotcount_decay_ts = gettime_ns(); /* Free the whole machine code and invalidate all exit stub groups. */ lj_mcode_free(J); memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup)); - lj_vmevent_send(L, TRACE, - setstrV(L, L->top++, lj_str_newlit(L, "flush")); - ); return 0; } @@ -317,32 +341,10 @@ void lj_trace_initstate(global_State *g) tv[1].u64 = U64x(80000000,00000000); /* Initialize 32/64 bit constants. 
*/ -#if LJ_TARGET_X86ORX64 J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); -#if LJ_32 - J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); -#endif J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); - J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000; -#endif -#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 + J->k32[LJ_K32_M2P64_31] = 0xdf800000; J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); -#endif -#if LJ_TARGET_PPC - J->k32[LJ_K32_2P52_2P31] = 0x59800004; - J->k32[LJ_K32_2P52] = 0x59800000; -#endif -#if LJ_TARGET_PPC || LJ_TARGET_MIPS - J->k32[LJ_K32_2P31] = 0x4f000000; -#endif -#if LJ_TARGET_MIPS - J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); -#if LJ_64 - J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); - J->k32[LJ_K32_2P63] = 0x5f000000; - J->k32[LJ_K32_M2P64] = 0xdf800000; -#endif -#endif } /* Free everything associated with the JIT compiler state. */ @@ -352,15 +354,26 @@ void lj_trace_freestate(global_State *g) #ifdef LUA_USE_ASSERT { /* This assumes all traces have already been freed. */ ptrdiff_t i; - for (i = 1; i < (ptrdiff_t)J->sizetrace; i++) + for (i = 1; i < (ptrdiff_t)TRACE_MAX-1; i++) lua_assert(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL); } #endif lj_mcode_free(J); - lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry); - lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot); - lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns); - lj_mem_freevec(g, J->trace, J->sizetrace, GCRef); +} + +/* Clear all trace snap counts (side-exit hot counters). */ +static void trace_clearsnapcounts(jit_State *J) +{ + int i, s; + GCtrace *t; + /* Clear hotcounts for all snapshots of all traces. 
*/ + for (i = 1; i < TRACE_MAX; i++) { + t = traceref(J, i); + if (t != NULL) + for (s = 0; s < t->nsnap; s++) + if (t->snap[s].count != SNAPCOUNT_DONE) + t->snap[s].count = 0; + } } /* -- Penalties and blacklisting ------------------------------------------ */ @@ -372,8 +385,8 @@ static void blacklist_pc(GCproto *pt, BCIns *pc) pt->flags |= PROTO_ILOOP; } -/* Penalize a bytecode instruction. */ -static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) +/* Penalize a bytecode instruction. Return true when blacklisted. */ +static int penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) { uint32_t i, val = PENALTY_MIN; for (i = 0; i < PENALTY_SLOTS; i++) @@ -383,7 +396,7 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) LJ_PRNG_BITS(J, PENALTY_RNDBITS); if (val > PENALTY_MAX) { blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */ - return; + return 1; } goto setpenalty; } @@ -392,17 +405,28 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) J->penaltyslot = (J->penaltyslot + 1) & (PENALTY_SLOTS-1); setmref(J->penalty[i].pc, pc); setpenalty: - J->penalty[i].val = (uint16_t)val; + J->penalty[i].val = val; J->penalty[i].reason = e; hotcount_set(J2GG(J), pc+1, val); + return 0; } +/* Check if this is the last attempt to compile a side trace. +** (If so the next attempt will just record a fallback to the interpreter.) +**/ +static int last_try(jit_State *J) +{ + GCtrace *parent = traceref(J, J->parent); + int count = parent->snap[J->exitno].count; + return count+1 >= J->param[JIT_P_hotexit] + J->param[JIT_P_tryside]; +} + + /* -- Trace compiler state machine ---------------------------------------- */ /* Start tracing. */ static void trace_start(jit_State *J) { - lua_State *L; TraceNo traceno; if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ @@ -419,7 +443,7 @@ static void trace_start(jit_State *J) /* Get a new trace number. 
*/ traceno = trace_findfree(J); - if (LJ_UNLIKELY(traceno == 0)) { /* No free trace? */ + if (traceno == 0 || J->ntraces >= J->param[JIT_P_maxtrace]) { /* No free trace? */ lua_assert((J2G(J)->hookmask & HOOK_GC) == 0); lj_trace_flushall(J->L); J->state = LJ_TRACE_IDLE; /* Silently ignored. */ @@ -427,13 +451,16 @@ static void trace_start(jit_State *J) } setgcrefp(J->trace[traceno], &J->cur); - /* Setup enough of the current trace to be able to send the vmevent. */ + /* Setup enough of the current trace to be able to send the vmevent. + XXX Still needed with vmevent removed? -lukego */ memset(&J->cur, 0, sizeof(GCtrace)); J->cur.traceno = traceno; J->cur.nins = J->cur.nk = REF_BASE; J->cur.ir = J->irbuf; J->cur.snap = J->snapbuf; J->cur.snapmap = J->snapmapbuf; + J->cur.nszirmcode = 0; /* Only present in assembled trace. */ + J->cur.szirmcode = NULL; J->mergesnap = 0; J->needsnap = 0; J->bcskip = 0; @@ -444,23 +471,6 @@ static void trace_start(jit_State *J) J->ktrace = 0; setgcref(J->cur.startpt, obj2gco(J->pt)); - L = J->L; - lj_vmevent_send(L, TRACE, - setstrV(L, L->top++, lj_str_newlit(L, "start")); - setintV(L->top++, traceno); - setfuncV(L, L->top++, J->fn); - setintV(L->top++, proto_bcpos(J->pt, J->pc)); - if (J->parent) { - setintV(L->top++, J->parent); - setintV(L->top++, J->exitno); - } else { - BCOp op = bc_op(*J->pc); - if (op == BC_CALLM || op == BC_CALL || op == BC_ITERC) { - setintV(L->top++, J->exitno); /* Parent of stitched trace. 
*/ - setintV(L->top++, -1); - } - } - ); lj_record_setup(J); } @@ -472,7 +482,7 @@ static void trace_stop(jit_State *J) GCproto *pt = &gcref(J->cur.startpt)->pt; TraceNo traceno = J->cur.traceno; GCtrace *T = J->curfinal; - lua_State *L; + int i; switch (op) { case BC_FORL: @@ -523,13 +533,12 @@ static void trace_stop(jit_State *J) lj_mcode_commit(J, J->cur.mcode); J->postproc = LJ_POST_NONE; trace_save(J, T); + J->ntraces++; - L = J->L; - lj_vmevent_send(L, TRACE, - setstrV(L, L->top++, lj_str_newlit(L, "stop")); - setintV(L->top++, traceno); - setfuncV(L, L->top++, J->fn); - ); + /* Clear any penalty after successful recording. */ + for (i = 0; i < PENALTY_SLOTS; i++) + if (mref(J->penalty[i].pc, const BCIns) == pc) + J->penalty[i].val = PENALTY_MIN; } /* Start a new root trace for down-recursion. */ @@ -567,6 +576,7 @@ static int trace_abort(jit_State *J) J->state = LJ_TRACE_ASM; return 1; /* Retry ASM with new MCode area. */ } + /* Penalize or blacklist starting bytecode instruction. */ if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { if (J->exitno == 0) { @@ -574,38 +584,25 @@ static int trace_abort(jit_State *J) if (e == LJ_TRERR_RETRY) hotcount_set(J2GG(J), startpc+1, 1); /* Immediate retry. */ else - penalty_pc(J, &gcref(J->cur.startpt)->pt, startpc, e); + J->final = penalty_pc(J, &gcref(J->cur.startpt)->pt, startpc, e); } else { traceref(J, J->exitno)->link = J->exitno; /* Self-link is blacklisted. */ } } + /* Is this the last attempt at a side trace? */ + if (J->parent && last_try(J)) J->final = 1; + + lj_ctype_log(J->L); + lj_auditlog_trace_abort(J, e); + /* Is there anything to abort? */ traceno = J->cur.traceno; if (traceno) { - ptrdiff_t errobj = savestack(L, L->top-1); /* Stack may be resized. 
*/ J->cur.link = 0; J->cur.linktype = LJ_TRLINK_NONE; - lj_vmevent_send(L, TRACE, - TValue *frame; - const BCIns *pc; - GCfunc *fn; - setstrV(L, L->top++, lj_str_newlit(L, "abort")); - setintV(L->top++, traceno); - /* Find original Lua function call to generate a better error message. */ - frame = J->L->base-1; - pc = J->pc; - while (!isluafunc(frame_func(frame))) { - pc = (frame_iscont(frame) ? frame_contpc(frame) : frame_pc(frame)) - 1; - frame = frame_prev(frame); - } - fn = frame_func(frame); - setfuncV(L, L->top++, fn); - setintV(L->top++, proto_bcpos(funcproto(fn), pc)); - copyTV(L, L->top++, restorestack(L, errobj)); - copyTV(L, L->top++, &J->errinfo); - ); - /* Drop aborted trace after the vmevent (which may still access it). */ + /* Drop aborted trace after the vmevent (which may still access it). + XXX Rethink now that vmevent is removed? -lukego */ setgcrefnull(J->trace[traceno]); if (traceno < J->freetrace) J->freetrace = traceno; @@ -649,18 +646,6 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) case LJ_TRACE_RECORD: trace_pendpatch(J, 0); setvmstate(J2G(J), RECORD); - lj_vmevent_send_(L, RECORD, - /* Save/restore tmptv state for trace recorder. */ - TValue savetv = J2G(J)->tmptv; - TValue savetv2 = J2G(J)->tmptv2; - setintV(L->top++, J->cur.traceno); - setfuncV(L, L->top++, J->fn); - setintV(L->top++, J->pt ? (int32_t)proto_bcpos(J->pt, J->pc) : -1); - setintV(L->top++, J->framedepth); - , - J2G(J)->tmptv = savetv; - J2G(J)->tmptv2 = savetv2; - ); lj_record_ins(J); break; @@ -725,15 +710,17 @@ void lj_trace_ins(jit_State *J, const BCIns *pc) } /* A hotcount triggered. Start recording a root trace. */ -void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc) +void lj_trace_hot(jit_State *J, const BCIns *pc) { /* Note: pc is the interpreter bytecode PC here. It's offset by 1. */ + if (hotcount_decay(J)) + /* Check for hotcount decay, do nothing if hotcounts have decayed. */ + return; ERRNO_SAVE /* Reset hotcount. 
*/ hotcount_set(J2GG(J), pc, J->param[JIT_P_hotloop]*HOTCOUNT_LOOP); - /* Only start a new trace if not recording or inside __gc call or vmevent. */ - if (J->state == LJ_TRACE_IDLE && - !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) { + /* Only start a new trace if not recording or inside __gc call. */ + if (J->state == LJ_TRACE_IDLE && !(J2G(J)->hookmask & HOOK_GC)) { J->parent = 0; /* Root trace. */ J->exitno = 0; J->state = LJ_TRACE_START; @@ -745,8 +732,11 @@ void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc) /* Check for a hot side exit. If yes, start recording a side trace. */ static void trace_hotside(jit_State *J, const BCIns *pc) { + if (hotcount_decay(J)) + /* Check for hotcount decay, do nothing if hotcounts have decayed. */ + return; SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno]; - if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) && + if (!(J2G(J)->hookmask & HOOK_GC) && isluafunc(curr_func(J->L)) && snap->count != SNAPCOUNT_DONE && ++snap->count >= J->param[JIT_P_hotexit]) { @@ -758,11 +748,10 @@ static void trace_hotside(jit_State *J, const BCIns *pc) } /* Stitch a new trace to the previous trace. */ -void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc) +void lj_trace_stitch(jit_State *J, const BCIns *pc) { - /* Only start a new trace if not recording or inside __gc call or vmevent. */ - if (J->state == LJ_TRACE_IDLE && - !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) { + /* Only start a new trace if not recording or inside __gc call. */ + if (J->state == LJ_TRACE_IDLE && !(J2G(J)->hookmask & HOOK_GC)) { J->parent = 0; /* Have to treat it like a root trace. */ /* J->exitno is set to the invoking trace. */ J->state = LJ_TRACE_START; @@ -788,59 +777,17 @@ static TValue *trace_exit_cp(lua_State *L, lua_CFunction dummy, void *ud) return NULL; } -#ifndef LUAJIT_DISABLE_VMEVENT -/* Push all registers from exit state. 
*/ -static void trace_exit_regs(lua_State *L, ExitState *ex) -{ - int32_t i; - setintV(L->top++, RID_NUM_GPR); - setintV(L->top++, RID_NUM_FPR); - for (i = 0; i < RID_NUM_GPR; i++) { - if (sizeof(ex->gpr[i]) == sizeof(int32_t)) - setintV(L->top++, (int32_t)ex->gpr[i]); - else - setnumV(L->top++, (lua_Number)ex->gpr[i]); - } -#if !LJ_SOFTFP - for (i = 0; i < RID_NUM_FPR; i++) { - setnumV(L->top, ex->fpr[i]); - if (LJ_UNLIKELY(tvisnan(L->top))) - setnanV(L->top); - L->top++; - } -#endif -} -#endif - -#ifdef EXITSTATE_PCREG -/* Determine trace number from pc of exit instruction. */ -static TraceNo trace_exit_find(jit_State *J, MCode *pc) -{ - TraceNo traceno; - for (traceno = 1; traceno < J->sizetrace; traceno++) { - GCtrace *T = traceref(J, traceno); - if (T && pc >= T->mcode && pc < (MCode *)((char *)T->mcode + T->szmcode)) - return traceno; - } - lua_assert(0); - return 0; -} -#endif /* A trace exited. Restore interpreter state. */ -int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) +int lj_trace_exit(jit_State *J, void *exptr) { ERRNO_SAVE lua_State *L = J->L; - ExitState *ex = (ExitState *)exptr; ExitDataCP exd; int errcode; const BCIns *pc; void *cf; GCtrace *T; -#ifdef EXITSTATE_PCREG - J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]); -#endif T = traceref(J, J->parent); UNUSED(T); #ifdef EXITSTATE_CHECKEXIT if (J->exitno == T->nsnap) { /* Treat stack check like a parent exit. */ @@ -857,20 +804,10 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) if (errcode) return -errcode; /* Return negated error code. */ - if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE))) - lj_vmevent_send(L, TEXIT, - lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); - setintV(L->top++, J->parent); - setintV(L->top++, J->exitno); - trace_exit_regs(L, ex); - ); - pc = exd.pc; cf = cframe_raw(L->cframe); setcframe_pc(cf, pc); - if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) { - /* Just exit to interpreter. 
*/ - } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { + if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { if (!(G(L)->hookmask & HOOK_GC)) lj_gc_step(L); /* Exited because of GC: drive GC forward. */ } else { @@ -894,7 +831,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) ERRNO_RESTORE switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: - return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) + LJ_FR2); + return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) - LJ_FR2); case BC_RETM: return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc)); case BC_TSETM: @@ -906,4 +843,3 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) } } -#endif diff --git a/src/lj_trace.h b/src/lj_trace.h index 22cae741f3..8a66dca8e5 100644 --- a/src/lj_trace.h +++ b/src/lj_trace.h @@ -8,7 +8,6 @@ #include "lj_obj.h" -#if LJ_HASJIT #include "lj_jit.h" #include "lj_dispatch.h" @@ -23,8 +22,8 @@ LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e); LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e); /* Trace management. */ -LJ_FUNC GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T); -LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T); +LJ_FUNC GCtrace * lj_trace_alloc(lua_State *L, GCtrace *T); +LJ_FUNC void lj_trace_free(global_State *g, GCtrace *T); LJ_FUNC void lj_trace_reenableproto(GCproto *pt); LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt); LJ_FUNC void lj_trace_flush(jit_State *J, TraceNo traceno); @@ -34,22 +33,13 @@ LJ_FUNC void lj_trace_freestate(global_State *g); /* Event handling. 
*/ LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); -LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); -LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc); -LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); +LJ_FUNCA void lj_trace_hot(jit_State *J, const BCIns *pc); +LJ_FUNCA void lj_trace_stitch(jit_State *J, const BCIns *pc); +LJ_FUNCA int lj_trace_exit(jit_State *J, void *exptr); /* Signal asynchronous abort of trace or end of trace. */ #define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE) #define lj_trace_end(J) (J->state = LJ_TRACE_END) -#else - -#define lj_trace_flushall(L) (UNUSED(L), 0) -#define lj_trace_initstate(g) UNUSED(g) -#define lj_trace_freestate(g) UNUSED(g) -#define lj_trace_abort(g) UNUSED(g) -#define lj_trace_end(J) UNUSED(J) - -#endif #endif diff --git a/src/lj_udata.c b/src/lj_udata.c index bd0321b834..6bc0015b92 100644 --- a/src/lj_udata.c +++ b/src/lj_udata.c @@ -27,7 +27,7 @@ GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env) return ud; } -void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud) +void lj_udata_free(global_State *g, GCudata *ud) { lj_mem_free(g, ud, sizeudata(ud)); } diff --git a/src/lj_udata.h b/src/lj_udata.h index f271a42d32..b802ef637d 100644 --- a/src/lj_udata.h +++ b/src/lj_udata.h @@ -9,6 +9,6 @@ #include "lj_obj.h" LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env); -LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud); +LJ_FUNC void lj_udata_free(global_State *g, GCudata *ud); #endif diff --git a/src/lj_vm.h b/src/lj_vm.h index 1cc7eed782..974f26dac6 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -15,29 +15,16 @@ typedef TValue *(*lua_CPFunction)(lua_State *L, lua_CFunction func, void *ud); LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp); LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); -LJ_ASMF_NORET void LJ_FASTCALL 
lj_vm_unwind_c(void *cframe, int errcode); -LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe); -#if LJ_ABI_WIN && LJ_TARGET_X86 -LJ_ASMF_NORET void LJ_FASTCALL lj_vm_rtlunwind(void *cframe, void *excptrec, - void *unwinder, int errcode); -#endif +LJ_ASMF_NORET void lj_vm_unwind_c(void *cframe, int errcode); +LJ_ASMF_NORET void lj_vm_unwind_ff(void *cframe); LJ_ASMF void lj_vm_unwind_c_eh(void); LJ_ASMF void lj_vm_unwind_ff_eh(void); -#if LJ_TARGET_X86ORX64 LJ_ASMF void lj_vm_unwind_rethrow(void); -#endif /* Miscellaneous functions. */ -#if LJ_TARGET_X86ORX64 LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]); -#endif -#if LJ_TARGET_PPC -void lj_vm_cachesync(void *start, void *end); -#endif LJ_ASMF double lj_vm_foldarith(double x, double y, int op); -#if LJ_HASJIT LJ_ASMF double lj_vm_foldfpm(double x, int op); -#endif #if !LJ_ARCH_HASFPU /* Declared in lj_obj.h: LJ_ASMF int32_t lj_vm_tobit(double x); */ #endif @@ -47,60 +34,34 @@ LJ_ASMF void lj_vm_record(void); LJ_ASMF void lj_vm_inshook(void); LJ_ASMF void lj_vm_rethook(void); LJ_ASMF void lj_vm_callhook(void); -LJ_ASMF void lj_vm_profhook(void); /* Trace exit handling. */ LJ_ASMF void lj_vm_exit_handler(void); LJ_ASMF void lj_vm_exit_interp(void); +LJ_ASMF void lj_vm_exit_interp_notrack(void); /* Internal math helper functions. 
*/ -#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP) -#define lj_vm_floor floor -#define lj_vm_ceil ceil -#else LJ_ASMF double lj_vm_floor(double); LJ_ASMF double lj_vm_ceil(double); -#if LJ_TARGET_ARM -LJ_ASMF double lj_vm_floor_sf(double); -LJ_ASMF double lj_vm_ceil_sf(double); -#endif -#endif #ifdef LUAJIT_NO_LOG2 LJ_ASMF double lj_vm_log2(double); #else #define lj_vm_log2 log2 #endif -#if !(defined(_LJ_DISPATCH_H) && LJ_TARGET_MIPS) -LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); -#endif +LJ_ASMF int32_t lj_vm_modi(int32_t, int32_t); -#if LJ_HASJIT -#if LJ_TARGET_X86ORX64 LJ_ASMF void lj_vm_floor_sse(void); LJ_ASMF void lj_vm_ceil_sse(void); LJ_ASMF void lj_vm_trunc_sse(void); LJ_ASMF void lj_vm_powi_sse(void); #define lj_vm_powi NULL -#else -LJ_ASMF double lj_vm_powi(double, int32_t); -#endif -#if LJ_TARGET_PPC || LJ_TARGET_ARM64 -#define lj_vm_trunc trunc -#else LJ_ASMF double lj_vm_trunc(double); -#if LJ_TARGET_ARM -LJ_ASMF double lj_vm_trunc_sf(double); -#endif -#endif #ifdef LUAJIT_NO_EXP2 LJ_ASMF double lj_vm_exp2(double); #else #define lj_vm_exp2 exp2 #endif -#if LJ_HASFFI LJ_ASMF int lj_vm_errno(void); -#endif -#endif /* Continuations for metamethods. */ LJ_ASMF void lj_cont_cat(void); /* Continue with concatenation. */ diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c deleted file mode 100644 index 86640804be..0000000000 --- a/src/lj_vmevent.c +++ /dev/null @@ -1,58 +0,0 @@ -/* -** VM event handling. -** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -*/ - -#include - -#define lj_vmevent_c -#define LUA_CORE - -#include "lj_obj.h" -#include "lj_str.h" -#include "lj_tab.h" -#include "lj_state.h" -#include "lj_dispatch.h" -#include "lj_vm.h" -#include "lj_vmevent.h" - -ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev) -{ - global_State *g = G(L); - GCstr *s = lj_str_newlit(L, LJ_VMEVENTS_REGKEY); - cTValue *tv = lj_tab_getstr(tabV(registry(L)), s); - if (tvistab(tv)) { - int hash = VMEVENT_HASH(ev); - tv = lj_tab_getint(tabV(tv), hash); - if (tv && tvisfunc(tv)) { - lj_state_checkstack(L, LUA_MINSTACK); - setfuncV(L, L->top++, funcV(tv)); - if (LJ_FR2) setnilV(L->top++); - return savestack(L, L->top); - } - } - g->vmevmask &= ~VMEVENT_MASK(ev); /* No handler: cache this fact. */ - return 0; -} - -void lj_vmevent_call(lua_State *L, ptrdiff_t argbase) -{ - global_State *g = G(L); - uint8_t oldmask = g->vmevmask; - uint8_t oldh = hook_save(g); - int status; - g->vmevmask = 0; /* Disable all events. */ - hook_vmevent(g); - status = lj_vm_pcall(L, restorestack(L, argbase), 0+1, 0); - if (LJ_UNLIKELY(status)) { - /* Really shouldn't use stderr here, but where else to complain? */ - L->top--; - fputs("VM handler failed: ", stderr); - fputs(tvisstr(L->top) ? strVdata(L->top) : "?", stderr); - fputc('\n', stderr); - } - hook_restore(g, oldh); - if (g->vmevmask != VMEVENT_NOCACHE) - g->vmevmask = oldmask; /* Restore event mask, but not if not modified. */ -} - diff --git a/src/lj_vmevent.h b/src/lj_vmevent.h deleted file mode 100644 index 050fb4dd24..0000000000 --- a/src/lj_vmevent.h +++ /dev/null @@ -1,59 +0,0 @@ -/* -** VM event handling. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -#ifndef _LJ_VMEVENT_H -#define _LJ_VMEVENT_H - -#include "lj_obj.h" - -/* Registry key for VM event handler table. 
*/ -#define LJ_VMEVENTS_REGKEY "_VMEVENTS" -#define LJ_VMEVENTS_HSIZE 4 - -#define VMEVENT_MASK(ev) ((uint8_t)1 << ((int)(ev) & 7)) -#define VMEVENT_HASH(ev) ((int)(ev) & ~7) -#define VMEVENT_HASHIDX(h) ((int)(h) << 3) -#define VMEVENT_NOCACHE 255 - -#define VMEVENT_DEF(name, hash) \ - LJ_VMEVENT_##name##_, \ - LJ_VMEVENT_##name = ((LJ_VMEVENT_##name##_) & 7)|((hash) << 3) - -/* VM event IDs. */ -typedef enum { - VMEVENT_DEF(BC, 0x00003883), - VMEVENT_DEF(TRACE, 0xb2d91467), - VMEVENT_DEF(RECORD, 0x9284bf4f), - VMEVENT_DEF(TEXIT, 0xb29df2b0), - LJ_VMEVENT__MAX -} VMEvent; - -#ifdef LUAJIT_DISABLE_VMEVENT -#define lj_vmevent_send(L, ev, args) UNUSED(L) -#define lj_vmevent_send_(L, ev, args, post) UNUSED(L) -#else -#define lj_vmevent_send(L, ev, args) \ - if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ - ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \ - if (argbase) { \ - args \ - lj_vmevent_call(L, argbase); \ - } \ - } -#define lj_vmevent_send_(L, ev, args, post) \ - if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ - ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \ - if (argbase) { \ - args \ - lj_vmevent_call(L, argbase); \ - post \ - } \ - } - -LJ_FUNC ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev); -LJ_FUNC void lj_vmevent_call(lua_State *L, ptrdiff_t argbase); -#endif - -#endif diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index b231d3e811..ecd56f21df 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c @@ -15,24 +15,6 @@ /* -- Wrapper functions --------------------------------------------------- */ -#if LJ_TARGET_X86 && __ELF__ && __PIC__ -/* Wrapper functions to deal with the ELF/x86 PIC disaster. 
*/ -LJ_FUNCA double lj_wrap_log(double x) { return log(x); } -LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); } -LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); } -LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); } -LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); } -LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); } -LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); } -LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); } -LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); } -LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); } -LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } -LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } -LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } -LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); } -LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } -#endif /* -- Helper functions for generated machine code ------------------------- */ @@ -47,18 +29,15 @@ double lj_vm_foldarith(double x, double y, int op) case IR_POW - IR_ADD: return pow(x, y); break; case IR_NEG - IR_ADD: return -x; break; case IR_ABS - IR_ADD: return fabs(x); break; -#if LJ_HASJIT case IR_ATAN2 - IR_ADD: return atan2(x, y); break; case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; case IR_MIN - IR_ADD: return x > y ? y : x; break; case IR_MAX - IR_ADD: return x < y ? y : x; break; -#endif default: return x; } } -#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS -int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) +int32_t lj_vm_modi(int32_t a, int32_t b) { uint32_t y, ua, ub; lua_assert(b != 0); /* This must be checked before using this function. 
*/ @@ -69,9 +48,7 @@ int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) if (((int32_t)y^b) < 0) y = (uint32_t)-(int32_t)y; return (int32_t)y; } -#endif -#if LJ_HASJIT #ifdef LUAJIT_NO_LOG2 double lj_vm_log2(double a) @@ -87,39 +64,6 @@ double lj_vm_exp2(double a) } #endif -#if !LJ_TARGET_X86ORX64 -/* Unsigned x^k. */ -static double lj_vm_powui(double x, uint32_t k) -{ - double y; - lua_assert(k != 0); - for (; (k & 1) == 0; k >>= 1) x *= x; - y = x; - if ((k >>= 1) != 0) { - for (;;) { - x *= x; - if (k == 1) break; - if (k & 1) y *= x; - k >>= 1; - } - y *= x; - } - return y; -} - -/* Signed x^k. */ -double lj_vm_powi(double x, int32_t k) -{ - if (k > 1) - return lj_vm_powui(x, (uint32_t)k); - else if (k == 1) - return x; - else if (k == 0) - return 1.0; - else - return 1.0 / lj_vm_powui(x, (uint32_t)-k); -} -#endif /* Computes fpm(x) for extended math functions. */ double lj_vm_foldfpm(double x, int fpm) @@ -142,11 +86,8 @@ double lj_vm_foldfpm(double x, int fpm) return 0; } -#if LJ_HASFFI int lj_vm_errno(void) { return errno; } -#endif -#endif diff --git a/src/lj_vmprofile.c b/src/lj_vmprofile.c new file mode 100644 index 0000000000..1e035bc6f5 --- /dev/null +++ b/src/lj_vmprofile.c @@ -0,0 +1,193 @@ +/* +** VM profiling. +** Copyright (C) 2016 Luke Gorrie. See Copyright Notice in luajit.h +*/ + +#define lj_vmprofile_c +#define LUA_CORE + +#define _GNU_SOURCE 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#undef _GNU_SOURCE + +#include "lj_err.h" +#include "lj_obj.h" +#include "lj_dispatch.h" +#include "lj_jit.h" +#include "lj_trace.h" +#include "lj_vmprofile.h" + +static struct { + global_State *g; + struct sigaction oldsa; +} state; + +static int started; + +static VMProfile *profile; /* Current counters */ + +/* -- Signal handler ------------------------------------------------------ */ + +/* Signal handler: bumps one counter. 
*/ +static void vmprofile_signal(int sig, siginfo_t *si, void *data) +{ + if (profile != NULL) { + int vmstate, trace; /* sample matrix indices */ + lua_State *L = gco2th(gcref(state.g->cur_L)); + /* + * The basic job of this function is to select the right indices + * into the profile counter matrix. That requires deciding which + * logical state the VM is in and which trace the sample should be + * attributed to. Heuristics are needed to pick appropriate values. + */ + if (state.g->vmstate > 0) { /* Running JIT mcode. */ + GCtrace *T = traceref(L2J(L), (TraceNo)state.g->vmstate); + intptr_t ip = (intptr_t)((ucontext_t*)data)->uc_mcontext.gregs[REG_RIP]; + ptrdiff_t mcposition = ip - (intptr_t)T->mcode; + if ((mcposition < 0) || (mcposition >= T->szmcode)) { + vmstate = LJ_VMST_FFI; /* IP is outside the trace mcode. */ + } else if ((T->mcloop != 0) && (mcposition >= T->mcloop)) { + vmstate = LJ_VMST_LOOP; /* IP is inside the mcode loop. */ + } else { + vmstate = LJ_VMST_HEAD; /* IP is inside mcode but not loop. */ + } + trace = state.g->vmstate; + } else { /* Running VM code (not JIT mcode.) */ + if (~state.g->vmstate == LJ_VMST_GC && state.g->gcvmstate > 0) { + /* Special case: GC invoked from JIT mcode. */ + vmstate = LJ_VMST_JGC; + trace = state.g->gcvmstate; + } else { + /* General case: count towards most recently exited trace. */ + vmstate = ~state.g->vmstate; + trace = state.g->lasttrace; + } + } + /* Handle overflow from individual trace counters. */ + trace = trace <= LJ_VMPROFILE_TRACE_MAX ? trace : 0; + /* Phew! We have calculated the indices and now we can bump the counter. 
*/ + lua_assert(vmstate >= 0 && vmstate <= LJ_VMST__MAX); + lua_assert(trace >= 0 && trace <= LJ_VMPROFILE_TRACE_MAX); + profile->count[trace][vmstate]++; + } +} + +static void start_timer(int interval) +{ + struct itimerval tm; + struct sigaction sa; + tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000; + tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000; + setitimer(ITIMER_VIRTUAL, &tm, NULL); + sa.sa_flags = SA_SIGINFO | SA_RESTART; + sa.sa_sigaction = vmprofile_signal; + sigemptyset(&sa.sa_mask); + sigaction(SIGVTALRM, &sa, &state.oldsa); +} + +static void stop_timer() +{ + struct itimerval tm; + tm.it_value.tv_sec = tm.it_interval.tv_sec = 0; + tm.it_value.tv_usec = tm.it_interval.tv_usec = 0; + setitimer(ITIMER_VIRTUAL, &tm, NULL); + sigaction(SIGVTALRM, NULL, &state.oldsa); +} + +/* -- State that the application can manage via FFI ----------------------- */ + +/* How much memory to allocate for profiler counters. */ +int vmprofile_get_profile_size() { + return sizeof(VMProfile); +} + +/* Open a counter file on disk and returned the mmapped data structure. */ +void *vmprofile_open_file(const char *filename) +{ + int fd; + void *ptr = MAP_FAILED; + if (((fd = open(filename, O_RDWR|O_CREAT, 0666)) != -1) && + ((ftruncate(fd, sizeof(VMProfile))) != -1) && + ((ptr = mmap(NULL, sizeof(VMProfile), PROT_READ|PROT_WRITE, + MAP_SHARED, fd, 0)) != MAP_FAILED)) { + memset(ptr, 0, sizeof(VMProfile)); + } + if (fd != -1) close(fd); + return ptr == MAP_FAILED ? NULL : ptr; +} + +/* Set the memory where the next samples will be counted. + Size of the memory must match vmprofile_get_profile_size(). 
*/ +void vmprofile_set_profile(void *counters) { + profile = (VMProfile*)counters; + profile->magic = 0x1d50f007; + profile->major = 4; + profile->minor = 0; +} + +void vmprofile_start(lua_State *L) +{ + if (!started) { + memset(&state, 0, sizeof(state)); + state.g = G(L); + start_timer(1); /* Sample every 1ms */ + started = 1; + } +} + +void vmprofile_stop() +{ + stop_timer(); + started = 0; +} + +/* -- Lua API ------------------------------------------------------------- */ + +LUA_API int luaJIT_vmprofile_open(lua_State *L, const char *str, int noselect, int nostart) +{ + void *ptr; + if ((ptr = vmprofile_open_file(str)) != NULL) { + setlightudV(L->base, checklightudptr(L, ptr)); + if (!noselect) vmprofile_set_profile(ptr); + if (!nostart) vmprofile_start(L); + } else { + setnilV(L->base); + } + return 1; +} + +LUA_API int luaJIT_vmprofile_close(lua_State *L, void *ud) +{ + munmap(ud, sizeof(VMProfile)); + return 0; +} + +LUA_API int luaJIT_vmprofile_select(lua_State *L, void *ud) +{ + setlightudV(L->base, checklightudptr(L, profile)); + vmprofile_set_profile(ud); + return 1; +} + +LUA_API int luaJIT_vmprofile_start(lua_State *L) +{ + vmprofile_start(L); + return 0; +} + +LUA_API int luaJIT_vmprofile_stop(lua_State *L) +{ + vmprofile_stop(); + return 0; +} + diff --git a/src/lj_vmprofile.h b/src/lj_vmprofile.h new file mode 100644 index 0000000000..c9d023a072 --- /dev/null +++ b/src/lj_vmprofile.h @@ -0,0 +1,40 @@ +/* +** Virtual machine profiling. +** Copyright (C) 2017 Luke Gorrie. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_VMPROFILE_H +#define _LJ_VMPROFILE_H + +#include +#include "lj_obj.h" + +/* Counters are 64-bit to avoid overflow even in long running processes. */ +typedef uint64_t VMProfileCount; + +/* Maximum trace number for distinct counter buckets. Traces with + higher numbers will be counted together in a shared overflow bucket. */ +#define LJ_VMPROFILE_TRACE_MAX 4096 + +/* Complete set of counters for VM and traces. 
*/ +typedef struct VMProfile { + uint32_t magic; /* 0x1d50f007 */ + uint16_t major, minor; /* 4, 0 */ + /* Profile counters are stored in a 2D matrix of count[trace][state]. + ** + ** The profiler attempts to attribute each sample to one vmstate and + ** one trace. The vmstate is an LJ_VMST_* constant. The trace is + ** either 1..4096 (counter for one individual trace) or 0 (shared + ** counter for all higher-numbered traces and for samples that can't + ** be attributed to a specific trace at all.) + **/ + VMProfileCount count[LJ_VMPROFILE_TRACE_MAX+1][LJ_VMST__MAX]; +} VMProfile; + +/* Functions that should be accessed via FFI. */ + +void *vmprofile_open_file(const char *filename); +void vmprofile_set_profile(void *counters); +int vmprofile_get_profile_size(); + +#endif diff --git a/src/ljamalg.c b/src/ljamalg.c deleted file mode 100644 index f1f28623fa..0000000000 --- a/src/ljamalg.c +++ /dev/null @@ -1,97 +0,0 @@ -/* -** LuaJIT core and libraries amalgamation. -** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -*/ - -/* -+--------------------------------------------------------------------------+ -| WARNING: Compiling the amalgamation needs a lot of virtual memory | -| (around 300 MB with GCC 4.x)! If you don't have enough physical memory | -| your machine will start swapping to disk and the compile will not finish | -| within a reasonable amount of time. | -| So either compile on a bigger machine or use the non-amalgamated build. | -+--------------------------------------------------------------------------+ -*/ - -#define ljamalg_c -#define LUA_CORE - -/* To get the mremap prototype. Must be defined before any system includes. 
*/ -#if defined(__linux__) && !defined(_GNU_SOURCE) -#define _GNU_SOURCE -#endif - -#ifndef WINVER -#define WINVER 0x0501 -#endif - -#include "lua.h" -#include "lauxlib.h" - -#include "lj_gc.c" -#include "lj_err.c" -#include "lj_char.c" -#include "lj_bc.c" -#include "lj_obj.c" -#include "lj_buf.c" -#include "lj_str.c" -#include "lj_tab.c" -#include "lj_func.c" -#include "lj_udata.c" -#include "lj_meta.c" -#include "lj_debug.c" -#include "lj_state.c" -#include "lj_dispatch.c" -#include "lj_vmevent.c" -#include "lj_vmmath.c" -#include "lj_strscan.c" -#include "lj_strfmt.c" -#include "lj_strfmt_num.c" -#include "lj_api.c" -#include "lj_profile.c" -#include "lj_lex.c" -#include "lj_parse.c" -#include "lj_bcread.c" -#include "lj_bcwrite.c" -#include "lj_load.c" -#include "lj_ctype.c" -#include "lj_cdata.c" -#include "lj_cconv.c" -#include "lj_ccall.c" -#include "lj_ccallback.c" -#include "lj_carith.c" -#include "lj_clib.c" -#include "lj_cparse.c" -#include "lj_lib.c" -#include "lj_ir.c" -#include "lj_opt_mem.c" -#include "lj_opt_fold.c" -#include "lj_opt_narrow.c" -#include "lj_opt_dce.c" -#include "lj_opt_loop.c" -#include "lj_opt_split.c" -#include "lj_opt_sink.c" -#include "lj_mcode.c" -#include "lj_snap.c" -#include "lj_record.c" -#include "lj_crecord.c" -#include "lj_ffrecord.c" -#include "lj_asm.c" -#include "lj_trace.c" -#include "lj_gdbjit.c" -#include "lj_alloc.c" - -#include "lib_aux.c" -#include "lib_base.c" -#include "lib_math.c" -#include "lib_string.c" -#include "lib_table.c" -#include "lib_io.c" -#include "lib_os.c" -#include "lib_package.c" -#include "lib_debug.c" -#include "lib_bit.c" -#include "lib_jit.c" -#include "lib_ffi.c" -#include "lib_init.c" - diff --git a/src/lua.h b/src/lua.h index 352d29f3cd..850bd796ca 100644 --- a/src/lua.h +++ b/src/lua.h @@ -39,7 +39,8 @@ #define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i)) -/* thread status; 0 is OK */ +/* thread status */ +#define LUA_OK 0 #define LUA_YIELD 1 #define LUA_ERRRUN 2 #define LUA_ERRSYNTAX 3 
@@ -347,6 +348,13 @@ LUA_API void *lua_upvalueid (lua_State *L, int idx, int n); LUA_API void lua_upvaluejoin (lua_State *L, int idx1, int n1, int idx2, int n2); LUA_API int lua_loadx (lua_State *L, lua_Reader reader, void *dt, const char *chunkname, const char *mode); +LUA_API const lua_Number *lua_version (lua_State *L); +LUA_API void lua_copy (lua_State *L, int fromidx, int toidx); +LUA_API lua_Number lua_tonumberx (lua_State *L, int idx, int *isnum); +LUA_API lua_Integer lua_tointegerx (lua_State *L, int idx, int *isnum); + +/* From Lua 5.3. */ +LUA_API int lua_isyieldable (lua_State *L); struct lua_Debug { diff --git a/src/luaconf.h b/src/luaconf.h index 87b052dba0..e43396bc01 100644 --- a/src/luaconf.h +++ b/src/luaconf.h @@ -37,7 +37,7 @@ #endif #define LUA_LROOT "/usr/local" #define LUA_LUADIR "/lua/5.1/" -#define LUA_LJDIR "/luajit-2.1.0-beta2/" +#define LUA_LJDIR "/raptorjit-1.0.0/" #ifdef LUA_ROOT #define LUA_JROOT LUA_ROOT @@ -79,7 +79,7 @@ #define LUA_IGMARK "-" #define LUA_PATH_CONFIG \ LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \ - LUA_EXECDIR "\n" LUA_IGMARK + LUA_EXECDIR "\n" LUA_IGMARK "\n" /* Quoting in error messages. */ #define LUA_QL(x) "'" x "'" @@ -92,10 +92,6 @@ #define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */ #define LUA_MAXCAPTURES 32 /* Max. pattern captures. */ -/* Compatibility with older library function names. */ -#define LUA_COMPAT_MOD /* OLD: math.mod, NEW: math.fmod */ -#define LUA_COMPAT_GFIND /* OLD: string.gfind, NEW: string.gmatch */ - /* Configuration for the frontend (the luajit executable). */ #if defined(luajit_c) #define LUA_PROGNAME "luajit" /* Fallback frontend name. 
*/ diff --git a/src/luajit.c b/src/luajit.c index 8c8cf9e6aa..dff189fa8e 100644 --- a/src/luajit.c +++ b/src/luajit.c @@ -16,31 +16,19 @@ #include "lauxlib.h" #include "lualib.h" #include "luajit.h" +#include "lj_vmprofile.h" #include "lj_arch.h" +#include "lj_auditlog.h" -#if LJ_TARGET_POSIX #include #define lua_stdin_is_tty() isatty(0) -#elif LJ_TARGET_WINDOWS -#include -#ifdef __BORLANDC__ -#define lua_stdin_is_tty() isatty(_fileno(stdin)) -#else -#define lua_stdin_is_tty() _isatty(_fileno(stdin)) -#endif -#else -#define lua_stdin_is_tty() 1 -#endif - -#if !LJ_TARGET_CONSOLE + #include -#endif static lua_State *globalL = NULL; static const char *progname = LUA_PROGNAME; -#if !LJ_TARGET_CONSOLE static void lstop(lua_State *L, lua_Debug *ar) { (void)ar; /* unused arg. */ @@ -57,7 +45,6 @@ static void laction(int i) terminate process (default action) */ lua_sethook(globalL, lstop, LUA_MASKCALL | LUA_MASKRET | LUA_MASKCOUNT, 1); } -#endif static void print_usage(void) { @@ -68,11 +55,13 @@ static void print_usage(void) " -e chunk Execute string " LUA_QL("chunk") ".\n" " -l name Require library " LUA_QL("name") ".\n" " -b ... 
Save or list bytecode.\n" - " -j cmd Perform LuaJIT control command.\n" - " -O[opt] Control LuaJIT optimizations.\n" + " -j cmd Perform RaptorJIT control command.\n" + " -O[opt] Control RaptorJIT optimizations.\n" " -i Enter interactive mode after executing " LUA_QL("script") ".\n" + " -p file Enable trace profiling to a VMProfile file.\n" " -v Show version information.\n" " -E Ignore environment variables.\n" + " -a path Enable auditlog at path.\n" " -- Stop handling options.\n" " - Execute stdin and stop handling options.\n", stderr); fflush(stderr); @@ -115,22 +104,18 @@ static int docall(lua_State *L, int narg, int clear) int base = lua_gettop(L) - narg; /* function index */ lua_pushcfunction(L, traceback); /* push traceback function */ lua_insert(L, base); /* put it under chunk and args */ -#if !LJ_TARGET_CONSOLE signal(SIGINT, laction); -#endif status = lua_pcall(L, narg, (clear ? 0 : LUA_MULTRET), base); -#if !LJ_TARGET_CONSOLE signal(SIGINT, SIG_DFL); -#endif lua_remove(L, base); /* remove traceback function */ /* force a complete garbage collection in case of errors */ - if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0); + if (status != LUA_OK) lua_gc(L, LUA_GCCOLLECT, 0); return status; } static void print_version(void) { - fputs(LUAJIT_VERSION " -- " LUAJIT_COPYRIGHT ". " LUAJIT_URL "\n", stdout); + fputs(LUAJIT_VERSION " -- " LUAJIT_URL "\n", stdout); } static void print_jit_status(lua_State *L) @@ -249,9 +234,9 @@ static void dotty(lua_State *L) const char *oldprogname = progname; progname = NULL; while ((status = loadline(L)) != -1) { - if (status == 0) status = docall(L, 0, 0); + if (status == LUA_OK) status = docall(L, 0, 0); report(L, status); - if (status == 0 && lua_gettop(L) > 0) { /* any result to print? */ + if (status == LUA_OK && lua_gettop(L) > 0) { /* any result to print? 
*/ lua_getglobal(L, "print"); lua_insert(L, 1); if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) @@ -273,7 +258,7 @@ static int handle_script(lua_State *L, char **argx) if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0) fname = NULL; /* stdin */ status = luaL_loadfile(L, fname); - if (status == 0) { + if (status == LUA_OK) { /* Fetch args from arg table. LUA_INIT or -e might have changed them. */ int narg = 0; lua_getglobal(L, "arg"); @@ -421,8 +406,10 @@ static int collectargs(char **argv, int *flags) break; case 'e': *flags |= FLAGS_EXEC; + case 'a': /* RaptorJIT extension */ case 'j': /* LuaJIT extension */ case 'l': + case 'p': /* RaptorJIT extension */ *flags |= FLAGS_OPTION; if (argv[i][2] == '\0') { i++; @@ -480,21 +467,37 @@ static int runargs(lua_State *L, char **argv, int argn) break; case 'b': /* LuaJIT extension. */ return dobytecode(L, argv+i); + case 'a': { /* RaptorJIT extension. */ + const char *filename = argv[i] + 2; + if (*filename == '\0') filename = argv[++i]; + /* XXX Support auditlog file size limit argument. 
*/ + if (!lj_auditlog_open(filename, 0)) { + fprintf(stderr, "unable to open auditlog\n"); + fflush(stderr); + } + break; + } + case 'p': { + const char *filename = argv[i] + 2; + if (*filename == '\0') filename = argv[++i]; + luaJIT_vmprofile_open(L, filename, 0, 0); + if (lua_isnil(L, -1)) { + fprintf(stderr, "unable to open vmprofile: %s\n", filename); + fflush(stderr); + } + break; + } default: break; } } - return 0; + return LUA_OK; } static int handle_luainit(lua_State *L) { -#if LJ_TARGET_CONSOLE - const char *init = NULL; -#else const char *init = getenv(LUA_INIT); -#endif if (init == NULL) - return 0; /* status OK */ + return LUA_OK; else if (init[0] == '@') return dofile(L, init+1); else @@ -539,17 +542,17 @@ static int pmain(lua_State *L) if (!(flags & FLAGS_NOENV)) { s->status = handle_luainit(L); - if (s->status != 0) return 0; + if (s->status != LUA_OK) return 0; } if ((flags & FLAGS_VERSION)) print_version(); s->status = runargs(L, argv, argn); - if (s->status != 0) return 0; + if (s->status != LUA_OK) return 0; if (s->argc > argn) { s->status = handle_script(L, argv + argn); - if (s->status != 0) return 0; + if (s->status != LUA_OK) return 0; } if ((flags & FLAGS_INTERACTIVE)) { diff --git a/src/luajit.h b/src/luajit.h index c1c801c9a0..44aa92ea88 100644 --- a/src/luajit.h +++ b/src/luajit.h @@ -30,11 +30,11 @@ #include "lua.h" -#define LUAJIT_VERSION "LuaJIT 2.1.0-beta2" -#define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */ -#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta2 -#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2017 Mike Pall" -#define LUAJIT_URL "http://luajit.org/" +#define LUAJIT_VERSION "RaptorJIT 1.0.0" +#define LUAJIT_VERSION_NUM 10000 /* Version 1.0.0 = 01.00.00. */ +#define LUAJIT_VERSION_SYM luaJIT_version_1_0_0 + +#define LUAJIT_URL "http://github.com/raptorjit/raptorjit" /* Modes for luaJIT_setmode. */ #define LUAJIT_MODE_MASK 0x00ff @@ -64,14 +64,12 @@ enum { /* Control the JIT engine. 
*/ LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode); -/* Low-overhead profiling API. */ -typedef void (*luaJIT_profile_callback)(void *data, lua_State *L, - int samples, int vmstate); -LUA_API void luaJIT_profile_start(lua_State *L, const char *mode, - luaJIT_profile_callback cb, void *data); -LUA_API void luaJIT_profile_stop(lua_State *L); -LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt, - int depth, size_t *len); +/* VM profiling API. */ +LUA_API int luaJIT_vmprofile_start(lua_State *L); +LUA_API int luaJIT_vmprofile_open(lua_State *L, const char *str, int noselect, int nostart); +LUA_API int luaJIT_vmprofile_select(lua_State *L, void *ud); +LUA_API int luaJIT_vmprofile_close(lua_State *L, void *ud); +LUA_API int luaJIT_vmprofile_stop(lua_State *L); /* Enforce (dynamic) linker error for version mismatches. Call from main. */ LUA_API void LUAJIT_VERSION_SYM(void); diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat deleted file mode 100644 index 5648a89922..0000000000 --- a/src/msvcbuild.bat +++ /dev/null @@ -1,122 +0,0 @@ -@rem Script to build LuaJIT with MSVC. -@rem Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -@rem -@rem Either open a "Visual Studio .NET Command Prompt" -@rem (Note that the Express Edition does not contain an x64 compiler) -@rem -or- -@rem Open a "Windows SDK Command Shell" and set the compiler environment: -@rem setenv /release /x86 -@rem -or- -@rem setenv /release /x64 -@rem -@rem Then cd to this directory and run this script. 
- -@if not defined INCLUDE goto :FAIL - -@setlocal -@set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE -@set LJLINK=link /nologo -@set LJMT=mt /nologo -@set LJLIB=lib /nologo /nodefaultlib -@set DASMDIR=..\dynasm -@set DASM=%DASMDIR%\dynasm.lua -@set DASC=vm_x86.dasc -@set LJDLLNAME=lua51.dll -@set LJLIBNAME=lua51.lib -@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c - -%LJCOMPILE% host\minilua.c -@if errorlevel 1 goto :BAD -%LJLINK% /out:minilua.exe minilua.obj -@if errorlevel 1 goto :BAD -if exist minilua.exe.manifest^ - %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe - -@set DASMFLAGS=-D WIN -D JIT -D FFI -D P64 -@set LJARCH=x64 -@minilua -@if errorlevel 8 goto :X64 -@set DASMFLAGS=-D WIN -D JIT -D FFI -@set LJARCH=x86 -@set LJCOMPILE=%LJCOMPILE% /arch:SSE2 -:X64 -@if "%1" neq "gc64" goto :NOGC64 -@shift -@set DASC=vm_x64.dasc -@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_ENABLE_GC64 -:NOGC64 -minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% -@if errorlevel 1 goto :BAD - -%LJCOMPILE% /I "." 
/I %DASMDIR% host\buildvm*.c -@if errorlevel 1 goto :BAD -%LJLINK% /out:buildvm.exe buildvm*.obj -@if errorlevel 1 goto :BAD -if exist buildvm.exe.manifest^ - %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe - -buildvm -m peobj -o lj_vm.obj -@if errorlevel 1 goto :BAD -buildvm -m bcdef -o lj_bcdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m ffdef -o lj_ffdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m libdef -o lj_libdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m recdef -o lj_recdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m folddef -o lj_folddef.h lj_opt_fold.c -@if errorlevel 1 goto :BAD - -@if "%1" neq "debug" goto :NODEBUG -@shift -@set LJCOMPILE=%LJCOMPILE% /Zi -@set LJLINK=%LJLINK% /debug /opt:ref /opt:icf /incremental:no -:NODEBUG -@if "%1"=="amalg" goto :AMALGDLL -@if "%1"=="static" goto :STATIC -%LJCOMPILE% /MD /DLUA_BUILD_AS_DLL lj_*.c lib_*.c -@if errorlevel 1 goto :BAD -%LJLINK% /DLL /out:%LJDLLNAME% lj_*.obj lib_*.obj -@if errorlevel 1 goto :BAD -@goto :MTDLL -:STATIC -%LJCOMPILE% lj_*.c lib_*.c -@if errorlevel 1 goto :BAD -%LJLIB% /OUT:%LJLIBNAME% lj_*.obj lib_*.obj -@if errorlevel 1 goto :BAD -@goto :MTDLL -:AMALGDLL -%LJCOMPILE% /MD /DLUA_BUILD_AS_DLL ljamalg.c -@if errorlevel 1 goto :BAD -%LJLINK% /DLL /out:%LJDLLNAME% ljamalg.obj lj_vm.obj -@if errorlevel 1 goto :BAD -:MTDLL -if exist %LJDLLNAME%.manifest^ - %LJMT% -manifest %LJDLLNAME%.manifest -outputresource:%LJDLLNAME%;2 - -%LJCOMPILE% luajit.c -@if errorlevel 1 goto :BAD -%LJLINK% /out:luajit.exe luajit.obj %LJLIBNAME% -@if errorlevel 1 goto :BAD -if exist luajit.exe.manifest^ - %LJMT% -manifest luajit.exe.manifest -outputresource:luajit.exe - -@del *.obj *.manifest minilua.exe buildvm.exe -@del host\buildvm_arch.h -@del lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h -@echo. 
-@echo === Successfully built LuaJIT for Windows/%LJARCH% === - -@goto :END -:BAD -@echo. -@echo ******************************************************* -@echo *** Build FAILED -- Please check the error messages *** -@echo ******************************************************* -@goto :END -:FAIL -@echo You must open a "Visual Studio .NET Command Prompt" to run this script -:END diff --git a/src/ps4build.bat b/src/ps4build.bat deleted file mode 100644 index e4a7defe7a..0000000000 --- a/src/ps4build.bat +++ /dev/null @@ -1,123 +0,0 @@ -@rem Script to build LuaJIT with the PS4 SDK. -@rem Donated to the public domain. -@rem -@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler) -@rem or "VS2015 x64 Native Tools Command Prompt". -@rem -@rem Then cd to this directory and run this script. -@rem -@rem Recommended invocation: -@rem -@rem ps4build release build, amalgamated, 64-bit GC -@rem ps4build debug debug build, amalgamated, 64-bit GC -@rem -@rem Additional command-line options (not generally recommended): -@rem -@rem gc32 (before debug) 32-bit GC -@rem noamalg (after debug) non-amalgamated build - -@if not defined INCLUDE goto :FAIL -@if not defined SCE_ORBIS_SDK_DIR goto :FAIL - -@setlocal -@rem ---- Host compiler ---- -@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE -@set LJLINK=link /nologo -@set LJMT=mt /nologo -@set DASMDIR=..\dynasm -@set DASM=%DASMDIR%\dynasm.lua -@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c -@set GC64=-DLUAJIT_ENABLE_GC64 -@set DASC=vm_x64.dasc - -@if "%1" neq "gc32" goto :NOGC32 -@shift -@set GC64= -@set DASC=vm_x86.dasc -:NOGC32 - -%LJCOMPILE% host\minilua.c -@if errorlevel 1 goto :BAD -%LJLINK% /out:minilua.exe minilua.obj -@if errorlevel 1 goto :BAD -if exist minilua.exe.manifest^ - %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe - -@rem Check for 64 bit host compiler. 
-@minilua -@if not errorlevel 8 goto :FAIL - -@set DASMFLAGS=-D P64 -D NO_UNWIND -minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% -@if errorlevel 1 goto :BAD - -%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c -@if errorlevel 1 goto :BAD -%LJLINK% /out:buildvm.exe buildvm*.obj -@if errorlevel 1 goto :BAD -if exist buildvm.exe.manifest^ - %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe - -buildvm -m elfasm -o lj_vm.s -@if errorlevel 1 goto :BAD -buildvm -m bcdef -o lj_bcdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m ffdef -o lj_ffdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m libdef -o lj_libdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m recdef -o lj_recdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m folddef -o lj_folddef.h lj_opt_fold.c -@if errorlevel 1 goto :BAD - -@rem ---- Cross compiler ---- -@set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI %GC64% -@set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus -@set INCLUDE="" - -orbis-as -o lj_vm.o lj_vm.s - -@if "%1" neq "debug" goto :NODEBUG -@shift -@set LJCOMPILE=%LJCOMPILE% -g -O0 -@set TARGETLIB=libluajitD_ps4.a -goto :BUILD -:NODEBUG -@set LJCOMPILE=%LJCOMPILE% -O2 -@set TARGETLIB=libluajit_ps4.a -:BUILD -del %TARGETLIB% -@if "%1" neq "noamalg" goto :AMALG -for %%f in (lj_*.c lib_*.c) do ( - %LJCOMPILE% %%f - @if errorlevel 1 goto :BAD -) - -%LJLIB% %TARGETLIB% lj_*.o lib_*.o -@if errorlevel 1 goto :BAD -@goto :NOAMALG -:AMALG -%LJCOMPILE% ljamalg.c -@if errorlevel 1 goto :BAD -%LJLIB% %TARGETLIB% ljamalg.o lj_vm.o -@if errorlevel 1 goto :BAD -:NOAMALG - -@del *.o *.obj *.manifest minilua.exe buildvm.exe -@echo. -@echo === Successfully built LuaJIT for PS4 === - -@goto :END -:BAD -@echo. 
-@echo ******************************************************* -@echo *** Build FAILED -- Please check the error messages *** -@echo ******************************************************* -@goto :END -:FAIL -@echo To run this script you must open a "Visual Studio .NET Command Prompt" -@echo (64 bit host compiler). The PS4 Orbis SDK must be installed, too. -:END diff --git a/src/psvitabuild.bat b/src/psvitabuild.bat deleted file mode 100644 index 3991dc6535..0000000000 --- a/src/psvitabuild.bat +++ /dev/null @@ -1,93 +0,0 @@ -@rem Script to build LuaJIT with the PS Vita SDK. -@rem Donated to the public domain. -@rem -@rem Open a "Visual Studio .NET Command Prompt" (32 bit host compiler) -@rem Then cd to this directory and run this script. - -@if not defined INCLUDE goto :FAIL -@if not defined SCE_PSP2_SDK_DIR goto :FAIL - -@setlocal -@rem ---- Host compiler ---- -@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE -@set LJLINK=link /nologo -@set LJMT=mt /nologo -@set DASMDIR=..\dynasm -@set DASM=%DASMDIR%\dynasm.lua -@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c - -%LJCOMPILE% host\minilua.c -@if errorlevel 1 goto :BAD -%LJLINK% /out:minilua.exe minilua.obj -@if errorlevel 1 goto :BAD -if exist minilua.exe.manifest^ - %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe - -@rem Check for 32 bit host compiler. -@minilua -@if errorlevel 8 goto :FAIL - -@set DASMFLAGS=-D FPU -D HFABI -minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_arm.dasc -@if errorlevel 1 goto :BAD - -%LJCOMPILE% /I "." 
/I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_ARM -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLJ_TARGET_PSVITA=1 host\buildvm*.c -@if errorlevel 1 goto :BAD -%LJLINK% /out:buildvm.exe buildvm*.obj -@if errorlevel 1 goto :BAD -if exist buildvm.exe.manifest^ - %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe - -buildvm -m elfasm -o lj_vm.s -@if errorlevel 1 goto :BAD -buildvm -m bcdef -o lj_bcdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m ffdef -o lj_ffdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m libdef -o lj_libdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m recdef -o lj_recdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m folddef -o lj_folddef.h lj_opt_fold.c -@if errorlevel 1 goto :BAD - -@rem ---- Cross compiler ---- -@set LJCOMPILE="%SCE_PSP2_SDK_DIR%\host_tools\build\bin\psp2snc" -c -w -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -@set LJLIB="%SCE_PSP2_SDK_DIR%\host_tools\build\bin\psp2ld32" -r --output= -@set INCLUDE="" - -"%SCE_PSP2_SDK_DIR%\host_tools\build\bin\psp2as" -o lj_vm.o lj_vm.s - -@if "%1" neq "debug" goto :NODEBUG -@shift -@set LJCOMPILE=%LJCOMPILE% -g -O0 -@set TARGETLIB=libluajitD.a -goto :BUILD -:NODEBUG -@set LJCOMPILE=%LJCOMPILE% -O2 -@set TARGETLIB=libluajit.a -:BUILD -del %TARGETLIB% - -%LJCOMPILE% ljamalg.c -@if errorlevel 1 goto :BAD -%LJLIB%%TARGETLIB% ljamalg.o lj_vm.o -@if errorlevel 1 goto :BAD - -@del *.o *.obj *.manifest minilua.exe buildvm.exe -@echo. -@echo === Successfully built LuaJIT for PS Vita === - -@goto :END -:BAD -@echo. -@echo ******************************************************* -@echo *** Build FAILED -- Please check the error messages *** -@echo ******************************************************* -@goto :END -:FAIL -@echo To run this script you must open a "Visual Studio .NET Command Prompt" -@echo (32 bit host compiler). The PS Vita SDK must be installed, too. 
-:END diff --git a/src/reusevm/host/buildvm_arch.h b/src/reusevm/host/buildvm_arch.h new file mode 100644 index 0000000000..c7098fa21b --- /dev/null +++ b/src/reusevm/host/buildvm_arch.h @@ -0,0 +1,5759 @@ +/* +** This file has been pre-processed with DynASM. +** http://luajit.org/dynasm.html +** DynASM version 1.4.0, DynASM x64 version 1.4.0 +** DO NOT EDIT! The original file is in "vm_x64.dasc". +*/ + +#line 1 "vm_x64.dasc" +//|// Low-level VM code for x64 CPUs in LJ_GC64 mode. +//|// Bytecode interpreter, fast functions and helper functions. +//|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +//| +//|.arch x64 +#if DASM_VERSION != 10400 +#error "Version mismatch between DynASM and included encoding engine" +#endif +#line 6 "vm_x64.dasc" +//|.section code_op, code_sub +#define DASM_SECTION_CODE_OP 0 +#define DASM_SECTION_CODE_SUB 1 +#define DASM_MAXSECTION 2 +#line 7 "vm_x64.dasc" +//| +//|.actionlist build_actionlist +static const unsigned char build_actionlist[15846] = { + 254,1,248,10,252,247,195,237,15,132,244,11,72,131,227,252,248,72,41,218,72, + 141,76,25,252,248,72,139,90,252,248,73,187,237,237,76,137,28,10,248,12,131, + 192,1,15,132,244,13,137,4,36,72,252,247,195,237,15,132,244,14,248,15,72,129, + 252,243,239,252,247,195,237,15,133,244,10,65,199,134,233,237,255,72,131,227, + 252,248,72,41,211,72,252,247,219,131,232,1,15,132,244,248,248,1,72,139,44, + 10,72,137,106,252,240,72,131,194,8,131,232,1,15,133,244,1,248,2,72,139,108, + 36,16,72,137,157,233,248,3,139,4,36,139,76,36,8,248,4,57,193,15,133,244,252, + 248,5,72,131,252,234,16,72,137,149,233,248,16,72,139,76,36,32,72,137,141, + 233,49,192,248,17,72,131,196,40,65,92,65,93,65,94,65,95,91,93,195,248,6,15, + 130,244,253,255,72,59,149,233,15,135,244,254,72,199,66,252,240,237,72,131, + 194,8,131,192,1,252,233,244,4,248,7,133,201,15,132,244,5,72,41,193,72,141, + 20,202,252,233,244,5,248,8,72,137,149,233,137,4,36,137,206,72,137,252,239, + 
232,251,1,0,72,139,149,233,252,233,244,3,248,13,176,235,252,233,244,18,248, + 19,255,137,252,240,72,137,252,252,248,18,72,139,108,36,16,72,139,173,233, + 199,133,233,237,252,233,244,17,248,20,72,139,124,36,16,137,198,72,131,196, + 40,65,92,65,93,65,94,65,95,91,93,252,233,251,1,1,248,21,72,129,231,239,72, + 137,252,252,248,22,72,139,108,36,16,184,237,72,139,149,233,76,139,181,233, + 73,129,198,239,72,139,90,252,248,72,185,237,237,72,139,42,72,137,74,252,240, + 72,137,106,252,248,72,199,193,252,240,252,255,252,255,252,255,65,199,134, + 233,237,252,233,244,12,248,23,255,190,237,252,233,244,248,248,24,72,131,232, + 16,252,233,244,247,248,25,72,141,68,194,252,248,248,1,15,182,139,233,72,131, + 195,4,72,137,149,233,72,137,133,233,72,137,92,36,24,72,137,206,248,2,72,137, + 252,239,232,251,1,0,72,139,149,233,72,139,133,233,72,139,106,252,240,72,193, + 229,17,72,193,252,237,17,72,41,208,193,232,3,131,192,1,72,139,157,233,139, + 11,15,182,252,233,15,182,205,72,131,195,4,65,252,255,36,252,238,248,26,85, + 83,65,87,65,86,65,85,65,84,72,131,252,236,40,72,137,252,253,72,137,124,36, + 16,72,137,252,241,187,237,49,192,76,141,188,253,36,233,76,139,181,233,255, + 73,129,198,239,72,137,68,36,24,72,137,68,36,32,137,68,36,8,137,68,36,12,76, + 137,189,233,56,133,233,15,132,244,248,73,137,174,233,65,199,134,233,237,136, + 133,233,72,139,149,233,72,139,133,233,72,41,200,193,232,3,131,192,1,72,41, + 209,72,139,90,252,248,137,4,36,252,247,195,237,15,132,244,14,252,233,244, + 15,248,27,85,83,65,87,65,86,65,85,65,84,72,131,252,236,40,187,237,137,76, + 36,12,252,233,244,247,248,28,255,85,83,65,87,65,86,65,85,65,84,72,131,252, + 236,40,187,237,248,1,137,84,36,8,72,137,252,253,72,137,124,36,16,72,137,252, + 241,76,139,181,233,76,139,189,233,76,137,124,36,32,72,137,108,36,24,73,129, + 198,239,72,137,165,233,248,2,73,137,174,233,65,199,134,233,237,72,139,149, + 233,72,1,203,72,41,211,72,139,133,233,72,41,200,193,232,3,131,192,1,248,29, + 
72,139,105,252,240,73,137,252,235,72,193,229,17,72,193,252,237,17,73,193, + 252,251,47,65,129,252,251,239,15,133,244,30,248,31,72,137,202,72,137,90,252, + 248,72,139,157,233,139,11,15,182,252,233,15,182,205,72,131,195,4,65,252,255, + 36,252,238,248,32,255,85,83,65,87,65,86,65,85,65,84,72,131,252,236,40,72, + 137,252,253,72,137,124,36,16,72,137,108,36,24,76,139,189,233,76,43,189,233, + 76,139,181,233,199,68,36,12,0,0,0,0,68,137,124,36,8,73,129,198,239,76,139, + 189,233,76,137,124,36,32,72,137,165,233,73,137,174,233,252,255,209,72,133, + 192,15,132,244,16,72,137,193,187,237,252,233,244,2,248,11,72,1,209,72,131, + 227,252,248,72,137,213,72,41,218,72,199,68,193,252,248,237,72,137,200,72, + 139,93,232,72,139,77,224,72,131,252,249,1,15,134,244,247,76,139,122,252,240, + 73,193,231,17,73,193,252,239,17,77,139,191,233,77,139,191,233,252,255,225, + 248,1,255,15,132,244,33,72,41,213,193,252,237,3,141,69,252,253,252,233,244, + 34,248,35,15,182,75,252,255,72,131,252,237,32,72,141,12,202,72,41,252,233, + 15,132,244,36,72,252,247,217,193,252,233,3,72,139,124,36,16,72,137,151,233, + 137,202,72,139,8,72,137,77,0,72,137,252,238,252,233,244,37,248,38,73,187, + 237,237,76,9,216,72,137,4,36,72,141,4,36,128,123,252,252,235,15,133,244,247, + 72,185,237,237,72,9,252,233,73,141,174,233,255,72,137,77,0,252,233,244,248, + 248,39,15,182,67,252,254,252,242,15,42,192,252,242,15,17,4,36,72,141,4,36, + 252,233,244,247,248,40,15,182,67,252,254,72,141,4,194,248,1,15,182,107,252, + 255,72,141,44,252,234,248,2,72,139,124,36,16,72,137,151,233,72,137,252,238, + 72,137,194,72,137,252,253,72,137,92,36,24,232,251,1,2,72,139,149,233,72,133, + 192,15,132,244,249,248,36,15,182,75,252,253,72,139,40,72,137,44,202,139,3, + 15,182,204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,248,3, + 72,139,141,233,72,137,89,232,72,141,153,233,72,41,211,72,139,105,252,240, + 184,237,72,193,229,17,72,193,252,237,17,252,233,244,31,248,41,255,72,137, + 
252,239,72,137,213,137,198,232,251,1,3,15,182,75,252,253,72,137,252,234,72, + 133,192,15,133,244,42,73,199,195,237,252,233,244,43,248,44,73,187,237,237, + 76,9,216,72,137,4,36,72,141,4,36,128,123,252,252,235,15,133,244,247,72,185, + 237,237,72,9,252,233,73,141,174,233,72,137,77,0,252,233,244,248,248,45,15, + 182,67,252,254,252,242,15,42,192,252,242,15,17,4,36,72,141,4,36,252,233,244, + 247,248,46,255,15,182,67,252,254,72,141,4,194,248,1,15,182,107,252,255,72, + 141,44,252,234,248,2,72,139,124,36,16,72,137,151,233,72,137,252,238,72,137, + 194,72,137,252,253,72,137,92,36,24,232,251,1,4,72,139,149,233,72,133,192, + 15,132,244,249,15,182,75,252,253,72,139,44,202,72,137,40,248,47,139,3,15, + 182,204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,248,3,72, + 139,141,233,72,137,89,232,15,182,67,252,253,72,139,44,194,72,137,105,16,72, + 141,153,233,72,41,211,72,139,105,252,240,184,237,72,193,229,17,72,193,252, + 237,17,252,233,244,31,248,48,72,139,124,36,16,72,137,252,238,72,137,151,233, + 72,137,213,137,194,72,137,92,36,24,232,251,1,5,15,182,75,252,253,72,137,252, + 234,252,233,244,49,248,50,15,183,67,252,254,15,182,75,252,253,72,139,108, + 36,16,72,137,149,233,255,72,141,52,202,72,141,20,194,72,137,252,239,15,182, + 75,252,252,72,137,92,36,24,232,251,1,6,248,3,72,139,149,233,72,131,252,248, + 1,15,135,244,51,248,4,72,141,91,4,15,130,244,252,248,5,15,183,67,252,254, + 72,141,156,253,131,233,248,6,139,3,15,182,204,15,182,232,72,131,195,4,193, + 232,16,65,252,255,36,252,238,248,52,72,131,195,4,76,139,24,73,193,252,251, + 47,65,129,252,251,239,15,130,244,5,252,233,244,6,248,53,76,139,24,73,193, + 252,251,47,65,129,252,251,239,252,233,244,4,248,54,255,72,193,224,17,72,193, + 232,17,72,131,252,235,4,72,137,206,137,252,233,72,139,108,36,16,72,137,149, + 233,72,137,194,72,137,252,239,72,137,92,36,24,232,251,1,7,252,233,244,3,248, + 55,72,131,252,235,4,72,139,108,36,16,72,137,149,233,72,137,252,239,139,115, + 
252,252,72,137,92,36,24,232,251,1,8,252,233,244,3,248,56,72,139,108,36,16, + 72,137,149,233,137,206,137,194,72,137,252,239,72,137,92,36,24,232,251,1,9, + 72,139,149,233,252,233,244,6,248,57,248,58,73,141,4,199,252,233,244,247,248, + 59,248,60,77,141,20,199,72,141,4,252,234,76,137,213,252,233,244,248,248,61, + 255,72,141,4,194,72,137,197,252,233,244,248,248,62,248,63,72,141,4,194,248, + 1,72,141,44,252,234,248,2,72,141,12,202,68,15,182,67,252,252,72,137,206,72, + 137,193,72,139,124,36,16,72,137,151,233,72,137,252,234,72,137,252,253,72, + 137,92,36,24,232,251,1,10,72,139,149,233,72,133,192,15,132,244,47,248,51, + 72,137,193,72,41,208,72,137,89,232,72,141,152,233,184,237,252,233,244,29, + 248,64,15,183,67,252,254,72,139,108,36,16,72,137,149,233,72,141,52,194,72, + 137,252,239,72,137,92,36,24,232,251,1,11,72,139,149,233,255,72,133,192,15, + 133,244,51,15,183,67,252,254,72,139,60,194,72,193,231,17,72,193,252,239,17, + 252,233,244,65,255,252,233,244,51,255,248,66,72,141,76,202,16,248,30,137, + 4,36,72,137,205,72,139,124,36,16,72,137,151,233,72,141,113,252,240,72,141, + 84,193,252,248,72,137,92,36,24,232,251,1,12,72,137,252,233,72,139,108,36, + 16,72,139,149,233,139,4,36,72,139,105,252,240,131,192,1,73,57,215,15,132, + 244,67,72,193,229,17,72,193,252,237,17,72,137,202,72,137,90,252,248,72,139, + 157,233,139,11,15,182,252,233,15,182,205,72,131,195,4,65,252,255,36,252,238, + 248,68,72,139,108,36,16,72,137,149,233,72,137,206,72,137,252,239,72,137,92, + 36,24,232,251,1,13,72,139,149,233,139,67,252,252,15,182,204,15,182,232,193, + 232,16,65,252,255,164,253,252,238,233,248,69,129,252,248,239,15,130,244,70, + 76,139,26,76,137,221,73,193,252,251,47,65,129,252,251,239,15,131,244,70,72, + 139,90,252,248,137,4,36,72,139,42,72,137,106,252,240,131,232,2,15,132,244, + 248,255,72,137,209,248,1,72,131,193,8,72,139,41,72,137,105,252,240,131,232, + 1,15,133,244,1,248,2,139,4,36,252,233,244,71,248,72,129,252,248,239,15,130, + 
244,70,72,139,2,72,193,252,248,47,189,237,57,232,15,66,197,252,247,208,248, + 2,72,139,106,252,240,72,193,229,17,72,193,252,237,17,72,139,132,253,197,233, + 72,139,90,252,248,73,187,237,237,76,9,216,72,137,66,252,240,252,233,244,73, + 248,74,129,252,248,239,255,15,130,244,70,72,139,42,72,139,90,252,248,73,137, + 252,235,72,193,229,17,72,193,252,237,17,73,193,252,251,47,65,129,252,251, + 239,15,133,244,252,248,1,72,139,173,233,248,2,72,133,252,237,72,199,66,252, + 240,237,15,132,244,73,72,184,237,237,72,9,232,72,137,66,252,240,73,139,134, + 233,139,141,233,35,136,233,73,187,237,237,76,9,216,105,201,239,255,72,3,141, + 233,248,3,72,57,129,233,15,132,244,251,248,4,72,139,137,233,72,133,201,15, + 133,244,3,252,233,244,73,248,5,72,139,169,233,72,129,252,253,239,15,132,244, + 73,72,137,106,252,240,252,233,244,73,248,6,255,65,129,252,251,239,15,132, + 244,1,65,129,252,251,239,15,135,244,253,65,187,237,248,7,65,252,247,211,75, + 139,172,253,222,233,252,233,244,2,248,75,129,252,248,239,15,130,244,70,72, + 139,42,73,137,252,234,73,137,252,235,72,193,229,17,72,193,252,237,17,73,193, + 252,251,47,65,129,252,251,239,15,133,244,70,72,131,189,233,0,255,15,133,244, + 70,72,139,74,8,73,137,203,72,193,225,17,72,193,252,233,17,73,193,252,251, + 47,65,129,252,251,239,15,133,244,70,72,137,141,233,72,139,90,252,248,76,137, + 82,252,240,252,246,133,233,235,15,132,244,247,128,165,233,235,73,139,134, + 233,73,137,174,233,72,137,133,233,248,1,252,233,244,73,248,76,255,129,252, + 248,239,15,130,244,70,72,139,50,73,137,252,243,72,193,230,17,72,193,252,238, + 17,73,193,252,251,47,65,129,252,251,239,15,133,244,70,72,137,213,72,141,82, + 8,72,139,124,36,16,232,251,1,14,72,137,252,234,72,139,40,72,139,90,252,248, + 72,137,106,252,240,252,233,244,73,248,77,129,252,248,239,15,133,244,70,72, + 139,42,73,137,252,235,73,193,252,251,47,65,129,252,251,239,15,135,244,70, + 72,139,90,252,248,72,137,106,252,240,252,233,244,73,248,78,129,252,248,239, + 
255,15,130,244,70,72,139,90,252,248,72,139,42,73,137,252,235,73,193,252,251, + 47,65,129,252,251,239,15,133,244,249,248,2,72,137,106,252,240,252,233,244, + 73,248,3,65,129,252,251,239,15,135,244,79,73,131,190,233,0,15,133,244,70, + 73,139,174,233,73,59,174,233,15,130,244,247,255,232,244,80,248,1,72,139,108, + 36,16,72,137,149,233,72,137,92,36,24,72,137,214,72,137,252,239,232,251,1, + 15,72,139,149,233,72,189,237,237,72,9,197,252,233,244,2,248,81,129,252,248, + 239,15,130,244,70,15,132,244,248,248,1,72,139,50,73,137,252,243,72,193,230, + 17,72,193,252,238,17,73,193,252,251,47,65,129,252,251,239,15,133,244,70,255, + 72,139,108,36,16,72,137,149,233,72,137,149,233,72,139,90,252,248,72,141,82, + 8,72,137,252,239,72,137,92,36,24,232,251,1,16,72,139,149,233,133,192,15,132, + 244,249,72,139,106,8,72,139,66,16,72,137,106,252,240,72,137,66,252,248,248, + 82,184,237,252,233,244,83,248,2,72,199,66,8,237,252,233,244,1,248,3,72,199, + 66,252,240,237,252,233,244,73,248,84,129,252,248,239,255,15,130,244,70,72, + 139,42,73,137,252,234,73,137,252,235,72,193,229,17,72,193,252,237,17,73,193, + 252,251,47,65,129,252,251,239,15,133,244,70,255,72,131,189,233,0,15,133,244, + 70,255,72,139,66,252,240,72,193,224,17,72,193,232,17,72,139,128,233,73,187, + 237,237,76,9,216,72,139,90,252,248,72,137,66,252,240,76,137,82,252,248,72, + 199,2,237,184,237,252,233,244,83,248,85,129,252,248,239,15,130,244,70,72, + 139,42,73,137,252,235,72,193,229,17,72,193,252,237,17,73,193,252,251,47,65, + 129,252,251,239,15,133,244,70,76,139,90,8,73,193,252,251,47,65,129,252,251, + 239,15,131,244,70,252,242,15,16,66,8,72,139,90,252,248,73,186,237,237,255, + 102,73,15,110,202,252,242,15,88,193,252,242,15,44,200,252,242,15,17,66,252, + 240,59,141,233,15,131,244,248,72,139,133,233,72,141,4,200,248,1,72,129,56, + 239,15,132,244,86,72,139,40,72,137,106,252,248,252,233,244,82,248,2,131,189, + 233,0,15,132,244,86,72,137,252,239,72,137,213,137,206,232,251,1,3,72,137, + 
252,234,72,133,192,15,133,244,1,248,86,184,237,252,233,244,83,248,87,255, + 129,252,248,239,15,130,244,70,72,139,42,73,137,252,234,73,137,252,235,72, + 193,229,17,72,193,252,237,17,73,193,252,251,47,65,129,252,251,239,15,133, + 244,70,255,72,139,66,252,240,72,193,224,17,72,193,232,17,72,139,128,233,73, + 187,237,237,76,9,216,72,139,90,252,248,72,137,66,252,240,76,137,82,252,248, + 72,199,2,0,0,0,0,184,237,252,233,244,83,248,88,129,252,248,239,15,130,244, + 70,72,141,74,16,131,232,1,187,237,248,1,65,15,182,174,233,72,193,252,237, + 235,72,131,229,1,72,1,252,235,73,137,199,248,2,74,139,108,252,249,232,74, + 137,108,252,249,252,240,73,131,252,239,1,15,135,244,2,252,233,244,29,248, + 89,255,129,252,248,239,15,130,244,70,72,139,74,8,73,137,203,73,193,252,251, + 47,65,129,252,251,239,15,133,244,70,72,139,42,72,137,10,72,137,106,8,72,141, + 74,24,131,232,2,187,237,252,233,244,1,248,90,129,252,248,239,15,130,244,70, + 72,139,42,72,193,229,17,72,193,252,237,17,72,139,90,252,248,72,137,92,36, + 24,72,137,44,36,76,139,26,73,193,252,251,47,65,129,252,251,239,15,133,244, + 70,72,131,189,233,0,15,133,244,70,255,128,189,233,235,15,135,244,70,72,139, + 141,233,15,132,244,247,72,59,141,233,15,132,244,70,72,139,89,252,248,72,137, + 25,72,131,193,8,248,1,72,141,92,193,252,240,72,59,157,233,15,135,244,70,72, + 137,157,233,72,139,108,36,16,72,137,149,233,72,131,194,8,72,137,149,233,72, + 141,108,194,232,72,41,221,72,57,203,15,132,244,249,248,2,255,72,139,4,43, + 72,137,67,252,248,72,131,252,235,8,72,57,203,15,133,244,2,248,3,72,137,206, + 72,139,60,36,232,244,26,72,139,108,36,16,72,139,28,36,72,139,149,233,73,137, + 174,233,65,199,134,233,237,129,252,248,239,15,135,244,254,248,4,72,139,139, + 233,76,139,187,233,72,137,139,233,76,137,252,251,72,41,203,15,132,244,252, + 72,141,4,26,193,252,235,3,72,59,133,233,255,15,135,244,255,72,137,213,72, + 41,205,248,5,72,139,1,72,137,4,41,72,131,193,8,76,57,252,249,15,133,244,5, + 
248,6,141,67,2,73,187,237,237,76,137,90,252,248,248,7,72,139,92,36,24,137, + 4,36,72,199,193,252,248,252,255,252,255,252,255,252,247,195,237,15,132,244, + 14,252,233,244,15,248,8,73,187,237,237,76,137,90,252,248,72,139,139,233,72, + 131,252,233,8,72,137,139,233,255,72,139,1,72,137,2,184,237,252,233,244,7, + 248,9,72,139,12,36,76,137,185,233,72,137,222,72,137,252,239,232,251,1,0,72, + 139,28,36,72,139,149,233,252,233,244,4,248,91,72,139,106,252,240,72,193,229, + 17,72,193,252,237,17,72,139,173,233,72,193,229,17,72,193,252,237,17,72,139, + 90,252,248,72,137,92,36,24,72,137,44,36,72,131,189,233,0,15,133,244,70,128, + 189,233,235,15,135,244,70,72,139,141,233,15,132,244,247,255,72,59,141,233, + 15,132,244,70,72,139,89,252,248,72,137,25,72,131,193,8,248,1,72,141,92,193, + 252,248,72,59,157,233,15,135,244,70,72,137,157,233,72,139,108,36,16,72,137, + 149,233,72,137,149,233,72,141,108,194,252,240,72,41,221,72,57,203,15,132, + 244,249,248,2,72,139,4,43,72,137,67,252,248,72,131,252,235,8,72,57,203,15, + 133,244,2,248,3,72,137,206,72,139,60,36,232,244,26,72,139,108,36,16,72,139, + 28,36,72,139,149,233,255,73,137,174,233,65,199,134,233,237,129,252,248,239, + 15,135,244,254,248,4,72,139,139,233,76,139,187,233,72,137,139,233,76,137, + 252,251,72,41,203,15,132,244,252,72,141,4,26,193,252,235,3,72,59,133,233, + 15,135,244,255,72,137,213,72,41,205,248,5,72,139,1,72,137,4,41,72,131,193, + 8,76,57,252,249,15,133,244,5,248,6,255,141,67,1,248,7,72,139,92,36,24,137, + 4,36,49,201,252,247,195,237,15,132,244,14,252,233,244,15,248,8,72,137,222, + 72,137,252,239,232,251,1,17,248,9,72,139,12,36,76,137,185,233,72,137,222, + 72,137,252,239,232,251,1,0,72,139,28,36,72,139,149,233,252,233,244,4,248, + 92,72,139,108,36,16,72,252,247,133,233,237,15,132,244,70,72,137,149,233,72, + 141,68,194,252,248,72,137,133,233,255,49,192,72,137,133,233,176,235,136,133, + 233,252,233,244,17,248,93,129,252,248,239,15,130,244,70,72,139,42,73,137, + 
252,235,73,193,252,251,47,65,129,252,251,239,15,131,244,70,72,209,229,72, + 209,252,237,72,139,90,252,248,72,137,106,252,240,252,233,244,73,248,94,129, + 252,248,239,15,130,244,70,76,139,26,73,193,252,251,47,65,129,252,251,239, + 255,15,131,244,70,252,242,15,81,2,248,95,72,139,90,252,248,252,242,15,17, + 66,252,240,248,73,184,237,248,83,137,4,36,248,71,252,247,195,237,15,133,244, + 253,248,5,56,67,252,255,15,135,244,252,15,182,75,252,253,72,252,247,217,72, + 141,84,202,252,240,139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65, + 252,255,36,252,238,248,6,72,199,68,194,232,237,72,131,192,1,252,233,244,5, + 248,7,72,199,193,252,240,252,255,252,255,252,255,252,233,244,15,248,96,255, + 76,139,26,73,193,252,251,47,65,129,252,251,239,15,131,244,70,252,242,15,16, + 2,232,244,97,252,233,244,95,248,98,76,139,26,73,193,252,251,47,65,129,252, + 251,239,15,131,244,70,252,242,15,16,2,232,244,99,252,233,244,95,248,100,129, + 252,248,239,15,133,244,70,255,76,139,26,73,193,252,251,47,65,129,252,251, + 239,15,131,244,70,252,242,15,16,2,72,137,213,232,251,1,18,72,137,252,234, + 252,233,244,95,248,101,129,252,248,239,15,130,244,70,76,139,26,73,193,252, + 251,47,65,129,252,251,239,15,131,244,70,252,242,15,16,2,72,137,213,232,251, + 1,19,72,137,252,234,252,233,244,95,248,102,129,252,248,239,15,130,244,70, + 76,139,26,73,193,252,251,47,65,129,252,251,239,255,15,131,244,70,252,242, + 15,16,2,72,137,213,232,251,1,20,72,137,252,234,252,233,244,95,248,103,129, + 252,248,239,15,130,244,70,76,139,26,73,193,252,251,47,65,129,252,251,239, + 15,131,244,70,252,242,15,16,2,72,137,213,232,251,1,21,72,137,252,234,252, + 233,244,95,248,104,129,252,248,239,15,130,244,70,76,139,26,73,193,252,251, + 47,65,129,252,251,239,15,131,244,70,255,252,242,15,16,2,72,137,213,232,251, + 1,22,72,137,252,234,252,233,244,95,248,105,129,252,248,239,15,130,244,70, + 76,139,26,73,193,252,251,47,65,129,252,251,239,15,131,244,70,252,242,15,16, + 
2,72,137,213,232,251,1,23,72,137,252,234,252,233,244,95,248,106,129,252,248, + 239,15,130,244,70,76,139,26,73,193,252,251,47,65,129,252,251,239,15,131,244, + 70,252,242,15,16,2,72,137,213,232,251,1,24,72,137,252,234,252,233,244,95, + 248,107,255,129,252,248,239,15,130,244,70,76,139,26,73,193,252,251,47,65, + 129,252,251,239,15,131,244,70,252,242,15,16,2,72,137,213,232,251,1,25,72, + 137,252,234,252,233,244,95,248,108,129,252,248,239,15,130,244,70,76,139,26, + 73,193,252,251,47,65,129,252,251,239,15,131,244,70,252,242,15,16,2,72,137, + 213,232,251,1,26,72,137,252,234,252,233,244,95,248,109,129,252,248,239,255, + 15,130,244,70,76,139,26,73,193,252,251,47,65,129,252,251,239,15,131,244,70, + 252,242,15,16,2,72,137,213,232,251,1,27,72,137,252,234,252,233,244,95,248, + 110,129,252,248,239,15,130,244,70,76,139,26,73,193,252,251,47,65,129,252, + 251,239,15,131,244,70,252,242,15,16,2,72,137,213,232,251,1,28,72,137,252, + 234,252,233,244,95,248,111,129,252,248,239,15,130,244,70,255,76,139,26,73, + 193,252,251,47,65,129,252,251,239,15,131,244,70,252,242,15,16,2,72,137,213, + 232,251,1,29,72,137,252,234,252,233,244,95,248,112,129,252,248,239,15,130, + 244,70,76,139,26,73,193,252,251,47,65,129,252,251,239,15,131,244,70,76,139, + 90,8,73,193,252,251,47,65,129,252,251,239,15,131,244,70,252,242,15,16,2,252, + 242,15,16,74,8,72,137,213,232,251,1,30,72,137,252,234,252,233,244,95,248, + 113,129,252,248,239,255,15,130,244,70,76,139,26,73,193,252,251,47,65,129, + 252,251,239,15,131,244,70,76,139,90,8,73,193,252,251,47,65,129,252,251,239, + 15,131,244,70,252,242,15,16,2,252,242,15,16,74,8,72,137,213,232,251,1,31, + 72,137,252,234,252,233,244,95,248,114,129,252,248,239,15,130,244,70,76,139, + 26,73,193,252,251,47,65,129,252,251,239,15,131,244,70,76,139,90,8,73,193, + 252,251,47,65,129,252,251,239,15,131,244,70,255,252,242,15,16,2,252,242,15, + 16,74,8,72,137,213,232,251,1,32,72,137,252,234,252,233,244,95,248,115,129, + 
252,248,239,15,130,244,70,76,139,26,73,193,252,251,47,65,129,252,251,239, + 15,131,244,70,76,139,90,8,73,193,252,251,47,65,129,252,251,239,15,131,244, + 70,221,66,8,221,2,217,252,253,221,217,72,139,90,252,248,221,90,252,240,252, + 233,244,73,248,116,129,252,248,239,15,130,244,70,76,139,26,73,193,252,251, + 47,65,129,252,251,239,255,15,131,244,70,252,242,15,16,2,72,137,213,72,141, + 60,36,232,251,1,33,72,137,252,234,139,44,36,72,139,90,252,248,252,242,15, + 17,66,252,240,252,242,15,42,205,252,242,15,17,74,252,248,184,237,252,233, + 244,83,248,117,129,252,248,239,15,130,244,70,76,139,26,73,193,252,251,47, + 65,129,252,251,239,15,131,244,70,252,242,15,16,2,72,137,213,72,141,122,252, + 240,232,251,1,34,72,137,252,234,72,139,90,252,248,252,242,15,17,66,252,248, + 184,237,252,233,244,83,248,118,185,2,0,0,0,76,139,26,73,193,252,251,47,65, + 129,252,251,239,15,131,244,70,255,252,242,15,16,2,248,5,57,193,15,131,244, + 95,76,139,92,202,252,248,73,193,252,251,47,65,129,252,251,239,15,131,244, + 70,248,6,252,242,15,16,76,202,252,248,248,7,252,242,15,93,193,131,193,1,252, + 233,244,5,248,119,185,2,0,0,0,76,139,26,73,193,252,251,47,65,129,252,251, + 239,15,131,244,70,252,242,15,16,2,248,5,57,193,15,131,244,95,76,139,92,202, + 252,248,73,193,252,251,47,65,129,252,251,239,15,131,244,70,248,6,255,252, + 242,15,16,76,202,252,248,248,7,252,242,15,95,193,131,193,1,252,233,244,5, + 248,120,129,252,248,239,15,133,244,70,72,139,42,73,137,252,235,72,193,229, + 17,72,193,252,237,17,73,193,252,251,47,65,129,252,251,239,15,133,244,70,72, + 139,90,252,248,131,189,233,1,15,130,244,86,15,182,173,233,252,242,15,42,197, + 252,233,244,95,248,121,73,139,174,233,73,59,174,233,255,15,130,244,247,232, + 244,80,248,1,129,252,248,239,15,133,244,70,76,139,26,73,193,252,251,47,65, + 129,252,251,239,15,131,244,70,252,242,15,44,42,129,252,253,252,255,0,0,0, + 15,135,244,70,137,44,36,65,186,1,0,0,0,72,141,4,36,248,122,72,139,108,36, + 
16,72,137,149,233,68,137,210,72,137,198,72,137,252,239,72,137,92,36,24,232, + 251,1,35,248,123,72,139,149,233,72,139,90,252,248,73,187,237,237,255,76,9, + 216,72,137,66,252,240,252,233,244,73,248,124,73,139,174,233,73,59,174,233, + 15,130,244,247,232,244,80,248,1,65,186,252,255,252,255,252,255,252,255,129, + 252,248,239,15,130,244,70,15,134,244,247,76,139,90,16,73,193,252,251,47,65, + 129,252,251,239,15,131,244,70,252,242,68,15,44,82,16,248,1,255,72,139,42, + 73,137,252,235,72,193,229,17,72,193,252,237,17,73,193,252,251,47,65,129,252, + 251,239,15,133,244,70,76,139,90,8,73,193,252,251,47,65,129,252,251,239,15, + 131,244,70,252,242,15,44,74,8,139,133,233,68,57,208,15,130,244,251,248,2, + 133,201,15,142,244,253,248,3,65,41,202,15,140,244,125,72,141,132,253,13,233, + 65,131,194,1,248,4,252,233,244,122,248,5,255,15,140,244,252,69,141,84,2,1, + 252,233,244,2,248,6,65,137,194,252,233,244,2,248,7,15,132,244,254,1,193,131, + 193,1,15,143,244,3,248,8,185,1,0,0,0,252,233,244,3,248,125,69,49,210,252, + 233,244,4,248,126,255,129,252,248,239,15,130,244,70,73,139,174,233,73,59, + 174,233,15,130,244,247,232,244,80,248,1,72,139,50,73,137,252,243,72,193,230, + 17,72,193,252,238,17,73,193,252,251,47,65,129,252,251,239,15,133,244,70,72, + 139,108,36,16,73,141,190,233,72,137,149,233,72,139,135,233,72,137,175,233, + 72,137,135,233,72,137,92,36,24,232,251,1,36,72,137,199,232,251,1,37,252,233, + 244,123,248,127,255,129,252,248,239,15,130,244,70,73,139,174,233,73,59,174, + 233,15,130,244,247,232,244,80,248,1,72,139,50,73,137,252,243,72,193,230,17, + 72,193,252,238,17,73,193,252,251,47,65,129,252,251,239,15,133,244,70,72,139, + 108,36,16,73,141,190,233,72,137,149,233,72,139,135,233,72,137,175,233,72, + 137,135,233,72,137,92,36,24,232,251,1,38,72,137,199,232,251,1,37,252,233, + 244,123,248,128,255,129,252,248,239,15,130,244,70,73,139,174,233,73,59,174, + 233,15,130,244,247,232,244,80,248,1,72,139,50,73,137,252,243,72,193,230,17, + 
72,193,252,238,17,73,193,252,251,47,65,129,252,251,239,15,133,244,70,72,139, + 108,36,16,73,141,190,233,72,137,149,233,72,139,135,233,72,137,175,233,72, + 137,135,233,72,137,92,36,24,232,251,1,39,72,137,199,232,251,1,37,252,233, + 244,123,248,129,255,129,252,248,239,15,130,244,70,76,139,26,73,193,252,251, + 47,65,129,252,251,239,15,131,244,70,252,242,15,16,2,72,189,237,237,102,72, + 15,110,205,252,242,15,88,193,102,15,126,197,248,2,252,233,244,130,248,131, + 129,252,248,239,15,130,244,70,72,189,237,237,102,72,15,110,205,76,139,26, + 73,193,252,251,47,65,129,252,251,239,15,131,244,70,255,252,242,15,16,2,252, + 242,15,88,193,102,15,126,197,248,2,65,137,194,72,141,68,194,252,240,248,1, + 72,57,208,15,134,244,130,76,139,24,73,193,252,251,47,65,129,252,251,239,15, + 131,244,132,252,242,15,16,0,252,242,15,88,193,102,15,126,193,33,205,72,131, + 232,8,252,233,244,1,248,133,129,252,248,239,15,130,244,70,72,189,237,237, + 102,72,15,110,205,76,139,26,73,193,252,251,47,65,129,252,251,239,15,131,244, + 70,252,242,15,16,2,252,242,15,88,193,102,15,126,197,248,2,255,65,137,194, + 72,141,68,194,252,240,248,1,72,57,208,15,134,244,130,76,139,24,73,193,252, + 251,47,65,129,252,251,239,15,131,244,132,252,242,15,16,0,252,242,15,88,193, + 102,15,126,193,9,205,72,131,232,8,252,233,244,1,248,134,129,252,248,239,15, + 130,244,70,72,189,237,237,102,72,15,110,205,76,139,26,73,193,252,251,47,65, + 129,252,251,239,15,131,244,70,252,242,15,16,2,252,242,15,88,193,102,15,126, + 197,248,2,65,137,194,72,141,68,194,252,240,248,1,255,72,57,208,15,134,244, + 130,76,139,24,73,193,252,251,47,65,129,252,251,239,15,131,244,132,252,242, + 15,16,0,252,242,15,88,193,102,15,126,193,49,205,72,131,232,8,252,233,244, + 1,248,135,129,252,248,239,15,130,244,70,76,139,26,73,193,252,251,47,65,129, + 252,251,239,15,131,244,70,252,242,15,16,2,72,189,237,237,102,72,15,110,205, + 252,242,15,88,193,102,15,126,197,248,2,15,205,252,233,244,130,248,136,255, + 
129,252,248,239,15,130,244,70,76,139,26,73,193,252,251,47,65,129,252,251, + 239,15,131,244,70,252,242,15,16,2,72,189,237,237,102,72,15,110,205,252,242, + 15,88,193,102,15,126,197,248,2,252,247,213,248,130,252,242,15,42,197,252, + 233,244,95,248,132,68,137,208,252,233,244,70,248,137,129,252,248,239,15,130, + 244,70,255,76,139,26,73,193,252,251,47,65,129,252,251,239,15,131,244,70,76, + 139,90,8,73,193,252,251,47,65,129,252,251,239,15,131,244,70,252,242,15,16, + 2,252,242,15,16,74,8,72,189,237,237,102,72,15,110,213,252,242,15,88,194,252, + 242,15,88,202,102,15,126,197,102,15,126,201,211,229,252,233,244,130,248,138, + 129,252,248,239,15,130,244,70,76,139,26,73,193,252,251,47,65,129,252,251, + 239,15,131,244,70,76,139,90,8,73,193,252,251,47,65,129,252,251,239,15,131, + 244,70,255,252,242,15,16,2,252,242,15,16,74,8,72,189,237,237,102,72,15,110, + 213,252,242,15,88,194,252,242,15,88,202,102,15,126,197,102,15,126,201,211, + 252,237,252,233,244,130,248,139,129,252,248,239,15,130,244,70,76,139,26,73, + 193,252,251,47,65,129,252,251,239,15,131,244,70,76,139,90,8,73,193,252,251, + 47,65,129,252,251,239,15,131,244,70,252,242,15,16,2,252,242,15,16,74,8,72, + 189,237,237,102,72,15,110,213,252,242,15,88,194,252,242,15,88,202,102,15, + 126,197,102,15,126,201,211,252,253,252,233,244,130,248,140,255,129,252,248, + 239,15,130,244,70,76,139,26,73,193,252,251,47,65,129,252,251,239,15,131,244, + 70,76,139,90,8,73,193,252,251,47,65,129,252,251,239,15,131,244,70,252,242, + 15,16,2,252,242,15,16,74,8,72,189,237,237,102,72,15,110,213,252,242,15,88, + 194,252,242,15,88,202,102,15,126,197,102,15,126,201,211,197,252,233,244,130, + 248,141,129,252,248,239,15,130,244,70,76,139,26,73,193,252,251,47,65,129, + 252,251,239,15,131,244,70,255,76,139,90,8,73,193,252,251,47,65,129,252,251, + 239,15,131,244,70,252,242,15,16,2,252,242,15,16,74,8,72,189,237,237,102,72, + 15,110,213,252,242,15,88,194,252,242,15,88,202,102,15,126,197,102,15,126, + 
201,211,205,252,233,244,130,248,142,184,237,252,233,244,70,248,79,184,237, + 248,70,72,139,108,36,16,72,139,90,252,248,72,137,92,36,24,72,137,149,233, + 72,141,68,194,252,248,72,141,136,233,72,137,133,233,72,139,66,252,240,72, + 193,224,17,72,193,232,17,72,59,141,233,15,135,244,251,255,72,137,252,239, + 252,255,144,233,72,139,149,233,133,192,15,143,244,83,248,1,72,139,141,233, + 72,41,209,193,252,233,3,133,192,141,65,1,72,139,106,252,240,15,133,244,34, + 72,193,229,17,72,193,252,237,17,72,139,157,233,139,11,15,182,252,233,15,182, + 205,72,131,195,4,65,252,255,36,252,238,248,34,72,137,209,252,247,195,237, + 15,133,244,249,15,182,107,252,253,72,252,247,221,72,141,84,252,234,252,240, + 252,233,244,29,248,3,72,137,221,72,131,229,252,248,72,41,252,234,252,233, + 244,29,248,5,255,190,237,72,137,252,239,232,251,1,0,72,139,149,233,49,192, + 252,233,244,1,248,80,93,72,137,44,36,72,139,108,36,16,72,137,92,36,24,72, + 137,149,233,72,141,68,194,252,248,72,137,252,239,72,137,133,233,232,251,1, + 40,72,139,149,233,72,139,133,233,72,41,208,193,232,3,131,192,1,72,139,44, + 36,85,195,248,143,168,235,15,133,244,247,168,235,15,132,244,247,65,252,255, + 142,233,252,233,244,247,248,144,255,65,15,182,134,233,168,235,15,133,244, + 251,252,233,244,247,248,145,65,15,182,134,233,168,235,15,133,244,251,168, + 235,15,132,244,251,65,252,255,142,233,15,132,244,247,168,235,15,132,244,251, + 248,1,255,72,139,108,36,16,72,137,149,233,72,137,222,72,137,252,239,232,251, + 1,41,248,3,72,139,149,233,248,4,15,182,75,252,253,248,5,15,182,107,252,252, + 15,183,67,252,254,65,252,255,164,253,252,238,233,248,146,72,131,195,4,72, + 139,77,216,137,12,36,252,233,244,4,248,147,72,139,106,252,240,72,193,229, + 17,72,193,252,237,17,72,139,173,233,15,182,133,233,72,141,4,194,72,139,108, + 36,16,72,137,149,233,72,137,133,233,72,137,222,73,141,190,233,73,137,174, + 233,72,137,92,36,24,232,251,1,42,252,233,244,3,248,148,255,72,137,92,36,24, + 252,233,244,247,248,149,72,137,92,36,24,72,131,203,1,248,1,72,141,68,194, + 
252,248,72,139,108,36,16,72,137,149,233,72,137,133,233,72,137,222,72,137, + 252,239,232,251,1,43,72,199,68,36,24,0,0,0,0,72,131,227,252,254,72,139,149, + 233,72,137,193,72,139,133,233,72,41,208,72,137,205,15,182,75,252,253,193, + 232,3,131,192,1,252,255,229,248,150,76,139,93,216,73,193,227,17,73,193,252, + 235,17,68,139,20,36,15,182,75,252,253,72,141,12,202,65,131,252,234,1,15,132, + 244,248,248,1,72,139,40,72,137,41,72,131,192,8,72,131,193,8,65,131,252,234, + 1,15,133,244,1,248,2,15,182,67,252,253,15,182,107,252,255,72,1,232,72,141, + 68,194,252,248,248,3,72,57,200,15,135,244,255,77,133,219,15,132,244,47,255, + 65,15,183,171,233,65,15,183,131,233,57,232,15,132,244,47,133,192,15,133,245, + 73,137,174,233,72,139,108,36,16,72,137,149,233,72,137,222,73,141,190,233, + 73,137,174,233,232,251,1,44,72,139,149,233,252,233,244,47,248,9,72,199,1, + 237,72,131,193,8,252,233,244,3,248,151,65,85,65,84,65,83,65,82,65,81,65,80, + 87,86,85,72,141,108,36,88,85,83,82,81,80,15,182,69,252,248,138,101,252,240, + 76,137,125,252,248,76,137,117,252,240,65,139,142,233,255,65,199,134,233,237, + 65,137,134,233,65,137,142,233,65,137,142,233,72,129,252,236,239,72,131,197, + 128,252,242,68,15,17,125,252,248,252,242,68,15,17,117,252,240,252,242,68, + 15,17,109,232,252,242,68,15,17,101,224,252,242,68,15,17,93,216,252,242,68, + 15,17,85,208,252,242,68,15,17,77,200,252,242,68,15,17,69,192,252,242,15,17, + 125,184,252,242,15,17,117,176,252,242,15,17,109,168,252,242,15,17,101,160, + 252,242,15,17,93,152,252,242,15,17,85,144,252,242,15,17,77,136,252,242,15, + 17,69,128,73,139,174,233,73,139,150,233,73,137,174,233,72,137,149,233,72, + 137,230,73,141,190,233,73,199,134,233,0,0,0,0,232,251,1,45,72,139,141,233, + 72,129,225,239,72,137,169,233,72,139,149,233,72,139,153,233,252,233,244,247, + 248,152,255,69,139,150,233,69,137,150,233,248,153,72,141,76,36,16,248,1,76, + 139,105,252,248,76,139,33,72,137,204,133,192,15,136,244,255,72,139,108,36, + 
16,137,4,36,76,139,122,252,240,73,193,231,17,73,193,252,239,17,77,139,191, + 233,77,139,191,233,72,137,149,233,73,199,134,233,0,0,0,0,69,139,150,233,65, + 199,134,233,237,139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,129,252, + 253,239,15,130,244,249,129,252,253,239,15,131,244,250,248,2,255,139,4,36, + 248,3,65,252,255,36,252,238,248,4,72,139,66,252,248,169,237,15,133,244,2, + 15,182,64,252,253,72,252,247,216,76,139,124,194,224,73,193,231,17,73,193, + 252,239,17,77,139,191,233,77,139,191,233,252,233,244,2,248,9,72,252,247,216, + 72,137,252,239,72,137,198,232,251,1,1,248,154,248,97,72,184,237,237,102,72, + 15,110,208,72,184,237,237,102,72,15,110,216,15,40,200,102,15,84,202,102,15, + 46,217,15,134,244,247,102,15,85,208,252,242,15,88,203,252,242,15,92,203,102, + 15,86,202,72,184,237,237,255,102,72,15,110,208,252,242,15,194,193,1,102,15, + 84,194,252,242,15,92,200,15,40,193,248,1,195,248,155,248,99,72,184,237,237, + 102,72,15,110,208,72,184,237,237,102,72,15,110,216,15,40,200,102,15,84,202, + 102,15,46,217,15,134,244,247,102,15,85,208,252,242,15,88,203,252,242,15,92, + 203,102,15,86,202,72,184,237,237,102,72,15,110,208,252,242,15,194,193,6,102, + 15,84,194,252,242,15,92,200,15,40,193,248,1,195,248,156,248,157,72,184,237, + 237,102,72,15,110,208,72,184,237,237,102,72,15,110,216,15,40,200,102,15,84, + 202,102,15,46,217,15,134,244,247,255,102,15,85,208,15,40,193,252,242,15,88, + 203,252,242,15,92,203,72,184,237,237,102,72,15,110,216,252,242,15,194,193, + 1,102,15,84,195,252,242,15,92,200,102,15,86,202,15,40,193,248,1,195,248,158, + 15,40,232,252,242,15,94,193,72,184,237,237,102,72,15,110,208,72,184,237,237, + 102,72,15,110,216,15,40,224,102,15,84,226,102,15,46,220,15,134,244,247,102, + 15,85,208,252,242,15,88,227,252,242,15,92,227,102,15,86,226,72,184,237,237, + 102,72,15,110,208,252,242,15,194,196,1,102,15,84,194,252,242,15,92,224,15, + 40,197,252,242,15,89,204,252,242,15,92,193,195,248,1,252,242,15,89,200,15, + 
40,197,252,242,15,92,193,195,248,159,131,252,248,1,15,142,244,252,248,1,169, + 1,0,0,0,15,133,244,248,255,252,242,15,89,192,209,232,252,233,244,1,248,2, + 209,232,15,132,244,251,15,40,200,248,3,252,242,15,89,192,209,232,15,132,244, + 250,15,131,244,3,252,242,15,89,200,252,233,244,3,248,4,252,242,15,89,193, + 248,5,195,248,6,15,132,244,5,15,130,244,253,255,252,247,216,232,244,1,72, + 184,237,237,102,72,15,110,200,252,242,15,94,200,15,40,193,195,248,7,72,184, + 237,237,102,72,15,110,192,195,248,160,137,252,248,83,49,201,15,162,137,6, + 137,94,4,137,78,8,137,86,12,91,195,248,161,255,204,255,204,248,162,83,65, + 87,65,86,65,85,65,84,72,131,252,236,40,76,141,181,233,72,139,157,233,15,183, + 192,137,131,233,72,137,187,233,72,137,179,233,72,137,147,233,72,137,139,233, + 252,242,15,17,131,233,252,242,15,17,139,233,252,242,15,17,147,233,252,242, + 15,17,155,233,72,141,132,253,36,233,76,137,131,233,76,137,139,233,252,242, + 15,17,163,233,252,242,15,17,171,233,252,242,15,17,179,233,252,242,15,17,187, + 233,255,72,137,131,233,72,137,230,72,137,92,36,24,72,137,223,232,251,1,46, + 65,199,134,233,237,72,139,144,233,72,139,128,233,72,41,208,72,139,106,252, + 240,72,193,229,17,72,193,252,237,17,72,193,232,3,72,131,192,1,72,139,157, + 233,139,11,15,182,252,233,15,182,205,72,131,195,4,65,252,255,36,252,238,248, + 33,72,139,76,36,16,73,139,158,233,72,137,139,233,72,137,145,233,72,137,169, + 233,72,137,223,72,137,198,232,251,1,47,72,139,131,233,252,242,15,16,131,233, + 252,233,244,17,248,163,85,72,137,229,83,72,137,252,251,139,131,233,72,41, + 196,15,182,139,233,131,252,233,1,15,136,244,248,248,1,255,72,139,132,253, + 203,233,72,137,132,253,204,233,131,252,233,1,15,137,244,1,248,2,15,182,131, + 233,72,139,187,233,72,139,179,233,72,139,147,233,72,139,139,233,76,139,131, + 233,76,139,139,233,133,192,15,132,244,251,15,40,131,233,15,40,139,233,15, + 40,147,233,15,40,155,233,131,252,248,4,15,134,244,251,255,15,40,163,233,15, + 
40,171,233,15,40,179,233,15,40,187,233,248,5,252,255,147,233,72,137,131,233, + 15,41,131,233,72,137,147,233,15,41,139,233,72,139,93,252,248,201,195,255, + 249,255,76,139,28,202,72,139,44,194,76,137,217,72,137,232,73,193,252,251, + 47,72,193,252,253,47,65,129,252,251,239,15,131,244,50,129,252,253,239,15, + 131,244,50,248,1,102,72,15,110,192,248,2,102,72,15,110,201,248,3,72,131,195, + 4,102,15,46,193,255,15,135,244,247,255,15,131,244,247,255,15,183,67,252,254, + 72,141,156,253,131,233,248,1,139,3,15,182,204,15,182,232,72,131,195,4,193, + 232,16,65,252,255,36,252,238,255,72,139,44,194,76,139,28,202,72,131,195,4, + 72,137,232,76,137,217,72,193,252,253,47,73,193,252,251,47,129,252,253,239, + 15,131,244,251,65,129,252,251,239,15,131,244,251,102,72,15,110,200,248,1, + 102,72,15,110,193,248,2,102,15,46,193,248,4,255,15,138,244,248,15,133,244, + 248,255,15,138,244,248,15,132,244,247,255,248,1,15,183,67,252,254,72,141, + 156,253,131,233,248,2,255,248,2,15,183,67,252,254,72,141,156,253,131,233, + 248,1,255,248,5,129,252,253,239,15,132,244,55,65,129,252,251,239,15,132,244, + 55,72,57,193,15,132,244,1,68,57,221,15,133,244,2,129,252,253,239,15,135,244, + 2,72,193,225,17,72,193,252,233,17,72,139,169,233,72,133,252,237,15,132,244, + 2,252,246,133,233,235,255,15,133,244,2,255,49,252,237,255,189,1,0,0,0,255, + 252,233,244,54,255,248,3,65,129,252,251,239,15,133,244,2,252,233,244,55,255, + 72,252,247,208,72,139,44,202,72,131,195,4,73,137,252,235,72,193,229,17,72, + 193,252,237,17,73,193,252,251,47,65,129,252,251,239,15,133,244,249,73,59, + 44,199,255,72,139,44,202,72,131,195,4,73,137,252,235,73,193,252,251,47,65, + 129,252,251,239,15,131,244,249,248,1,252,242,65,15,16,4,199,248,2,102,15, + 46,4,202,248,4,255,72,252,247,208,72,139,44,202,72,193,252,253,47,72,131, + 195,4,57,197,255,15,133,244,249,15,183,67,252,254,72,141,156,253,131,233, + 248,2,139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252, + 
238,248,3,129,252,253,239,15,133,244,2,252,233,244,55,255,15,132,244,248, + 129,252,253,239,15,132,244,55,15,183,67,252,254,72,141,156,253,131,233,248, + 2,139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238, + 255,76,139,28,194,72,131,195,4,255,76,137,221,255,72,137,44,202,255,72,139, + 44,202,72,193,252,253,47,1,197,15,133,244,56,139,3,15,182,204,15,182,232, + 72,131,195,4,193,232,16,65,252,255,36,252,238,255,76,139,28,202,73,193,252, + 251,47,65,129,252,251,239,15,131,244,56,139,3,15,182,204,15,182,232,72,131, + 195,4,193,232,16,65,252,255,36,252,238,255,72,139,44,194,72,137,44,202,139, + 3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,255, + 72,139,44,194,72,193,252,253,47,184,2,0,0,0,72,129,252,253,239,131,216,0, + 72,193,224,47,72,252,247,208,72,137,4,202,139,3,15,182,204,15,182,232,72, + 131,195,4,193,232,16,65,252,255,36,252,238,255,72,139,44,194,73,137,252,235, + 73,193,252,251,47,65,129,252,251,239,15,131,244,61,72,184,237,237,72,49,197, + 72,137,44,202,139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,252, + 255,36,252,238,255,72,139,4,194,73,137,195,72,193,224,17,72,193,232,17,73, + 193,252,251,47,65,129,252,251,239,15,133,244,248,15,87,192,252,242,15,42, + 128,233,248,1,252,242,15,17,4,202,139,3,15,182,204,15,182,232,72,131,195, + 4,193,232,16,65,252,255,36,252,238,248,2,65,129,252,251,239,15,133,244,64, + 72,137,199,255,72,139,168,233,72,131,252,253,0,15,133,244,255,248,3,255,248, + 65,72,137,213,232,251,1,48,252,242,15,42,192,72,137,252,234,15,182,75,252, + 253,252,233,244,1,255,248,9,252,246,133,233,235,15,133,244,3,252,233,244, + 64,255,15,182,252,236,15,182,192,255,76,139,28,252,234,73,193,252,251,47, + 65,129,252,251,239,15,131,244,58,252,242,15,16,4,252,234,252,242,65,15,88, + 4,199,255,76,139,28,252,234,73,193,252,251,47,65,129,252,251,239,15,131,244, + 60,252,242,65,15,16,4,199,252,242,15,88,4,252,234,255,76,139,28,252,234,73, + 
193,252,251,47,65,129,252,251,239,15,131,244,63,76,139,28,194,73,193,252, + 251,47,65,129,252,251,239,15,131,244,63,252,242,15,16,4,252,234,252,242,15, + 88,4,194,255,252,242,15,17,4,202,139,3,15,182,204,15,182,232,72,131,195,4, + 193,232,16,65,252,255,36,252,238,255,76,139,28,252,234,73,193,252,251,47, + 65,129,252,251,239,15,131,244,58,252,242,15,16,4,252,234,252,242,65,15,92, + 4,199,255,76,139,28,252,234,73,193,252,251,47,65,129,252,251,239,15,131,244, + 60,252,242,65,15,16,4,199,252,242,15,92,4,252,234,255,76,139,28,252,234,73, + 193,252,251,47,65,129,252,251,239,15,131,244,63,76,139,28,194,73,193,252, + 251,47,65,129,252,251,239,15,131,244,63,252,242,15,16,4,252,234,252,242,15, + 92,4,194,255,76,139,28,252,234,73,193,252,251,47,65,129,252,251,239,15,131, + 244,58,252,242,15,16,4,252,234,252,242,65,15,89,4,199,255,76,139,28,252,234, + 73,193,252,251,47,65,129,252,251,239,15,131,244,60,252,242,65,15,16,4,199, + 252,242,15,89,4,252,234,255,76,139,28,252,234,73,193,252,251,47,65,129,252, + 251,239,15,131,244,63,76,139,28,194,73,193,252,251,47,65,129,252,251,239, + 15,131,244,63,252,242,15,16,4,252,234,252,242,15,89,4,194,255,76,139,28,252, + 234,73,193,252,251,47,65,129,252,251,239,15,131,244,58,252,242,15,16,4,252, + 234,252,242,65,15,94,4,199,255,76,139,28,252,234,73,193,252,251,47,65,129, + 252,251,239,15,131,244,60,252,242,65,15,16,4,199,252,242,15,94,4,252,234, + 255,76,139,28,252,234,73,193,252,251,47,65,129,252,251,239,15,131,244,63, + 76,139,28,194,73,193,252,251,47,65,129,252,251,239,15,131,244,63,252,242, + 15,16,4,252,234,252,242,15,94,4,194,255,76,139,28,252,234,73,193,252,251, + 47,65,129,252,251,239,15,131,244,58,252,242,15,16,4,252,234,252,242,65,15, + 16,12,199,255,76,139,28,252,234,73,193,252,251,47,65,129,252,251,239,15,131, + 244,60,252,242,65,15,16,4,199,252,242,15,16,12,252,234,255,76,139,28,252, + 234,73,193,252,251,47,65,129,252,251,239,15,131,244,63,76,139,28,194,73,193, + 
252,251,47,65,129,252,251,239,15,131,244,63,252,242,15,16,4,252,234,252,242, + 15,16,12,194,255,248,164,232,244,158,252,242,15,17,4,202,139,3,15,182,204, + 15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,255,252,233,244, + 164,255,72,137,213,232,251,1,30,15,182,75,252,253,72,137,252,234,252,242, + 15,17,4,202,139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,252,255, + 36,252,238,255,15,182,252,236,15,182,192,72,139,124,36,16,72,137,151,233, + 72,141,52,194,137,194,41,252,234,248,37,72,137,252,253,72,137,92,36,24,232, + 251,1,49,72,139,149,233,72,133,192,15,133,244,51,15,182,107,252,255,15,182, + 75,252,253,72,139,4,252,234,72,137,4,202,139,3,15,182,204,15,182,232,72,131, + 195,4,193,232,16,65,252,255,36,252,238,255,72,252,247,208,73,139,4,199,73, + 187,237,237,76,9,216,72,137,4,202,139,3,15,182,204,15,182,232,72,131,195, + 4,193,232,16,65,252,255,36,252,238,255,15,191,192,252,242,15,42,192,252,242, + 15,17,4,202,139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,252,255, + 36,252,238,255,252,242,65,15,16,4,199,252,242,15,17,4,202,139,3,15,182,204, + 15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,255,72,141,76,202, + 8,72,141,4,194,72,199,197,237,72,137,105,252,248,248,1,72,137,41,72,131,193, + 8,72,57,193,15,134,244,1,139,3,15,182,204,15,182,232,72,131,195,4,193,232, + 16,65,252,255,36,252,238,255,72,139,106,252,240,72,193,229,17,72,193,252, + 237,17,72,139,172,253,197,233,72,139,173,233,72,139,69,0,72,137,4,202,139, + 3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,255, + 72,139,106,252,240,72,193,229,17,72,193,252,237,17,72,139,172,253,205,233, + 128,189,233,0,72,139,173,233,72,139,12,194,72,137,77,0,15,132,244,247,252, + 246,133,233,235,15,133,244,248,248,1,139,3,15,182,204,15,182,232,72,131,195, + 4,193,232,16,65,252,255,36,252,238,248,2,72,137,200,72,193,252,248,47,129, + 232,239,129,252,248,239,15,134,244,1,72,193,225,17,72,193,252,233,17,252, + 
246,129,233,235,15,132,244,1,255,72,137,252,238,72,137,213,73,141,190,233, + 232,251,1,50,72,137,252,234,252,233,244,1,255,72,252,247,208,72,139,106,252, + 240,72,193,229,17,72,193,252,237,17,72,139,172,253,205,233,73,139,12,199, + 72,139,133,233,73,187,237,237,73,9,203,76,137,24,252,246,133,233,235,15,133, + 244,248,248,1,139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,252, + 255,36,252,238,248,2,252,246,129,233,235,15,132,244,1,128,189,233,0,15,132, + 244,1,72,137,213,72,137,198,73,141,190,233,232,251,1,50,72,137,252,234,252, + 233,244,1,255,72,139,106,252,240,72,193,229,17,72,193,252,237,17,252,242, + 65,15,16,4,199,72,139,172,253,205,233,72,139,141,233,252,242,15,17,1,139, + 3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,255, + 72,139,106,252,240,72,193,229,17,72,193,252,237,17,72,139,172,253,205,233, + 72,193,224,47,72,252,247,208,72,139,141,233,72,137,1,139,3,15,182,204,15, + 182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,255,72,141,156,253, + 131,233,72,139,108,36,16,72,131,189,233,0,15,132,244,247,72,137,149,233,72, + 141,52,202,72,137,252,239,232,251,1,51,72,139,149,233,248,1,139,3,15,182, + 204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,255,72,252,247, + 208,72,139,108,36,16,72,137,149,233,72,139,82,252,240,72,193,226,17,72,193, + 252,234,17,73,139,52,199,72,137,252,239,72,137,92,36,24,232,251,1,52,72,139, + 149,233,15,182,75,252,253,73,187,237,237,76,9,216,72,137,4,202,139,3,15,182, + 204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,255,72,139,108, + 36,16,72,137,149,233,73,139,142,233,73,59,142,233,72,137,92,36,24,15,131, + 244,251,248,1,137,194,37,252,255,7,0,0,193,252,234,11,61,252,255,7,0,0,15, + 132,244,249,248,2,72,137,252,239,137,198,232,251,1,53,72,139,149,233,15,182, + 75,252,253,73,187,237,237,76,9,216,72,137,4,202,139,3,15,182,204,15,182,232, + 72,131,195,4,193,232,16,65,252,255,36,252,238,248,3,184,1,8,0,0,252,233,244, + 
2,248,5,72,137,252,239,232,251,1,54,15,183,67,252,254,252,233,244,1,255,72, + 252,247,208,72,139,108,36,16,73,139,142,233,72,137,92,36,24,73,59,142,233, + 72,137,149,233,15,131,244,249,248,2,73,139,52,199,72,137,252,239,232,251, + 1,55,72,139,149,233,15,182,75,252,253,73,187,237,237,76,9,216,72,137,4,202, + 139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238, + 248,3,72,137,252,239,232,251,1,54,15,183,67,252,254,72,252,247,208,252,233, + 244,2,255,72,252,247,208,72,139,106,252,240,72,193,229,17,72,193,252,237, + 17,72,139,173,233,73,139,4,199,252,233,244,165,255,72,252,247,208,72,139, + 106,252,240,72,193,229,17,72,193,252,237,17,72,139,173,233,73,139,4,199,252, + 233,244,166,255,15,182,252,236,15,182,192,72,139,44,252,234,72,139,4,194, + 73,137,252,235,72,193,229,17,72,193,252,237,17,73,193,252,251,47,65,129,252, + 251,239,15,133,244,40,73,137,195,73,193,252,251,47,65,129,252,251,239,15, + 131,244,251,102,72,15,110,192,252,242,15,44,192,252,242,15,42,200,102,15, + 46,193,15,133,244,40,59,133,233,15,131,244,40,193,224,3,72,3,133,233,76,139, + 24,73,129,252,251,239,15,132,244,248,248,1,76,137,28,202,139,3,15,182,204, + 15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,248,2,76,139,149, + 233,77,133,210,15,132,244,1,255,65,252,246,130,233,235,15,132,244,40,252, + 233,244,1,248,5,65,129,252,251,239,15,133,244,40,72,193,224,17,72,193,232, + 17,252,233,244,165,255,15,182,252,236,15,182,192,72,139,44,252,234,72,252, + 247,208,73,139,4,199,73,137,252,235,72,193,229,17,72,193,252,237,17,73,193, + 252,251,47,65,129,252,251,239,15,133,244,38,248,165,68,139,149,233,68,35, + 144,233,69,105,210,239,76,3,149,233,73,187,237,237,73,9,195,248,1,77,57,154, + 233,15,133,244,250,77,139,154,233,73,129,252,251,239,15,132,244,251,248,2, + 255,76,137,28,202,139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65, + 252,255,36,252,238,248,4,77,139,146,233,77,133,210,15,133,244,1,73,199,195, + 
237,248,5,76,139,149,233,77,133,210,15,132,244,2,65,252,246,130,233,235,15, + 133,244,2,252,233,244,38,255,15,182,252,236,15,182,192,72,139,44,252,234, + 73,137,252,235,72,193,229,17,72,193,252,237,17,73,193,252,251,47,65,129,252, + 251,239,15,133,244,39,59,133,233,15,131,244,39,193,224,3,72,3,133,233,76, + 139,24,73,129,252,251,239,15,132,244,248,248,1,76,137,28,202,139,3,15,182, + 204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,248,2,76,139, + 149,233,77,133,210,15,132,244,1,65,252,246,130,233,235,15,132,244,39,255, + 15,182,252,236,15,182,192,72,139,44,252,234,72,193,229,17,72,193,252,237, + 17,252,242,15,44,4,194,59,133,233,15,131,244,41,193,224,3,72,3,133,233,248, + 42,76,139,24,248,43,76,137,28,202,139,3,15,182,204,15,182,232,72,131,195, + 4,193,232,16,65,252,255,36,252,238,255,15,182,252,236,15,182,192,72,139,44, + 252,234,72,139,4,194,73,137,252,235,72,193,229,17,72,193,252,237,17,73,193, + 252,251,47,65,129,252,251,239,15,133,244,46,73,137,195,73,193,252,251,47, + 65,129,252,251,239,15,131,244,251,102,72,15,110,192,252,242,15,44,192,252, + 242,15,42,200,102,15,46,193,15,133,244,46,59,133,233,15,131,244,46,193,224, + 3,72,3,133,233,72,129,56,239,15,132,244,249,248,1,252,246,133,233,235,15, + 133,244,253,248,2,255,72,139,44,202,72,137,40,139,3,15,182,204,15,182,232, + 72,131,195,4,193,232,16,65,252,255,36,252,238,248,3,76,139,149,233,77,133, + 210,15,132,244,1,65,252,246,130,233,235,15,132,244,46,252,233,244,1,248,5, + 65,129,252,251,239,15,133,244,46,72,193,224,17,72,193,232,17,252,233,244, + 166,248,7,128,165,233,235,255,77,139,150,233,73,137,174,233,76,137,149,233, + 252,233,244,2,255,15,182,252,236,15,182,192,72,139,44,252,234,72,252,247, + 208,73,139,4,199,73,137,252,235,72,193,229,17,72,193,252,237,17,73,193,252, + 251,47,65,129,252,251,239,15,133,244,44,248,166,68,139,149,233,68,35,144, + 233,69,105,210,239,198,133,233,0,76,3,149,233,73,187,237,237,73,9,195,248, + 1,77,57,154,233,15,133,244,251,73,129,58,239,15,132,244,250,248,2,255,252, 
+ 246,133,233,235,15,133,244,253,248,3,76,139,28,202,77,137,26,139,3,15,182, + 204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,248,4,76,139, + 157,233,77,133,219,15,132,244,2,65,252,246,131,233,235,15,132,244,44,252, + 233,244,2,248,5,77,139,146,233,77,133,210,15,133,244,1,255,76,139,149,233, + 77,133,210,15,132,244,252,65,252,246,130,233,235,15,132,244,44,248,6,76,137, + 28,36,72,139,124,36,16,72,137,151,233,72,141,20,36,72,137,252,238,72,137, + 92,36,24,232,251,1,56,72,139,124,36,16,72,139,151,233,73,137,194,15,182,75, + 252,253,252,233,244,2,248,7,128,165,233,235,77,139,158,233,73,137,174,233, + 76,137,157,233,252,233,244,3,255,15,182,252,236,15,182,192,72,139,44,252, + 234,73,137,252,235,72,193,229,17,72,193,252,237,17,73,193,252,251,47,65,129, + 252,251,239,15,133,244,45,59,133,233,15,131,244,45,193,224,3,72,3,133,233, + 72,129,56,239,15,132,244,249,248,1,252,246,133,233,235,15,133,244,253,248, + 2,76,139,28,202,76,137,24,139,3,15,182,204,15,182,232,72,131,195,4,193,232, + 16,65,252,255,36,252,238,248,3,76,139,149,233,77,133,210,15,132,244,1,255, + 65,252,246,130,233,235,15,132,244,45,252,233,244,1,248,7,128,165,233,235, + 77,139,150,233,73,137,174,233,76,137,149,233,252,233,244,2,255,15,182,252, + 236,15,182,192,72,139,44,252,234,72,193,229,17,72,193,252,237,17,252,242, + 15,44,4,194,252,246,133,233,235,15,133,244,253,248,2,59,133,233,15,131,244, + 48,193,224,3,72,3,133,233,248,49,76,139,28,202,76,137,24,139,3,15,182,204, + 15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,248,7,128,165,233, + 235,77,139,150,233,73,137,174,233,76,137,149,233,252,233,244,2,255,248,1, + 69,139,20,199,72,141,12,202,72,139,105,252,248,72,193,229,17,72,193,252,237, + 17,252,246,133,233,235,15,133,244,253,248,2,139,4,36,131,232,1,15,132,244, + 250,68,1,208,59,133,233,15,135,244,251,68,41,208,65,193,226,3,76,3,149,233, + 248,3,72,139,41,72,131,193,8,73,137,42,73,131,194,8,131,232,1,15,133,244, + 
3,248,4,139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,252,255,36, + 252,238,248,5,72,139,124,36,16,72,137,151,233,72,137,252,238,137,194,72,137, + 252,253,72,137,92,36,24,232,251,1,57,72,139,149,233,255,15,182,75,252,253, + 15,183,67,252,254,252,233,244,1,248,7,128,165,233,235,73,139,134,233,73,137, + 174,233,72,137,133,233,252,233,244,2,255,3,4,36,255,72,139,44,202,73,137, + 252,235,72,193,229,17,72,193,252,237,17,73,193,252,251,47,65,129,252,251, + 239,15,133,244,66,72,141,84,202,16,72,137,90,252,248,72,139,157,233,139,11, + 15,182,252,233,15,182,205,72,131,195,4,65,252,255,36,252,238,255,72,141,76, + 202,16,73,137,215,72,139,105,252,240,73,137,252,235,73,193,252,251,47,65, + 129,252,251,239,15,133,244,30,248,67,72,139,90,252,248,252,247,195,237,15, + 133,244,253,248,1,72,137,106,252,240,137,4,36,131,232,1,15,132,244,249,248, + 2,72,139,41,72,131,193,8,73,137,47,73,131,199,8,131,232,1,15,133,244,2,72, + 139,106,252,240,248,3,72,193,229,17,72,193,252,237,17,139,4,36,128,189,233, + 1,15,135,244,251,248,4,72,139,157,233,255,139,11,15,182,252,233,15,182,205, + 72,131,195,4,65,252,255,36,252,238,248,5,252,247,195,237,15,133,244,4,15, + 182,75,252,253,72,252,247,217,76,139,124,202,224,73,193,231,17,73,193,252, + 239,17,77,139,191,233,77,139,191,233,252,233,244,4,248,7,72,129,252,235,239, + 252,247,195,237,15,133,244,254,72,41,218,73,137,215,72,139,90,252,248,252, + 233,244,1,248,8,129,195,239,252,233,244,1,255,72,141,76,202,16,72,139,105, + 224,72,139,65,232,72,137,41,72,137,65,8,72,139,105,216,72,137,105,252,240, + 184,237,73,137,252,235,72,193,229,17,72,193,252,237,17,73,193,252,251,47, + 65,129,252,251,239,15,133,244,30,72,137,202,72,137,90,252,248,72,139,157, + 233,139,11,15,182,252,233,15,182,205,72,131,195,4,65,252,255,36,252,238,255, + 72,139,108,202,252,240,72,193,229,17,72,193,252,237,17,139,68,202,252,248, + 68,139,149,233,72,131,195,4,76,139,157,233,248,1,68,57,208,15,131,244,251, + 
73,129,60,253,195,239,15,132,244,250,252,242,15,42,192,73,139,44,195,72,137, + 108,202,8,252,242,15,17,4,202,131,192,1,137,68,202,252,248,248,2,15,183,67, + 252,254,72,141,156,253,131,233,248,3,139,3,15,182,204,15,182,232,72,131,195, + 4,193,232,16,65,252,255,36,252,238,248,4,131,192,1,252,233,244,1,248,5,68, + 41,208,248,6,59,133,233,15,135,244,3,255,68,105,216,239,76,3,157,233,73,129, + 187,233,239,15,132,244,253,70,141,84,16,1,73,139,171,233,73,139,131,233,72, + 137,44,202,72,137,68,202,8,68,137,84,202,252,248,252,233,244,2,248,7,131, + 192,1,252,233,244,6,255,72,139,108,202,232,73,137,252,235,72,193,229,17,72, + 193,252,237,17,73,193,252,251,47,65,129,252,251,239,15,133,244,251,76,139, + 92,202,252,240,73,193,252,251,47,65,129,252,251,239,15,133,244,251,72,129, + 124,253,202,252,248,239,15,133,244,251,128,189,233,235,15,133,244,251,72, + 141,156,253,131,233,73,186,237,237,76,137,84,202,252,248,248,1,139,3,15,182, + 204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,248,5,198,67, + 252,252,235,255,72,141,156,253,131,233,198,3,235,252,233,244,1,255,15,182, + 252,236,15,182,192,76,141,148,253,194,233,72,141,12,202,76,43,82,252,248, + 72,133,252,237,15,132,244,251,72,141,108,252,233,252,248,73,57,210,15,131, + 244,248,248,1,73,139,66,252,240,73,131,194,8,72,137,1,72,131,193,8,72,57, + 252,233,15,131,244,249,73,57,210,15,130,244,1,248,2,72,199,1,237,72,131,193, + 8,72,57,252,233,15,130,244,2,248,3,139,3,15,182,204,15,182,232,72,131,195, + 4,193,232,16,65,252,255,36,252,238,248,5,199,4,36,1,0,0,0,72,137,208,76,41, + 208,15,134,244,3,137,197,193,252,237,3,131,197,1,137,44,36,72,139,108,36, + 16,72,1,200,72,59,133,233,255,15,135,244,253,248,6,73,139,66,252,240,73,131, + 194,8,72,137,1,72,131,193,8,73,57,210,15,130,244,6,252,233,244,3,248,7,72, + 137,149,233,72,137,141,233,72,137,92,36,24,73,41,210,68,137,84,36,4,139,52, + 36,131,252,238,1,72,137,252,239,232,251,1,0,72,139,149,233,76,99,84,36,4, + 
72,139,141,233,73,1,210,252,233,244,6,255,193,225,3,255,248,1,72,139,90,252, + 248,137,4,36,252,247,195,237,15,133,244,253,255,248,14,73,137,215,131,232, + 1,15,132,244,249,248,2,73,139,44,15,73,137,111,252,240,73,131,199,8,131,232, + 1,15,133,244,2,248,3,139,4,36,15,182,107,252,255,248,5,57,197,15,135,244, + 252,255,72,139,44,10,72,137,106,252,240,255,248,5,56,67,252,255,15,135,244, + 252,255,15,182,75,252,253,72,252,247,217,72,141,84,202,252,240,76,139,122, + 252,240,73,193,231,17,73,193,252,239,17,77,139,191,233,77,139,191,233,139, + 3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,248, + 6,255,73,199,71,252,240,237,73,131,199,8,255,72,199,68,194,232,237,255,72, + 131,192,1,252,233,244,5,248,7,72,141,171,233,252,247,197,237,15,133,244,15, + 72,41,252,234,255,72,1,252,233,255,137,221,209,252,237,129,229,239,102,65, + 129,172,253,46,233,238,15,130,244,147,255,72,141,12,202,255,76,139,25,73, + 193,252,251,47,65,129,252,251,239,15,131,244,68,76,139,89,8,73,193,252,251, + 47,65,129,252,251,239,15,131,244,68,255,76,139,89,8,73,193,252,251,47,65, + 129,252,251,239,15,131,244,161,76,139,89,16,73,193,252,251,47,65,129,252, + 251,239,15,131,244,161,255,72,139,105,16,255,73,137,252,235,73,193,252,251, + 47,65,129,252,251,239,15,131,244,68,255,252,242,15,16,1,252,242,15,16,73, + 8,255,252,242,15,88,65,16,252,242,15,17,1,72,133,252,237,15,136,244,249,255, + 15,140,244,249,255,102,15,46,200,248,1,252,242,15,17,65,24,255,15,131,244, + 248,72,141,156,253,131,233,255,72,141,156,253,131,233,15,183,67,252,254,15, + 131,245,255,15,130,244,248,72,141,156,253,131,233,255,248,2,139,3,15,182, + 204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252,238,248,3,102,15, + 46,193,252,233,244,1,255,72,141,12,202,72,139,41,72,129,252,253,239,15,132, + 244,247,255,72,137,105,252,248,252,233,245,255,72,141,156,253,131,233,72, + 137,105,252,248,255,73,139,142,233,72,139,4,193,72,139,128,233,72,139,108, + 
36,16,73,137,150,233,73,137,174,233,72,131,252,236,16,76,137,100,36,16,76, + 137,108,36,8,252,255,224,255,72,141,156,253,131,233,139,3,15,182,204,15,182, + 232,72,131,195,4,193,232,16,65,252,255,36,252,238,255,137,221,209,252,237, + 129,229,239,102,65,129,172,253,46,233,238,15,130,244,149,255,76,139,187,233, + 72,139,108,36,16,72,141,12,202,72,59,141,233,15,135,244,25,15,182,139,233, + 57,200,15,134,244,249,248,2,255,15,183,67,252,254,252,233,245,255,248,3,72, + 199,68,194,252,248,237,131,192,1,57,200,15,134,244,3,252,233,244,2,255,141, + 44,197,237,72,141,68,194,8,76,139,122,252,240,72,137,104,252,248,76,137,120, + 252,240,72,139,108,36,16,72,141,12,200,72,59,141,233,15,135,244,24,72,137, + 209,72,137,194,15,182,171,233,133,252,237,15,132,244,248,72,131,193,8,248, + 1,72,131,193,8,72,57,209,15,131,244,249,76,139,121,252,240,76,137,56,72,131, + 192,8,72,199,65,252,240,237,131,252,237,1,15,133,244,1,248,2,255,76,139,187, + 233,139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,252,255,36,252, + 238,255,248,3,72,199,0,237,72,131,192,8,131,252,237,1,15,133,244,3,252,233, + 244,2,255,72,139,106,252,240,72,193,229,17,72,193,252,237,17,76,139,189,233, + 72,139,108,36,16,72,141,68,194,252,248,72,137,149,233,72,141,136,233,72,59, + 141,233,72,137,133,233,255,72,137,252,239,255,76,137,252,254,72,137,252,239, + 255,15,135,244,23,65,199,134,233,237,255,65,252,255,215,255,65,252,255,150, + 233,255,72,139,149,233,73,137,174,233,65,199,134,233,237,72,141,12,194,72, + 252,247,217,72,3,141,233,72,139,90,252,248,252,233,244,12,255,254,0 +}; + +#line 9 "vm_x64.dasc" +//|.globals GLOB_ +enum { + GLOB_vm_returnp, + GLOB_cont_dispatch, + GLOB_vm_returnc, + GLOB_vm_unwind_yield, + GLOB_BC_RET_Z, + GLOB_vm_return, + GLOB_vm_leave_cp, + GLOB_vm_leave_unw, + GLOB_vm_unwind_c_eh, + GLOB_vm_unwind_c, + GLOB_vm_unwind_rethrow, + GLOB_vm_unwind_ff, + GLOB_vm_unwind_ff_eh, + GLOB_vm_growstack_c, + GLOB_vm_growstack_v, + GLOB_vm_growstack_f, + GLOB_vm_resume, + GLOB_vm_pcall, + GLOB_vm_call, 
+ GLOB_vm_call_dispatch, + GLOB_vmeta_call, + GLOB_vm_call_dispatch_f, + GLOB_vm_cpcall, + GLOB_cont_ffi_callback, + GLOB_vm_call_tail, + GLOB_cont_cat, + GLOB_cont_ra, + GLOB_BC_CAT_Z, + GLOB_vmeta_tgets, + GLOB_vmeta_tgetb, + GLOB_vmeta_tgetv, + GLOB_vmeta_tgetr, + GLOB_BC_TGETR_Z, + GLOB_BC_TGETR2_Z, + GLOB_vmeta_tsets, + GLOB_vmeta_tsetb, + GLOB_vmeta_tsetv, + GLOB_cont_nop, + GLOB_vmeta_tsetr, + GLOB_BC_TSETR_Z, + GLOB_vmeta_comp, + GLOB_vmeta_binop, + GLOB_cont_condt, + GLOB_cont_condf, + GLOB_vmeta_equal, + GLOB_vmeta_equal_cd, + GLOB_vmeta_istype, + GLOB_vmeta_arith_vno, + GLOB_vmeta_arith_vn, + GLOB_vmeta_arith_nvo, + GLOB_vmeta_arith_nv, + GLOB_vmeta_unm, + GLOB_vmeta_arith_vvo, + GLOB_vmeta_arith_vv, + GLOB_vmeta_len, + GLOB_BC_LEN_Z, + GLOB_vmeta_call_ra, + GLOB_BC_CALLT_Z, + GLOB_vmeta_for, + GLOB_ff_assert, + GLOB_fff_fallback, + GLOB_fff_res_, + GLOB_ff_type, + GLOB_fff_res1, + GLOB_ff_getmetatable, + GLOB_ff_setmetatable, + GLOB_ff_rawget, + GLOB_ff_tonumber, + GLOB_ff_tostring, + GLOB_fff_fallback_1, + GLOB_fff_gcstep, + GLOB_ff_next, + GLOB_fff_res2, + GLOB_fff_res, + GLOB_ff_pairs, + GLOB_ff_ipairs_aux, + GLOB_fff_res0, + GLOB_ff_ipairs, + GLOB_ff_pcall, + GLOB_ff_xpcall, + GLOB_ff_coroutine_resume, + GLOB_ff_coroutine_wrap_aux, + GLOB_ff_coroutine_yield, + GLOB_ff_math_abs, + GLOB_ff_math_sqrt, + GLOB_fff_resxmm0, + GLOB_ff_math_floor, + GLOB_vm_floor_sse, + GLOB_ff_math_ceil, + GLOB_vm_ceil_sse, + GLOB_ff_math_log, + GLOB_ff_math_log10, + GLOB_ff_math_exp, + GLOB_ff_math_sin, + GLOB_ff_math_cos, + GLOB_ff_math_tan, + GLOB_ff_math_asin, + GLOB_ff_math_acos, + GLOB_ff_math_atan, + GLOB_ff_math_sinh, + GLOB_ff_math_cosh, + GLOB_ff_math_tanh, + GLOB_ff_math_pow, + GLOB_ff_math_atan2, + GLOB_ff_math_fmod, + GLOB_ff_math_ldexp, + GLOB_ff_math_frexp, + GLOB_ff_math_modf, + GLOB_ff_math_min, + GLOB_ff_math_max, + GLOB_ff_string_byte, + GLOB_ff_string_char, + GLOB_fff_newstr, + GLOB_fff_resstr, + GLOB_ff_string_sub, + GLOB_fff_emptystr, + 
GLOB_ff_string_reverse, + GLOB_ff_string_lower, + GLOB_ff_string_upper, + GLOB_ff_bit_tobit, + GLOB_fff_resbit, + GLOB_ff_bit_band, + GLOB_fff_fallback_bit_op, + GLOB_ff_bit_bor, + GLOB_ff_bit_bxor, + GLOB_ff_bit_bswap, + GLOB_ff_bit_bnot, + GLOB_ff_bit_lshift, + GLOB_ff_bit_rshift, + GLOB_ff_bit_arshift, + GLOB_ff_bit_rol, + GLOB_ff_bit_ror, + GLOB_fff_fallback_2, + GLOB_vm_record, + GLOB_vm_rethook, + GLOB_vm_inshook, + GLOB_cont_hook, + GLOB_vm_hotloop, + GLOB_vm_callhook, + GLOB_vm_hotcall, + GLOB_cont_stitch, + GLOB_vm_exit_handler, + GLOB_vm_exit_interp, + GLOB_vm_exit_interp_notrack, + GLOB_vm_floor, + GLOB_vm_ceil, + GLOB_vm_trunc, + GLOB_vm_trunc_sse, + GLOB_vm_mod, + GLOB_vm_powi_sse, + GLOB_vm_cpuid, + GLOB_assert_bad_for_arg_type, + GLOB_vm_ffi_callback, + GLOB_vm_ffi_call, + GLOB_BC_MODVN_Z, + GLOB_BC_TGETS_Z, + GLOB_BC_TSETS_Z, + GLOB__MAX +}; +#line 10 "vm_x64.dasc" +//|.globalnames globnames +static const char *const globnames[] = { + "vm_returnp", + "cont_dispatch", + "vm_returnc", + "vm_unwind_yield", + "BC_RET_Z", + "vm_return", + "vm_leave_cp", + "vm_leave_unw", + "vm_unwind_c_eh", + "vm_unwind_c", + "vm_unwind_rethrow", + "vm_unwind_ff", + "vm_unwind_ff_eh", + "vm_growstack_c", + "vm_growstack_v", + "vm_growstack_f", + "vm_resume", + "vm_pcall", + "vm_call", + "vm_call_dispatch", + "vmeta_call", + "vm_call_dispatch_f", + "vm_cpcall", + "cont_ffi_callback", + "vm_call_tail", + "cont_cat", + "cont_ra", + "BC_CAT_Z", + "vmeta_tgets", + "vmeta_tgetb", + "vmeta_tgetv", + "vmeta_tgetr", + "BC_TGETR_Z", + "BC_TGETR2_Z", + "vmeta_tsets", + "vmeta_tsetb", + "vmeta_tsetv", + "cont_nop", + "vmeta_tsetr", + "BC_TSETR_Z", + "vmeta_comp", + "vmeta_binop", + "cont_condt", + "cont_condf", + "vmeta_equal", + "vmeta_equal_cd", + "vmeta_istype", + "vmeta_arith_vno", + "vmeta_arith_vn", + "vmeta_arith_nvo", + "vmeta_arith_nv", + "vmeta_unm", + "vmeta_arith_vvo", + "vmeta_arith_vv", + "vmeta_len", + "BC_LEN_Z", + "vmeta_call_ra", + "BC_CALLT_Z", + "vmeta_for", + 
"ff_assert", + "fff_fallback", + "fff_res_", + "ff_type", + "fff_res1", + "ff_getmetatable", + "ff_setmetatable", + "ff_rawget", + "ff_tonumber", + "ff_tostring", + "fff_fallback_1", + "fff_gcstep", + "ff_next", + "fff_res2", + "fff_res", + "ff_pairs", + "ff_ipairs_aux", + "fff_res0", + "ff_ipairs", + "ff_pcall", + "ff_xpcall", + "ff_coroutine_resume", + "ff_coroutine_wrap_aux", + "ff_coroutine_yield", + "ff_math_abs", + "ff_math_sqrt", + "fff_resxmm0", + "ff_math_floor", + "vm_floor_sse", + "ff_math_ceil", + "vm_ceil_sse", + "ff_math_log", + "ff_math_log10", + "ff_math_exp", + "ff_math_sin", + "ff_math_cos", + "ff_math_tan", + "ff_math_asin", + "ff_math_acos", + "ff_math_atan", + "ff_math_sinh", + "ff_math_cosh", + "ff_math_tanh", + "ff_math_pow", + "ff_math_atan2", + "ff_math_fmod", + "ff_math_ldexp", + "ff_math_frexp", + "ff_math_modf", + "ff_math_min", + "ff_math_max", + "ff_string_byte", + "ff_string_char", + "fff_newstr", + "fff_resstr", + "ff_string_sub", + "fff_emptystr", + "ff_string_reverse", + "ff_string_lower", + "ff_string_upper", + "ff_bit_tobit", + "fff_resbit", + "ff_bit_band", + "fff_fallback_bit_op", + "ff_bit_bor", + "ff_bit_bxor", + "ff_bit_bswap", + "ff_bit_bnot", + "ff_bit_lshift", + "ff_bit_rshift", + "ff_bit_arshift", + "ff_bit_rol", + "ff_bit_ror", + "fff_fallback_2", + "vm_record", + "vm_rethook", + "vm_inshook", + "cont_hook", + "vm_hotloop", + "vm_callhook", + "vm_hotcall", + "cont_stitch", + "vm_exit_handler", + "vm_exit_interp", + "vm_exit_interp_notrack", + "vm_floor", + "vm_ceil", + "vm_trunc", + "vm_trunc_sse", + "vm_mod", + "vm_powi_sse", + "vm_cpuid", + "assert_bad_for_arg_type", + "vm_ffi_callback", + "vm_ffi_call", + "BC_MODVN_Z", + "BC_TGETS_Z", + "BC_TSETS_Z", + (const char *)0 +}; +#line 11 "vm_x64.dasc" +//|.externnames extnames +static const char *const extnames[] = { + "lj_state_growstack", + "lj_err_throw", + "lj_meta_tget", + "lj_tab_getinth", + "lj_meta_tset", + "lj_tab_setinth", + "lj_meta_comp", + "lj_meta_equal", + 
"lj_meta_equal_cd", + "lj_meta_istype", + "lj_meta_arith", + "lj_meta_len", + "lj_meta_call", + "lj_meta_for", + "lj_tab_get", + "lj_strfmt_num", + "lj_tab_next", + "lj_ffh_coroutine_wrap_err", + "log", + "log10", + "exp", + "sin", + "cos", + "tan", + "asin", + "acos", + "atan", + "sinh", + "cosh", + "tanh", + "pow", + "atan2", + "fmod", + "frexp", + "modf", + "lj_str_new", + "lj_buf_putstr_reverse", + "lj_buf_tostr", + "lj_buf_putstr_lower", + "lj_buf_putstr_upper", + "lj_gc_step", + "lj_dispatch_ins", + "lj_trace_hot", + "lj_dispatch_call", + "lj_dispatch_stitch", + "lj_trace_exit", + "lj_ccallback_enter", + "lj_ccallback_leave", + "lj_tab_len", + "lj_meta_cat", + "lj_gc_barrieruv", + "lj_func_closeuv", + "lj_func_newL_gc", + "lj_tab_new", + "lj_gc_step_fixtop", + "lj_tab_dup", + "lj_tab_newkey", + "lj_tab_reasize", + (const char *)0 +}; +#line 12 "vm_x64.dasc" +//| +//|//----------------------------------------------------------------------- +//| +//|// Fixed register assignments for the interpreter. +//|// This is very fragile and has many dependencies. Caveat emptor. +//|.define BASE, rdx // Not C callee-save, refetched anyway. +//|.define KBASE, r15 // Must be C callee-save. +//|.define PC, rbx // Must be C callee-save. +//|.define DISPATCH, r14 // Must be C callee-save. +//|.define KBASEd, r15d +//|.define PCd, ebx +//|.define DISPATCHd, r14d +//| +//|.define RA, rcx +//|.define RAd, ecx +//|.define RAH, ch +//|.define RAL, cl +//|.define RB, rbp // Must be rbp (C callee-save). +//|.define RBd, ebp +//|.define RC, rax // Must be rax. +//|.define RCd, eax +//|.define RCW, ax +//|.define RCH, ah +//|.define RCL, al +//|.define OP, RBd +//|.define RD, RC +//|.define RDd, RCd +//|.define RDW, RCW +//|.define RDL, RCL +//|.define TMPR, r10 +//|.define TMPRd, r10d +//|.define ITYPE, r11 +//|.define ITYPEd, r11d +//| +//|.define CARG1, rdi // x64/POSIX C call arguments. 
+//|.define CARG2, rsi +//|.define CARG3, rdx +//|.define CARG4, rcx +//|.define CARG5, r8 +//|.define CARG6, r9 +//|.define CARG1d, edi +//|.define CARG2d, esi +//|.define CARG3d, edx +//|.define CARG4d, ecx +//|.define CARG5d, r8d +//|.define CARG6d, r9d +//| +//|// Type definitions. Some of these are only used for documentation. +//|.type L, lua_State +#define Dt1(_V) (int)(ptrdiff_t)&(((lua_State *)0)_V) +#line 61 "vm_x64.dasc" +//|.type GL, global_State +#define Dt2(_V) (int)(ptrdiff_t)&(((global_State *)0)_V) +#line 62 "vm_x64.dasc" +//|.type TVALUE, TValue +#define Dt3(_V) (int)(ptrdiff_t)&(((TValue *)0)_V) +#line 63 "vm_x64.dasc" +//|.type GCOBJ, GCobj +#define Dt4(_V) (int)(ptrdiff_t)&(((GCobj *)0)_V) +#line 64 "vm_x64.dasc" +//|.type STR, GCstr +#define Dt5(_V) (int)(ptrdiff_t)&(((GCstr *)0)_V) +#line 65 "vm_x64.dasc" +//|.type TAB, GCtab +#define Dt6(_V) (int)(ptrdiff_t)&(((GCtab *)0)_V) +#line 66 "vm_x64.dasc" +//|.type LFUNC, GCfuncL +#define Dt7(_V) (int)(ptrdiff_t)&(((GCfuncL *)0)_V) +#line 67 "vm_x64.dasc" +//|.type CFUNC, GCfuncC +#define Dt8(_V) (int)(ptrdiff_t)&(((GCfuncC *)0)_V) +#line 68 "vm_x64.dasc" +//|.type PROTO, GCproto +#define Dt9(_V) (int)(ptrdiff_t)&(((GCproto *)0)_V) +#line 69 "vm_x64.dasc" +//|.type UPVAL, GCupval +#define DtA(_V) (int)(ptrdiff_t)&(((GCupval *)0)_V) +#line 70 "vm_x64.dasc" +//|.type NODE, Node +#define DtB(_V) (int)(ptrdiff_t)&(((Node *)0)_V) +#line 71 "vm_x64.dasc" +//|.type NARGS, int +#define DtC(_V) (int)(ptrdiff_t)&(((int *)0)_V) +#line 72 "vm_x64.dasc" +//|.type TRACE, GCtrace +#define DtD(_V) (int)(ptrdiff_t)&(((GCtrace *)0)_V) +#line 73 "vm_x64.dasc" +//|.type SBUF, SBuf +#define DtE(_V) (int)(ptrdiff_t)&(((SBuf *)0)_V) +#line 74 "vm_x64.dasc" +//| +//|// Stack layout while in interpreter. Must match with lj_frame.h. +//|//----------------------------------------------------------------------- +//| +//|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). 
+//|.macro saveregs_ +//| push rbx; push r15; push r14 +//| push r13; push r12 +//| sub rsp, CFRAME_SPACE +//|.endmacro +//|.macro saveregs +//| push rbp; saveregs_ +//|.endmacro +//|.macro restoreregs +//| add rsp, CFRAME_SPACE +//| pop r12; pop r13 +//| pop r14; pop r15; pop rbx; pop rbp +//|.endmacro +//| +//|//----- 16 byte aligned, +//|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter. +//|.define SAVE_R4, aword [rsp+aword*10] +//|.define SAVE_R3, aword [rsp+aword*9] +//|.define SAVE_R2, aword [rsp+aword*8] +//|.define SAVE_R1, aword [rsp+aword*7] +//|.define SAVE_RU2, aword [rsp+aword*6] +//|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. +//|.define SAVE_CFRAME, aword [rsp+aword*4] +//|.define SAVE_PC, aword [rsp+aword*3] +//|.define SAVE_L, aword [rsp+aword*2] +//|.define SAVE_ERRF, dword [rsp+dword*3] +//|.define SAVE_NRES, dword [rsp+dword*2] +//|.define TMP1, aword [rsp] //<-- rsp while in interpreter. +//|//----- 16 byte aligned +//| +//|.define TMP1d, dword [rsp] +//|.define TMP1hi, dword [rsp+dword*1] +//|.define MULTRES, TMP1d // MULTRES overlaps TMP1d. +//| +//|//----------------------------------------------------------------------- +//| +//|// Instruction headers. +//|.macro ins_A; .endmacro +//|.macro ins_AD; .endmacro +//|.macro ins_AJ; .endmacro +//|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro +//|.macro ins_AB_; movzx RBd, RCH; .endmacro +//|.macro ins_A_C; movzx RCd, RCL; .endmacro +//|.macro ins_AND; not RD; .endmacro +//| +//|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). +//|.macro ins_NEXT +//| mov RCd, [PC] +//| movzx RAd, RCH +//| movzx OP, RCL +//| add PC, 4 +//| shr RCd, 16 +//| jmp aword [DISPATCH+OP*8] +//|.endmacro +//| +//|// Instruction footer. +//| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. +//| .define ins_next, ins_NEXT +//| .define ins_next_, ins_NEXT +//| +//|// Call decode and dispatch. 
+//|.macro ins_callt +//| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC +//| mov PC, LFUNC:RB->pc +//| mov RAd, [PC] +//| movzx OP, RAL +//| movzx RAd, RAH +//| add PC, 4 +//| jmp aword [DISPATCH+OP*8] +//|.endmacro +//| +//|.macro ins_call +//| // BASE = new base, RB = LFUNC, RD = nargs+1 +//| mov [BASE-8], PC +//| ins_callt +//|.endmacro +//| +//|//----------------------------------------------------------------------- +//| +//|// Macros to clear or set tags. +//|.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro +//|.macro settp, reg, tp +//| mov64 ITYPE, ((uint64_t)tp<<47) +//| or reg, ITYPE +//|.endmacro +//|.macro settp, dst, reg, tp +//| mov64 dst, ((uint64_t)tp<<47) +//| or dst, reg +//|.endmacro +//|.macro setint, reg +//| settp reg, LJ_TISNUM +//|.endmacro +//|.macro setint, dst, reg +//| settp dst, reg, LJ_TISNUM +//|.endmacro +//| +//|// Macros to test operand types. +//|.macro checktp_nc, reg, tp, target +//| mov ITYPE, reg +//| sar ITYPE, 47 +//| cmp ITYPEd, tp +//| jne target +//|.endmacro +//|.macro checktp, reg, tp, target +//| mov ITYPE, reg +//| cleartp reg +//| sar ITYPE, 47 +//| cmp ITYPEd, tp +//| jne target +//|.endmacro +//|.macro checktptp, src, tp, target +//| mov ITYPE, src +//| sar ITYPE, 47 +//| cmp ITYPEd, tp +//| jne target +//|.endmacro +//|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro +//|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro +//|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro +//| +//|.macro checknumx, reg, target, jump +//| mov ITYPE, reg +//| sar ITYPE, 47 +//| cmp ITYPEd, LJ_TISNUM +//| jump target +//|.endmacro +//|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro +//|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro +//|.macro checknum, reg, target; checknumx reg, target, jae; .endmacro +//|.macro checknumtp, src, target; checknumx src, target, jae; .endmacro +//|.macro checknumber, src, 
target; checknumx src, target, ja; .endmacro +//| +//|.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro +//|.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro +//| +//|// These operands must be used with movzx. +//|.define PC_OP, byte [PC-4] +//|.define PC_RA, byte [PC-3] +//|.define PC_RB, byte [PC-1] +//|.define PC_RC, byte [PC-2] +//|.define PC_RD, word [PC-2] +//| +//|.macro branchPC, reg +//| lea PC, [PC+reg*4-BCBIAS_J*4] +//|.endmacro +//| +//|// Assumes DISPATCH is relative to GL. +#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +//| +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +//| +//|// Decrement hashed hotcount and trigger trace recorder if zero. +//|.macro hotloop, reg +//| mov reg, PCd +//| shr reg, 1 +//| and reg, HOTCOUNT_PCMASK +//| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP +//| jb ->vm_hotloop +//|.endmacro +//| +//|.macro hotcall, reg +//| mov reg, PCd +//| shr reg, 1 +//| and reg, HOTCOUNT_PCMASK +//| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL +//| jb ->vm_hotcall +//|.endmacro +//| +//|// Set current VM state. +//|.macro set_vmstate, st +//| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st +//|.endmacro +//| +//|.macro fpop1; fstp st1; .endmacro +//| +//|// Synthesize SSE FP constants. +//|.macro sseconst_abs, reg, tmp // Synthesize abs mask. +//| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp +//|.endmacro +//| +//|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const. +//| mov64 tmp, U64x(val,00000000); movd reg, tmp +//|.endmacro +//| +//|.macro sseconst_sign, reg, tmp // Synthesize sign mask. +//| sseconst_hi reg, tmp, 80000000 +//|.endmacro +//|.macro sseconst_1, reg, tmp // Synthesize 1.0. +//| sseconst_hi reg, tmp, 3ff00000 +//|.endmacro +//|.macro sseconst_m1, reg, tmp // Synthesize -1.0. 
+//| sseconst_hi reg, tmp, bff00000 +//|.endmacro +//|.macro sseconst_2p52, reg, tmp // Synthesize 2^52. +//| sseconst_hi reg, tmp, 43300000 +//|.endmacro +//|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51. +//| sseconst_hi reg, tmp, 43380000 +//|.endmacro +//| +//|// Move table write barrier back. Overwrites reg. +//|.macro barrierback, tab, reg +//| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab) +//| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] +//| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab +//| mov tab->gclist, reg +//|.endmacro +//| +//|//----------------------------------------------------------------------- + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction. */ +static void build_subroutines(BuildCtx *ctx) +{ + //|.code_sub + dasm_put(Dst, 0); +#line 295 "vm_x64.dasc" + //| + //|//----------------------------------------------------------------------- + //|//-- Return handling ---------------------------------------------------- + //|//----------------------------------------------------------------------- + //| + //|->vm_returnp: + //| test PCd, FRAME_P + //| jz ->cont_dispatch + //| + //| // Return from pcall or xpcall fast func. + //| and PC, -8 + //| sub BASE, PC // Restore caller base. + //| lea RA, [RA+PC-8] // Rebase RA and prepend one result. + //| mov PC, [BASE-8] // Fetch PC of previous frame. + //| // Prepending may overwrite the pcall frame, so do it at the end. + //| mov_true ITYPE + //| mov aword [BASE+RA], ITYPE // Prepend true to results. + //| + //|->vm_returnc: + //| add RDd, 1 // RD = nresults+1 + //| jz ->vm_unwind_yield + //| mov MULTRES, RDd + //| test PC, FRAME_TYPE + //| jz ->BC_RET_Z // Handle regular return to Lua. 
+ //| + //|->vm_return: + //| // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return + //| xor PC, FRAME_C + //| test PCd, FRAME_TYPE + //| jnz ->vm_returnp + //| + //| // Return to C. + //| set_vmstate C + //| and PC, -8 + dasm_put(Dst, 2, FRAME_P, (unsigned int)((int64_t)~((uint64_t)2<<47)), (unsigned int)(((int64_t)~((uint64_t)2<<47))>>32), FRAME_TYPE, FRAME_C, FRAME_TYPE, DISPATCH_GL(vmstate), ~LJ_VMST_C); +#line 329 "vm_x64.dasc" + //| sub PC, BASE + //| neg PC // Previous base = BASE - delta. + //| + //| sub RDd, 1 + //| jz >2 + //|1: // Move results down. + //| mov RB, [BASE+RA] + //| mov [BASE-16], RB + //| add BASE, 8 + //| sub RDd, 1 + //| jnz <1 + //|2: + //| mov L:RB, SAVE_L + //| mov L:RB->base, PC + //|3: + //| mov RDd, MULTRES + //| mov RAd, SAVE_NRES // RA = wanted nresults+1 + //|4: + //| cmp RAd, RDd + //| jne >6 // More/less results wanted? + //|5: + //| sub BASE, 16 + //| mov L:RB->top, BASE + //| + //|->vm_leave_cp: + //| mov RA, SAVE_CFRAME // Restore previous C frame. + //| mov L:RB->cframe, RA + //| xor eax, eax // Ok return status for vm_pcall. + //| + //|->vm_leave_unw: + //| restoreregs + //| ret + //| + //|6: + //| jb >7 // Less results wanted? + //| // More results wanted. Check stack size and fill up results with nil. + //| cmp BASE, L:RB->maxstack + dasm_put(Dst, 81, Dt1(->base), Dt1(->top), Dt1(->cframe)); +#line 366 "vm_x64.dasc" + //| ja >8 + //| mov aword [BASE-16], LJ_TNIL + //| add BASE, 8 + //| add RDd, 1 + //| jmp <4 + //| + //|7: // Less results wanted. + //| test RAd, RAd + //| jz <5 // But check for LUA_MULTRET+1. + //| sub RA, RD // Negative result! + //| lea BASE, [BASE+RA*8] // Correct top. + //| jmp <5 + //| + //|8: // Corner case: need to grow stack for filling up results. + //| // This can happen if: + //| // - A C function grows the stack (a lot). + //| // - The GC shrinks the stack in between. + //| // - A return back from a lua_call() with (high) nresults adjustment. 
+ //| mov L:RB->top, BASE // Save current top held in BASE (yes). + //| mov MULTRES, RDd // Need to fill only remainder with nil. + //| mov CARG2d, RAd + //| mov CARG1, L:RB + //| call extern lj_state_growstack // (lua_State *L, int n) + //| mov BASE, L:RB->top // Need the (realloced) L->top in BASE. + //| jmp <3 + //| + //|->vm_unwind_yield: + //| mov al, LUA_YIELD + //| jmp ->vm_unwind_c_eh + //| + //|->vm_unwind_c: // Unwind C stack, return from vm_pcall. + //| // (void *cframe, int errcode) + //| mov eax, CARG2d // Error return status for vm_pcall. + dasm_put(Dst, 198, Dt1(->maxstack), LJ_TNIL, Dt1(->top), Dt1(->top), LUA_YIELD); +#line 399 "vm_x64.dasc" + //| mov rsp, CARG1 + //|->vm_unwind_c_eh: // Landing pad for external unwinder. + //| mov L:RB, SAVE_L + //| mov GL:RB, L:RB->glref + //| mov dword GL:RB->vmstate, ~LJ_VMST_C + //| jmp ->vm_leave_unw + //| + //|->vm_unwind_rethrow: + //| mov CARG1, SAVE_L + //| mov CARG2d, eax + //| restoreregs + //| jmp extern lj_err_throw // (lua_State *L, int errcode) + //| + //|->vm_unwind_ff: // Unwind C stack, return from ff pcall. + //| // (void *cframe) + //| and CARG1, CFRAME_RAWMASK + //| mov rsp, CARG1 + //|->vm_unwind_ff_eh: // Landing pad for external unwinder. + //| mov L:RB, SAVE_L + //| mov RDd, 1+1 // Really 1+2 results, incr. later. + //| mov BASE, L:RB->base + //| mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. + //| add DISPATCH, GG_G2DISP + //| mov PC, [BASE-8] // Fetch PC of previous frame. + //| mov_false RA + //| mov RB, [BASE] + //| mov [BASE-16], RA // Prepend false to error message. + //| mov [BASE-8], RB + //| mov RA, -16 // Results start at BASE+RA = BASE-16. + //| set_vmstate INTERP + //| jmp ->vm_returnc // Increments RD/MULTRES and returns. 
+ //| + //|//----------------------------------------------------------------------- + //|//-- Grow stack for calls ----------------------------------------------- + //|//----------------------------------------------------------------------- + //| + //|->vm_growstack_c: // Grow stack for C function. + //| mov CARG2d, LUA_MINSTACK + dasm_put(Dst, 280, Dt1(->glref), Dt2(->vmstate), ~LJ_VMST_C, CFRAME_RAWMASK, 1+1, Dt1(->base), Dt1(->glref), GG_G2DISP, (unsigned int)((int64_t)~((uint64_t)1<<47)), (unsigned int)(((int64_t)~((uint64_t)1<<47))>>32), DISPATCH_GL(vmstate), ~LJ_VMST_INTERP); +#line 437 "vm_x64.dasc" + //| jmp >2 + //| + //|->vm_growstack_v: // Grow stack for vararg Lua function. + //| sub RD, 16 // LJ_FR2 + //| jmp >1 + //| + //|->vm_growstack_f: // Grow stack for fixarg Lua function. + //| // BASE = new base, RD = nargs+1, RB = L, PC = first PC + //| lea RD, [BASE+NARGS:RD*8-8] + //|1: + //| movzx RAd, byte [PC-4+PC2PROTO(framesize)] + //| add PC, 4 // Must point after first instruction. + //| mov L:RB->base, BASE + //| mov L:RB->top, RD + //| mov SAVE_PC, PC + //| mov CARG2, RA + //|2: + //| // RB = L, L->base = new base, L->top = top + //| mov CARG1, L:RB + //| call extern lj_state_growstack // (lua_State *L, int n) + //| mov BASE, L:RB->base + //| mov RD, L:RB->top + //| mov LFUNC:RB, [BASE-16] + //| cleartp LFUNC:RB + //| sub RD, BASE + //| shr RDd, 3 + //| add NARGS:RDd, 1 + //| // BASE = new base, RB = LFUNC, RD = nargs+1 + //| ins_callt // Just retry the call. + //| + //|//----------------------------------------------------------------------- + //|//-- Entry points into the assembler VM --------------------------------- + //|//----------------------------------------------------------------------- + //| + //|->vm_resume: // Setup C frame and resume thread. + //| // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + //| saveregs + //| mov L:RB, CARG1 // Caveat: CARG1 may be RA. 
+ //| mov SAVE_L, CARG1 + //| mov RA, CARG2 + //| mov PCd, FRAME_CP + //| xor RDd, RDd + //| lea KBASE, [esp+CFRAME_RESUME] + //| mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. + //| add DISPATCH, GG_G2DISP + dasm_put(Dst, 410, LUA_MINSTACK, -4+PC2PROTO(framesize), Dt1(->base), Dt1(->top), Dt1(->base), Dt1(->top), Dt7(->pc), FRAME_CP, CFRAME_RESUME, Dt1(->glref)); +#line 482 "vm_x64.dasc" + //| mov SAVE_PC, RD // Any value outside of bytecode is ok. + //| mov SAVE_CFRAME, RD + //| mov SAVE_NRES, RDd + //| mov SAVE_ERRF, RDd + //| mov L:RB->cframe, KBASE + //| cmp byte L:RB->status, RDL + //| je >2 // Initial resume (like a call). + //| + //| // Resume after yield (like a return). + //| mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB + //| set_vmstate INTERP + //| mov byte L:RB->status, RDL + //| mov BASE, L:RB->base + //| mov RD, L:RB->top + //| sub RD, RA + //| shr RDd, 3 + //| add RDd, 1 // RD = nresults+1 + //| sub RA, BASE // RA = resultofs + //| mov PC, [BASE-8] + //| mov MULTRES, RDd + //| test PCd, FRAME_TYPE + //| jz ->BC_RET_Z + //| jmp ->vm_return + //| + //|->vm_pcall: // Setup protected C frame and enter VM. + //| // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + //| saveregs + //| mov PCd, FRAME_CP + //| mov SAVE_ERRF, CARG4d + //| jmp >1 + //| + //|->vm_call: // Setup C frame and enter VM. + //| // (lua_State *L, TValue *base, int nres1) + //| saveregs + dasm_put(Dst, 569, GG_G2DISP, Dt1(->cframe), Dt1(->status), DISPATCH_GL(cur_L), DISPATCH_GL(vmstate), ~LJ_VMST_INTERP, Dt1(->status), Dt1(->base), Dt1(->top), FRAME_TYPE, FRAME_CP); +#line 516 "vm_x64.dasc" + //| mov PCd, FRAME_C + //| + //|1: // Entry point for vm_pcall above (PC = ftype). + //| mov SAVE_NRES, CARG3d + //| mov L:RB, CARG1 // Caveat: CARG1 may be RA. + //| mov SAVE_L, CARG1 + //| mov RA, CARG2 + //| + //| mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. + //| mov KBASE, L:RB->cframe // Add our C frame to cframe chain. 
+ //| mov SAVE_CFRAME, KBASE + //| mov SAVE_PC, L:RB // Any value outside of bytecode is ok. + //| add DISPATCH, GG_G2DISP + //| mov L:RB->cframe, rsp + //| + //|2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). + //| mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB + //| set_vmstate INTERP + //| mov BASE, L:RB->base // BASE = old base (used in vmeta_call). + //| add PC, RA + //| sub PC, BASE // PC = frame delta + frame type + //| + //| mov RD, L:RB->top + //| sub RD, RA + //| shr NARGS:RDd, 3 + //| add NARGS:RDd, 1 // RD = nargs+1 + //| + //|->vm_call_dispatch: + //| mov LFUNC:RB, [RA-16] + //| checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE. + //| + //|->vm_call_dispatch_f: + //| mov BASE, RA + //| ins_call + //| // BASE = new base, RB = func, RD = nargs+1, PC = caller PC + //| + //|->vm_cpcall: // Setup protected C frame, call C. + //| // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + //| saveregs + dasm_put(Dst, 684, FRAME_C, Dt1(->glref), Dt1(->cframe), GG_G2DISP, Dt1(->cframe), DISPATCH_GL(cur_L), DISPATCH_GL(vmstate), ~LJ_VMST_INTERP, Dt1(->base), Dt1(->top), LJ_TFUNC, Dt7(->pc)); +#line 555 "vm_x64.dasc" + //| mov L:RB, CARG1 // Caveat: CARG1 may be RA. + //| mov SAVE_L, CARG1 + //| mov SAVE_PC, L:RB // Any value outside of bytecode is ok. + //| + //| mov KBASE, L:RB->stack // Compute -savestack(L, L->top). + //| sub KBASE, L:RB->top + //| mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. + //| mov SAVE_ERRF, 0 // No error function. + //| mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame. + //| add DISPATCH, GG_G2DISP + //| // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). + //| + //| mov KBASE, L:RB->cframe // Add our C frame to cframe chain. 
+ //| mov SAVE_CFRAME, KBASE + //| mov L:RB->cframe, rsp + //| mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB + //| + //| call CARG4 // (lua_State *L, lua_CFunction func, void *ud) + //| // TValue * (new base) or NULL returned in eax (RC). + //| test RC, RC + //| jz ->vm_leave_cp // No base? Just remove C frame. + //| mov RA, RC + //| mov PCd, FRAME_CP + //| jmp <2 // Else continue with the call. + //| + //|//----------------------------------------------------------------------- + //|//-- Metamethod handling ------------------------------------------------ + //|//----------------------------------------------------------------------- + //| + //|//-- Continuation dispatch ---------------------------------------------- + //| + //|->cont_dispatch: + //| // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) + //| add RA, BASE + //| and PC, -8 + //| mov RB, BASE + //| sub BASE, PC // Restore caller BASE. + //| mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg. + //| mov RC, RA // ... in [RC] + //| mov PC, [RB-24] // Restore PC from [cont|PC]. + //| mov RA, qword [RB-32] // May be negative on WIN64 with debug. + //| cmp RA, 1 + //| jbe >1 + //| mov LFUNC:KBASE, [BASE-16] + //| cleartp LFUNC:KBASE + //| mov KBASE, LFUNC:KBASE->pc + //| mov KBASE, [KBASE+PC2PROTO(k)] + //| // BASE = base, RC = result, RB = meta base + //| jmp RA // Jump to continuation. + //| + //|1: + //| je ->cont_ffi_callback // cont = 1: return from FFI callback. + dasm_put(Dst, 850, Dt1(->stack), Dt1(->top), Dt1(->glref), GG_G2DISP, Dt1(->cframe), Dt1(->cframe), DISPATCH_GL(cur_L), FRAME_CP, LJ_TNIL, Dt7(->pc), PC2PROTO(k)); +#line 607 "vm_x64.dasc" + //| // cont = 0: Tail call from C function. 
+ //| sub RB, BASE + //| shr RBd, 3 + //| lea RDd, [RBd-3] + //| jmp ->vm_call_tail + //| + //|->cont_cat: // BASE = base, RC = result, RB = mbase + //| movzx RAd, PC_RB + //| sub RB, 32 + //| lea RA, [BASE+RA*8] + //| sub RA, RB + //| je ->cont_ra + //| neg RA + //| shr RAd, 3 + //| mov L:CARG1, SAVE_L + //| mov L:CARG1->base, BASE + //| mov CARG3d, RAd + //| mov RA, [RC] + //| mov [RB], RA + //| mov CARG2, RB + //| jmp ->BC_CAT_Z + //| + //|//-- Table indexing metamethods ----------------------------------------- + //| + //|->vmeta_tgets: + //| settp STR:RC, LJ_TSTR // STR:RC = GCstr * + //| mov TMP1, STR:RC + //| lea RC, TMP1 + //| cmp PC_OP, BC_GGET + //| jne >1 + //| settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * + //| lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. + //| mov [RB], TAB:RA + dasm_put(Dst, 1015, Dt1(->base), (unsigned int)(((uint64_t)LJ_TSTR<<47)), (unsigned int)((((uint64_t)LJ_TSTR<<47))>>32), BC_GGET, (unsigned int)(((uint64_t)LJ_TTAB<<47)), (unsigned int)((((uint64_t)LJ_TTAB<<47))>>32), DISPATCH_GL(tmptv)); +#line 640 "vm_x64.dasc" + //| jmp >2 + //| + //|->vmeta_tgetb: + //| movzx RCd, PC_RC + //| cvtsi2sd xmm0, RCd + //| movsd TMP1, xmm0 + //| lea RC, TMP1 + //| jmp >1 + //| + //|->vmeta_tgetv: + //| movzx RCd, PC_RC // Reload TValue *k from RC. + //| lea RC, [BASE+RC*8] + //|1: + //| movzx RBd, PC_RB // Reload TValue *t from RB. + //| lea RB, [BASE+RB*8] + //|2: + //| mov L:CARG1, SAVE_L + //| mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE. + //| mov CARG2, RB + //| mov CARG3, RC + //| mov L:RB, L:CARG1 + //| mov SAVE_PC, PC + //| call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) + //| // TValue * (finished) or NULL (metamethod) returned in eax (RC). + //| mov BASE, L:RB->base + //| test RC, RC + //| jz >3 + //|->cont_ra: // BASE = base, RC = result + //| movzx RAd, PC_RA + //| mov RB, [RC] + //| mov [BASE+RA*8], RB + //| ins_next + //| + //|3: // Call __index metamethod. 
+ //| // BASE = base, L->top = new base, stack = cont/func/t/k + //| mov RA, L:RB->top + //| mov [RA-24], PC // [cont|PC] + //| lea PC, [RA+FRAME_CONT] + //| sub PC, BASE + //| mov LFUNC:RB, [RA-16] // Guaranteed to be a function here. + //| mov NARGS:RDd, 2+1 // 2 args for func(t, k). + //| cleartp LFUNC:RB + //| jmp ->vm_call_dispatch_f + //| + //|->vmeta_tgetr: + //| mov CARG1, TAB:RB + dasm_put(Dst, 1131, Dt1(->base), Dt1(->base), Dt1(->top), FRAME_CONT, 2+1); +#line 686 "vm_x64.dasc" + //| mov RB, BASE // Save BASE. + //| mov CARG2d, RCd // Caveat: CARG2 == BASE + //| call extern lj_tab_getinth // (GCtab *t, int32_t key) + //| // cTValue * or NULL returned in eax (RC). + //| movzx RAd, PC_RA + //| mov BASE, RB // Restore BASE. + //| test RC, RC + //| jnz ->BC_TGETR_Z + //| mov ITYPE, LJ_TNIL + //| jmp ->BC_TGETR2_Z + //| + //|//----------------------------------------------------------------------- + //| + //|->vmeta_tsets: + //| settp STR:RC, LJ_TSTR // STR:RC = GCstr * + //| mov TMP1, STR:RC + //| lea RC, TMP1 + //| cmp PC_OP, BC_GSET + //| jne >1 + //| settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * + //| lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. + //| mov [RB], TAB:RA + //| jmp >2 + //| + //|->vmeta_tsetb: + //| movzx RCd, PC_RC + //| cvtsi2sd xmm0, RCd + //| movsd TMP1, xmm0 + //| lea RC, TMP1 + //| jmp >1 + //| + //|->vmeta_tsetv: + //| movzx RCd, PC_RC // Reload TValue *k from RC. + dasm_put(Dst, 1305, LJ_TNIL, (unsigned int)(((uint64_t)LJ_TSTR<<47)), (unsigned int)((((uint64_t)LJ_TSTR<<47))>>32), BC_GSET, (unsigned int)(((uint64_t)LJ_TTAB<<47)), (unsigned int)((((uint64_t)LJ_TTAB<<47))>>32), DISPATCH_GL(tmptv)); +#line 719 "vm_x64.dasc" + //| lea RC, [BASE+RC*8] + //|1: + //| movzx RBd, PC_RB // Reload TValue *t from RB. + //| lea RB, [BASE+RB*8] + //|2: + //| mov L:CARG1, SAVE_L + //| mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE. 
+ //| mov CARG2, RB + //| mov CARG3, RC + //| mov L:RB, L:CARG1 + //| mov SAVE_PC, PC + //| call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + //| // TValue * (finished) or NULL (metamethod) returned in eax (RC). + //| mov BASE, L:RB->base + //| test RC, RC + //| jz >3 + //| // NOBARRIER: lj_meta_tset ensures the table is not black. + //| movzx RAd, PC_RA + //| mov RB, [BASE+RA*8] + //| mov [RC], RB + //|->cont_nop: // BASE = base, (RC = result) + //| ins_next + //| + //|3: // Call __newindex metamethod. + //| // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + //| mov RA, L:RB->top + //| mov [RA-24], PC // [cont|PC] + //| movzx RCd, PC_RA + //| // Copy value to third argument. + //| mov RB, [BASE+RC*8] + //| mov [RA+16], RB + //| lea PC, [RA+FRAME_CONT] + //| sub PC, BASE + //| mov LFUNC:RB, [RA-16] // Guaranteed to be a function here. + //| mov NARGS:RDd, 3+1 // 3 args for func(t, k, v). + //| cleartp LFUNC:RB + //| jmp ->vm_call_dispatch_f + //| + //|->vmeta_tsetr: + //| mov L:CARG1, SAVE_L + //| mov CARG2, TAB:RB + //| mov L:CARG1->base, BASE + //| mov RB, BASE // Save BASE. + //| mov CARG3d, RCd // Caveat: CARG3 == BASE. + //| mov SAVE_PC, PC + //| call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + //| // TValue * returned in eax (RC). + //| movzx RAd, PC_RA + //| mov BASE, RB // Restore BASE. + //| jmp ->BC_TSETR_Z + //| + //|//-- Comparison metamethods --------------------------------------------- + //| + //|->vmeta_comp: + //| movzx RDd, PC_RD + //| movzx RAd, PC_RA + //| mov L:RB, SAVE_L + //| mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE. + //| lea CARG2, [BASE+RA*8] + dasm_put(Dst, 1417, Dt1(->base), Dt1(->base), Dt1(->top), FRAME_CONT, 3+1, Dt1(->base), Dt1(->base)); +#line 778 "vm_x64.dasc" + //| lea CARG3, [BASE+RD*8] + //| mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA. 
+ //| movzx CARG4d, PC_OP + //| mov SAVE_PC, PC + //| call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) + //| // 0/1 or TValue * (metamethod) returned in eax (RC). + //|3: + //| mov BASE, L:RB->base + //| cmp RC, 1 + //| ja ->vmeta_binop + //|4: + //| lea PC, [PC+4] + //| jb >6 + //|5: + //| movzx RDd, PC_RD + //| branchPC RD + //|6: + //| ins_next + //| + //|->cont_condt: // BASE = base, RC = result + //| add PC, 4 + //| mov ITYPE, [RC] + //| sar ITYPE, 47 + //| cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true. + //| jb <5 + //| jmp <6 + //| + //|->cont_condf: // BASE = base, RC = result + //| mov ITYPE, [RC] + //| sar ITYPE, 47 + //| cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false. + //| jmp <4 + //| + //|->vmeta_equal: + //| cleartp TAB:RD + dasm_put(Dst, 1629, Dt1(->base), -BCBIAS_J*4, LJ_TISTRUECOND, LJ_TISTRUECOND); +#line 813 "vm_x64.dasc" + //| sub PC, 4 + //| mov CARG2, RA + //| mov CARG4d, RBd // Caveat: CARG4 == RA. + //| mov L:RB, SAVE_L + //| mov L:RB->base, BASE // Caveat: CARG3 == BASE. + //| mov CARG3, RD + //| mov CARG1, L:RB + //| mov SAVE_PC, PC + //| call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) + //| // 0/1 or TValue * (metamethod) returned in eax (RC). + //| jmp <3 + //| + //|->vmeta_equal_cd: + //| sub PC, 4 + //| mov L:RB, SAVE_L + //| mov L:RB->base, BASE + //| mov CARG1, L:RB + //| mov CARG2d, dword [PC-4] + //| mov SAVE_PC, PC + //| call extern lj_meta_equal_cd // (lua_State *L, BCIns ins) + //| // 0/1 or TValue * (metamethod) returned in eax (RC). + //| jmp <3 + //| + //|->vmeta_istype: + //| mov L:RB, SAVE_L + //| mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE. 
+ //| mov CARG2d, RAd + //| mov CARG3d, RDd + //| mov L:CARG1, L:RB + //| mov SAVE_PC, PC + //| call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + //| mov BASE, L:RB->base + //| jmp <6 + //| + //|//-- Arithmetic metamethods --------------------------------------------- + //| + //|->vmeta_arith_vno: + //|->vmeta_arith_vn: + //| lea RC, [KBASE+RC*8] + //| jmp >1 + //| + //|->vmeta_arith_nvo: + //|->vmeta_arith_nv: + //| lea TMPR, [KBASE+RC*8] + //| lea RC, [BASE+RB*8] + //| mov RB, TMPR + //| jmp >2 + //| + //|->vmeta_unm: + //| lea RC, [BASE+RD*8] + dasm_put(Dst, 1765, Dt1(->base), Dt1(->base), Dt1(->base), Dt1(->base)); +#line 863 "vm_x64.dasc" + //| mov RB, RC + //| jmp >2 + //| + //|->vmeta_arith_vvo: + //|->vmeta_arith_vv: + //| lea RC, [BASE+RC*8] + //|1: + //| lea RB, [BASE+RB*8] + //|2: + //| lea RA, [BASE+RA*8] + //| movzx CARG5d, PC_OP + //| mov CARG2, RA + //| mov CARG4, RC // Caveat: CARG4 == RA. + //| mov L:CARG1, SAVE_L + //| mov L:CARG1->base, BASE // Caveat: CARG3 == BASE. + //| mov CARG3, RB + //| mov L:RB, L:CARG1 + //| mov SAVE_PC, PC + //| call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + //| // NULL (finished) or TValue * (metamethod) returned in eax (RC). + //| mov BASE, L:RB->base + //| test RC, RC + //| jz ->cont_nop + //| + //| // Call metamethod for binary op. + //|->vmeta_binop: + //| // BASE = base, RC = new base, stack = cont/func/o1/o2 + //| mov RA, RC + //| sub RC, BASE + //| mov [RA-24], PC // [cont|PC] + //| lea PC, [RC+FRAME_CONT] + //| mov NARGS:RDd, 2+1 // 2 args for func(o1, o2). + //| jmp ->vm_call_dispatch + //| + //|->vmeta_len: + //| movzx RDd, PC_RD + //| mov L:RB, SAVE_L + //| mov L:RB->base, BASE + //| lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE + //| mov L:CARG1, L:RB + //| mov SAVE_PC, PC + //| call extern lj_meta_len // (lua_State *L, TValue *o) + //| // NULL (retry) or TValue * (metamethod) returned in eax (RC). 
+ //| mov BASE, L:RB->base + dasm_put(Dst, 1921, Dt1(->base), Dt1(->base), FRAME_CONT, 2+1, Dt1(->base), Dt1(->base)); +#line 907 "vm_x64.dasc" +#if LJ_52 + //| test RC, RC + //| jne ->vmeta_binop // Binop call for compatibility. + //| movzx RDd, PC_RD + //| mov TAB:CARG1, [BASE+RD*8] + //| cleartp TAB:CARG1 + //| jmp ->BC_LEN_Z + dasm_put(Dst, 2062); +#line 914 "vm_x64.dasc" +#else + //| jmp ->vmeta_binop // Binop call for compatibility. + dasm_put(Dst, 2092); +#line 916 "vm_x64.dasc" +#endif + //| + //|//-- Call metamethod ---------------------------------------------------- + //| + //|->vmeta_call_ra: + //| lea RA, [BASE+RA*8+16] + //|->vmeta_call: // Resolve and call __call metamethod. + //| // BASE = old base, RA = new base, RC = nargs+1, PC = return + //| mov TMP1d, NARGS:RDd // Save RA, RC for us. + //| mov RB, RA + //| mov L:CARG1, SAVE_L + //| mov L:CARG1->base, BASE // Caveat: CARG3 is BASE. + //| lea CARG2, [RA-16] + //| lea CARG3, [RA+NARGS:RD*8-8] + //| mov SAVE_PC, PC + //| call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + //| mov RA, RB + //| mov L:RB, SAVE_L + //| mov BASE, L:RB->base + //| mov NARGS:RDd, TMP1d + //| mov LFUNC:RB, [RA-16] + //| add NARGS:RDd, 1 + //| // This is fragile. L->base must not move, KBASE must always be defined. + //| cmp KBASE, BASE // Continue with CALLT if flag set. + //| je ->BC_CALLT_Z + //| cleartp LFUNC:RB + //| mov BASE, RA + //| ins_call // Otherwise call resolved metamethod. + //| + //|//-- Argument coercion for 'for' statement ------------------------------ + //| + //|->vmeta_for: + //| mov L:RB, SAVE_L + //| mov L:RB->base, BASE + //| mov CARG2, RA // Caveat: CARG2 == BASE + //| mov L:CARG1, L:RB // Caveat: CARG1 == RA + //| mov SAVE_PC, PC + //| call extern lj_meta_for // (lua_State *L, TValue *base) + //| mov BASE, L:RB->base + //| mov RCd, [PC-4] + //| movzx RAd, RCH + //| movzx OP, RCL + //| shr RCd, 16 + //| jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI. 
+ //| + //|//----------------------------------------------------------------------- + //|//-- Fast functions ----------------------------------------------------- + //|//----------------------------------------------------------------------- + //| + //|.macro .ffunc, name + //|->ff_ .. name: + //|.endmacro + //| + //|.macro .ffunc_1, name + //|->ff_ .. name: + //| cmp NARGS:RDd, 1+1; jb ->fff_fallback + //|.endmacro + //| + //|.macro .ffunc_2, name + //|->ff_ .. name: + //| cmp NARGS:RDd, 2+1; jb ->fff_fallback + //|.endmacro + //| + //|.macro .ffunc_n, name, op + //| .ffunc_1 name + //| checknumtp [BASE], ->fff_fallback + //| op xmm0, qword [BASE] + //|.endmacro + //| + //|.macro .ffunc_n, name + //| .ffunc_n name, movsd + //|.endmacro + //| + //|.macro .ffunc_nn, name + //| .ffunc_2 name + //| checknumtp [BASE], ->fff_fallback + //| checknumtp [BASE+8], ->fff_fallback + //| movsd xmm0, qword [BASE] + //| movsd xmm1, qword [BASE+8] + //|.endmacro + //| + //|// Inlined GC threshold check. Caveat: uses label 1. 
+ //|.macro ffgccheck + //| mov RB, [DISPATCH+DISPATCH_GL(gc.total)] + //| cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)] + //| jb >1 + //| call ->fff_gcstep + //|1: + //|.endmacro + //| + //|//-- Base library: checks ----------------------------------------------- + //| + //|.ffunc_1 assert + //| mov ITYPE, [BASE] + //| mov RB, ITYPE + //| sar ITYPE, 47 + //| cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback + //| mov PC, [BASE-8] + //| mov MULTRES, RDd + //| mov RB, [BASE] + //| mov [BASE-16], RB + //| sub RDd, 2 + //| jz >2 + //| mov RA, BASE + dasm_put(Dst, 2097, Dt1(->base), Dt1(->base), Dt7(->pc), Dt1(->base), Dt1(->base), GG_DISP2STATIC, 1+1, LJ_TISTRUECOND); +#line 1020 "vm_x64.dasc" + //|1: + //| add RA, 8 + //| mov RB, [RA] + //| mov [RA-16], RB + //| sub RDd, 1 + //| jnz <1 + //|2: + //| mov RDd, MULTRES + //| jmp ->fff_res_ + //| + //|.ffunc_1 type + //| mov RC, [BASE] + //| sar RC, 47 + //| mov RBd, LJ_TISNUM + //| cmp RCd, RBd + //| cmovb RCd, RBd + //| not RCd + //|2: + //| mov CFUNC:RB, [BASE-16] + //| cleartp CFUNC:RB + //| mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] + //| mov PC, [BASE-8] + //| settp STR:RC, LJ_TSTR + //| mov [BASE-16], STR:RC + //| jmp ->fff_res1 + //| + //|//-- Base library: getters and setters --------------------------------- + //| + //|.ffunc_1 getmetatable + dasm_put(Dst, 2318, 1+1, LJ_TISNUM, ((char *)(&((GCfuncC *)0)->upvalue)), (unsigned int)(((uint64_t)LJ_TSTR<<47)), (unsigned int)((((uint64_t)LJ_TSTR<<47))>>32), 1+1); +#line 1049 "vm_x64.dasc" + //| mov TAB:RB, [BASE] + //| mov PC, [BASE-8] + //| checktab TAB:RB, >6 + //|1: // Field metatable must be at same offset for GCtab and GCudata! + //| mov TAB:RB, TAB:RB->metatable + //|2: + //| test TAB:RB, TAB:RB + //| mov aword [BASE-16], LJ_TNIL + //| jz ->fff_res1 + //| settp TAB:RC, TAB:RB, LJ_TTAB + //| mov [BASE-16], TAB:RC // Store metatable as default result. 
+ //| mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)] + //| mov RAd, TAB:RB->hmask + //| and RAd, STR:RC->hash + //| settp STR:RC, LJ_TSTR + //| imul RAd, #NODE + //| add NODE:RA, TAB:RB->node + dasm_put(Dst, 2429, LJ_TTAB, Dt6(->metatable), LJ_TNIL, (unsigned int)(((uint64_t)LJ_TTAB<<47)), (unsigned int)((((uint64_t)LJ_TTAB<<47))>>32), DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable), Dt6(->hmask), Dt5(->hash), (unsigned int)(((uint64_t)LJ_TSTR<<47)), (unsigned int)((((uint64_t)LJ_TSTR<<47))>>32), sizeof(Node)); +#line 1066 "vm_x64.dasc" + //|3: // Rearranged logic, because we expect _not_ to find the key. + //| cmp NODE:RA->key, STR:RC + //| je >5 + //|4: + //| mov NODE:RA, NODE:RA->next + //| test NODE:RA, NODE:RA + //| jnz <3 + //| jmp ->fff_res1 // Not found, keep default result. + //|5: + //| mov RB, NODE:RA->val + //| cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. + //| mov [BASE-16], RB // Return value of mt.__metatable. + //| jmp ->fff_res1 + //| + //|6: + //| cmp ITYPEd, LJ_TUDATA; je <1 + dasm_put(Dst, 2523, Dt6(->node), DtB(->key), DtB(->next), DtB(->val), LJ_TNIL); +#line 1082 "vm_x64.dasc" + //| cmp ITYPEd, LJ_TISNUM; ja >7 + //| mov ITYPEd, LJ_TISNUM + //|7: + //| not ITYPEd + //| mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])] + //| jmp <2 + //| + //|.ffunc_2 setmetatable + //| mov TAB:RB, [BASE] + //| mov TAB:TMPR, TAB:RB + //| checktab TAB:RB, ->fff_fallback + //| // Fast path: no mt for table yet and not clearing the mt. + //| cmp aword TAB:RB->metatable, 0; jne ->fff_fallback + dasm_put(Dst, 2581, LJ_TUDATA, LJ_TISNUM, LJ_TISNUM, DISPATCH_GL(gcroot[GCROOT_BASEMT]), 2+1, LJ_TTAB, Dt6(->metatable)); +#line 1095 "vm_x64.dasc" + //| mov TAB:RA, [BASE+8] + //| checktab TAB:RA, ->fff_fallback + //| mov TAB:RB->metatable, TAB:RA + //| mov PC, [BASE-8] + //| mov [BASE-16], TAB:TMPR // Return original table. 
+ //| test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) + //| jz >1 + //| // Possible write barrier. Table is black, but skip iswhite(mt) check. + //| barrierback TAB:RB, RC + //|1: + //| jmp ->fff_res1 + //| + //|.ffunc_2 rawget + dasm_put(Dst, 2668, LJ_TTAB, Dt6(->metatable), Dt6(->marked), LJ_GC_BLACK, Dt6(->marked), (uint8_t)~LJ_GC_BLACK, DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist)); +#line 1108 "vm_x64.dasc" + //| mov TAB:CARG2, [BASE] + //| checktab TAB:CARG2, ->fff_fallback + //| mov RB, BASE // Save BASE. + //| lea CARG3, [BASE+8] // Caveat: CARG3 == BASE. + //| mov CARG1, SAVE_L + //| call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + //| // cTValue * returned in eax (RD). + //| mov BASE, RB // Restore BASE. + //| // Copy table slot. + //| mov RB, [RD] + //| mov PC, [BASE-8] + //| mov [BASE-16], RB + //| jmp ->fff_res1 + //| + //|//-- Base library: conversions ------------------------------------------ + //| + //|.ffunc tonumber + //| // Only handles the number case inline (without a base argument). + //| cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument. + //| mov RB, [BASE] + //| checknumber RB, ->fff_fallback + //| mov PC, [BASE-8] + //| mov [BASE-16], RB + //| jmp ->fff_res1 + //| + //|.ffunc_1 tostring + dasm_put(Dst, 2750, 2+1, LJ_TTAB, 1+1, LJ_TISNUM, 1+1); +#line 1134 "vm_x64.dasc" + //| // Only handles the string or number case inline. + //| mov PC, [BASE-8] + //| mov STR:RB, [BASE] + //| checktp_nc STR:RB, LJ_TSTR, >3 + //| // A __tostring method in the string base metatable is ignored. + //|2: + //| mov [BASE-16], STR:RB + //| jmp ->fff_res1 + //|3: // Handle numbers inline, unless a number base metatable is present. + //| cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1 + //| cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0 + //| jne ->fff_fallback + //| ffgccheck // Caveat: uses label 1. 
+ dasm_put(Dst, 2877, LJ_TSTR, LJ_TISNUM, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]), DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold)); +#line 1147 "vm_x64.dasc" + //| mov L:RB, SAVE_L + //| mov L:RB->base, BASE // Add frame since C call can throw. + //| mov SAVE_PC, PC // Redundant (but a defined value). + //| mov CARG2, BASE // Otherwise: CARG2 == BASE + //| mov L:CARG1, L:RB + //| call extern lj_strfmt_num // (lua_State *L, lua_Number *np) + //| // GCstr returned in eax (RD). + //| mov BASE, L:RB->base + //| settp STR:RB, RD, LJ_TSTR + //| jmp <2 + //| + //|//-- Base library: iterators ------------------------------------------- + //| + //|.ffunc_1 next + //| je >2 // Missing 2nd arg? + //|1: + //| mov CARG2, [BASE] + //| checktab CARG2, ->fff_fallback + //| mov L:RB, SAVE_L + dasm_put(Dst, 2951, Dt1(->base), Dt1(->base), (unsigned int)(((uint64_t)LJ_TSTR<<47)), (unsigned int)((((uint64_t)LJ_TSTR<<47))>>32), 1+1, LJ_TTAB); +#line 1166 "vm_x64.dasc" + //| mov L:RB->base, BASE // Add frame since C call can throw. + //| mov L:RB->top, BASE // Dummy frame length is ok. + //| mov PC, [BASE-8] + //| lea CARG3, [BASE+8] // Caveat: CARG3 == BASE. + //| mov CARG1, L:RB + //| mov SAVE_PC, PC // Needed for ITERN fallback. + //| call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) + //| // Flag returned in eax (RD). + //| mov BASE, L:RB->base + //| test RDd, RDd; jz >3 // End of traversal? + //| // Copy key and value to results. + //| mov RB, [BASE+8] + //| mov RD, [BASE+16] + //| mov [BASE-16], RB + //| mov [BASE-8], RD + //|->fff_res2: + //| mov RDd, 1+2 + //| jmp ->fff_res + //|2: // Set missing 2nd arg to nil. + //| mov aword [BASE+8], LJ_TNIL + //| jmp <1 + //|3: // End of traversal: return nil. 
+ //| mov aword [BASE-16], LJ_TNIL + //| jmp ->fff_res1 + //| + //|.ffunc_1 pairs + dasm_put(Dst, 3043, Dt1(->base), Dt1(->top), Dt1(->base), 1+2, LJ_TNIL, LJ_TNIL, 1+1); +#line 1192 "vm_x64.dasc" + //| mov TAB:RB, [BASE] + //| mov TMPR, TAB:RB + //| checktab TAB:RB, ->fff_fallback + dasm_put(Dst, 3144, LJ_TTAB); +#line 1195 "vm_x64.dasc" +#if LJ_52 + //| cmp aword TAB:RB->metatable, 0; jne ->fff_fallback + dasm_put(Dst, 3183, Dt6(->metatable)); +#line 1197 "vm_x64.dasc" +#endif + //| mov CFUNC:RD, [BASE-16] + //| cleartp CFUNC:RD + //| mov CFUNC:RD, CFUNC:RD->upvalue[0] + //| settp CFUNC:RD, LJ_TFUNC + //| mov PC, [BASE-8] + //| mov [BASE-16], CFUNC:RD + //| mov [BASE-8], TMPR + //| mov aword [BASE], LJ_TNIL + //| mov RDd, 1+3 + //| jmp ->fff_res + //| + //|.ffunc_2 ipairs_aux + //| mov TAB:RB, [BASE] + //| checktab TAB:RB, ->fff_fallback + //| checknumtp [BASE+8], ->fff_fallback + //| movsd xmm0, qword [BASE+8] + //| mov PC, [BASE-8] + //| sseconst_1 xmm1, TMPR + dasm_put(Dst, 3193, Dt8(->upvalue[0]), (unsigned int)(((uint64_t)LJ_TFUNC<<47)), (unsigned int)((((uint64_t)LJ_TFUNC<<47))>>32), LJ_TNIL, 1+3, 2+1, LJ_TTAB, LJ_TISNUM, (unsigned int)(U64x(3ff00000,00000000)), (unsigned int)((U64x(3ff00000,00000000))>>32)); +#line 1216 "vm_x64.dasc" + //| addsd xmm0, xmm1 + //| cvttsd2si RAd, xmm0 + //| movsd qword [BASE-16], xmm0 + //| cmp RAd, TAB:RB->asize; jae >2 // Not in array part? + //| mov RD, TAB:RB->array + //| lea RD, [RD+RA*8] + //|1: + //| cmp aword [RD], LJ_TNIL; je ->fff_res0 + //| // Copy array slot. + //| mov RB, [RD] + //| mov [BASE-8], RB + //| jmp ->fff_res2 + //|2: // Check for empty hash part first. Otherwise call C function. + //| cmp dword TAB:RB->hmask, 0; je ->fff_res0 + //| mov CARG1, TAB:RB + //| mov RB, BASE // Save BASE. + //| mov CARG2d, RAd // Caveat: CARG2 == BASE + //| call extern lj_tab_getinth // (GCtab *t, int32_t key) + //| // cTValue * or NULL returned in eax (RD). 
+ //| mov BASE, RB + //| test RD, RD + //| jnz <1 + //|->fff_res0: + //| mov RDd, 1+0 + //| jmp ->fff_res + //| + //|.ffunc_1 ipairs + dasm_put(Dst, 3316, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->hmask), 1+0); +#line 1243 "vm_x64.dasc" + //| mov TAB:RB, [BASE] + //| mov TMPR, TAB:RB + //| checktab TAB:RB, ->fff_fallback + dasm_put(Dst, 3420, 1+1, LJ_TTAB); +#line 1246 "vm_x64.dasc" +#if LJ_52 + //| cmp aword TAB:RB->metatable, 0; jne ->fff_fallback + dasm_put(Dst, 3183, Dt6(->metatable)); +#line 1248 "vm_x64.dasc" +#endif + //| mov CFUNC:RD, [BASE-16] + //| cleartp CFUNC:RD + //| mov CFUNC:RD, CFUNC:RD->upvalue[0] + //| settp CFUNC:RD, LJ_TFUNC + //| mov PC, [BASE-8] + //| mov [BASE-16], CFUNC:RD + //| mov [BASE-8], TMPR + //| mov qword [BASE], 0 + //| mov RDd, 1+3 + //| jmp ->fff_res + //| + //|//-- Base library: catch errors ---------------------------------------- + //| + //|.ffunc_1 pcall + //| lea RA, [BASE+16] + //| sub NARGS:RDd, 1 + //| mov PCd, 16+FRAME_PCALL + //|1: + //| movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)] + //| shr RB, HOOK_ACTIVE_SHIFT + //| and RB, 1 + //| add PC, RB // Remember active hook before pcall. + //| // Note: this does a (harmless) copy of the function to the PC slot, too. + //| mov KBASE, RD + //|2: + //| mov RB, [RA+KBASE*8-24] + //| mov [RA+KBASE*8-16], RB + //| sub KBASE, 1 + //| ja <2 + //| jmp ->vm_call_dispatch + //| + //|.ffunc_2 xpcall + dasm_put(Dst, 3463, Dt8(->upvalue[0]), (unsigned int)(((uint64_t)LJ_TFUNC<<47)), (unsigned int)((((uint64_t)LJ_TFUNC<<47))>>32), 1+3, 1+1, 16+FRAME_PCALL, DISPATCH_GL(hookmask), HOOK_ACTIVE_SHIFT); +#line 1281 "vm_x64.dasc" + //| mov LFUNC:RA, [BASE+8] + //| checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback + //| mov LFUNC:RB, [BASE] // Swap function and traceback. 
+ //| mov [BASE], LFUNC:RA + //| mov [BASE+8], LFUNC:RB + //| lea RA, [BASE+24] + //| sub NARGS:RDd, 2 + //| mov PCd, 24+FRAME_PCALL + //| jmp <1 + //| + //|//-- Coroutine library -------------------------------------------------- + //| + //|.macro coroutine_resume_wrap, resume + //|.if resume + //|.ffunc_1 coroutine_resume + //| mov L:RB, [BASE] + //| cleartp L:RB + //|.else + //|.ffunc coroutine_wrap_aux + //| mov CFUNC:RB, [BASE-16] + //| cleartp CFUNC:RB + //| mov L:RB, CFUNC:RB->upvalue[0].gcr + //| cleartp L:RB + //|.endif + //| mov PC, [BASE-8] + //| mov SAVE_PC, PC + //| mov TMP1, L:RB + //|.if resume + //| checktptp [BASE], LJ_TTHREAD, ->fff_fallback + //|.endif + //| cmp aword L:RB->cframe, 0; jne ->fff_fallback + //| cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback + //| mov RA, L:RB->top + //| je >1 // Status != LUA_YIELD (i.e. 0)? + //| cmp RA, L:RB->base // Check for presence of initial func. + //| je ->fff_fallback + //| mov PC, [RA-8] // Move initial function up. + //| mov [RA], PC + //| add RA, 8 + //|1: + //|.if resume + //| lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread). + //|.else + //| lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1). + //|.endif + //| cmp PC, L:RB->maxstack; ja ->fff_fallback + //| mov L:RB->top, PC + //| + //| mov L:RB, SAVE_L + //| mov L:RB->base, BASE + //|.if resume + //| add BASE, 8 // Keep resumed thread in stack for GC. + //|.endif + //| mov L:RB->top, BASE + //|.if resume + //| lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move. + //|.else + //| lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move. + //|.endif + //| sub RB, PC // Relative to PC. + //| + //| cmp PC, RA + //| je >3 + //|2: // Move args to coroutine. 
+ //| mov RC, [PC+RB] + //| mov [PC-8], RC + //| sub PC, 8 + //| cmp PC, RA + //| jne <2 + //|3: + //| mov CARG2, RA + //| mov CARG1, TMP1 + //| call ->vm_resume // (lua_State *L, TValue *base, 0, 0) + //| + //| mov L:RB, SAVE_L + //| mov L:PC, TMP1 + //| mov BASE, L:RB->base + //| mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB + //| set_vmstate INTERP + //| + //| cmp eax, LUA_YIELD + //| ja >8 + //|4: + //| mov RA, L:PC->base + //| mov KBASE, L:PC->top + //| mov L:PC->top, RA // Clear coroutine stack. + //| mov PC, KBASE + //| sub PC, RA + //| je >6 // No results? + //| lea RD, [BASE+PC] + //| shr PCd, 3 + //| cmp RD, L:RB->maxstack + //| ja >9 // Need to grow stack? + //| + //| mov RB, BASE + //| sub RB, RA + //|5: // Move results from coroutine. + //| mov RD, [RA] + //| mov [RA+RB], RD + //| add RA, 8 + //| cmp RA, KBASE + //| jne <5 + //|6: + //|.if resume + //| lea RDd, [PCd+2] // nresults+1 = 1 + true + results. + //| mov_true ITYPE // Prepend true to results. + //| mov [BASE-8], ITYPE + //|.else + //| lea RDd, [PCd+1] // nresults+1 = 1 + results. + //|.endif + //|7: + //| mov PC, SAVE_PC + //| mov MULTRES, RDd + //|.if resume + //| mov RA, -8 + //|.else + //| xor RAd, RAd + //|.endif + //| test PCd, FRAME_TYPE + //| jz ->BC_RET_Z + //| jmp ->vm_return + //| + //|8: // Coroutine returned with error (at co->top-1). + //|.if resume + //| mov_false ITYPE // Prepend false to results. + //| mov [BASE-8], ITYPE + //| mov RA, L:PC->top + //| sub RA, 8 + //| mov L:PC->top, RA // Clear error from coroutine stack. + //| // Copy error message. + //| mov RD, [RA] + //| mov [BASE], RD + //| mov RDd, 1+2 // nresults+1 = 1 + false + error. + //| jmp <7 + //|.else + //| mov CARG2, L:PC + //| mov CARG1, L:RB + //| call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) + //| // Error function does not return. + //|.endif + //| + //|9: // Handle stack expansion on return from yield. + //| mov L:RA, TMP1 + //| mov L:RA->top, KBASE // Undo coroutine stack clearing. 
+ //| mov CARG2, PC + //| mov CARG1, L:RB + //| call extern lj_state_growstack // (lua_State *L, int n) + //| mov L:PC, TMP1 + //| mov BASE, L:RB->base + //| jmp <4 // Retry the stack move. + //|.endmacro + //| + //| coroutine_resume_wrap 1 // coroutine.resume + dasm_put(Dst, 3588, 2+1, LJ_TFUNC, 24+FRAME_PCALL, 1+1, LJ_TTHREAD, Dt1(->cframe)); + dasm_put(Dst, 3703, Dt1(->status), LUA_YIELD, Dt1(->top), Dt1(->base), Dt1(->maxstack), Dt1(->top), Dt1(->base), Dt1(->top)); + dasm_put(Dst, 3794, Dt1(->base), DISPATCH_GL(cur_L), DISPATCH_GL(vmstate), ~LJ_VMST_INTERP, LUA_YIELD, Dt1(->base), Dt1(->top), Dt1(->top), Dt1(->maxstack)); + dasm_put(Dst, 3895, (unsigned int)((int64_t)~((uint64_t)2<<47)), (unsigned int)(((int64_t)~((uint64_t)2<<47))>>32), FRAME_TYPE, (unsigned int)((int64_t)~((uint64_t)1<<47)), (unsigned int)(((int64_t)~((uint64_t)1<<47))>>32), Dt1(->top), Dt1(->top)); +#line 1434 "vm_x64.dasc" + //| coroutine_resume_wrap 0 // coroutine.wrap + dasm_put(Dst, 3998, 1+2, Dt1(->top), Dt1(->base), Dt8(->upvalue[0].gcr), Dt1(->cframe), Dt1(->status), LUA_YIELD, Dt1(->top)); + dasm_put(Dst, 4112, Dt1(->base), Dt1(->maxstack), Dt1(->top), Dt1(->base), Dt1(->top), Dt1(->base)); + dasm_put(Dst, 4230, DISPATCH_GL(cur_L), DISPATCH_GL(vmstate), ~LJ_VMST_INTERP, LUA_YIELD, Dt1(->base), Dt1(->top), Dt1(->top), Dt1(->maxstack)); +#line 1435 "vm_x64.dasc" + //| + //|.ffunc coroutine_yield + //| mov L:RB, SAVE_L + //| test aword L:RB->cframe, CFRAME_RESUME + //| jz ->fff_fallback + //| mov L:RB->base, BASE + //| lea RD, [BASE+NARGS:RD*8-8] + //| mov L:RB->top, RD + //| xor RDd, RDd + dasm_put(Dst, 4318, FRAME_TYPE, Dt1(->top), Dt1(->base), Dt1(->cframe), CFRAME_RESUME, Dt1(->base), Dt1(->top)); +#line 1444 "vm_x64.dasc" + //| mov aword L:RB->cframe, RD + //| mov al, LUA_YIELD + //| mov byte L:RB->status, al + //| jmp ->vm_leave_unw + //| + //|//-- Math library ------------------------------------------------------- + //| + //| .ffunc_1 math_abs + //| mov RB, [BASE] + //| 
checknum RB, ->fff_fallback + //| shl RB, 1 + //| shr RB, 1 + //| mov PC, [BASE-8] + //| mov [BASE-16], RB + //| jmp ->fff_res1 + //| + //|.ffunc_n math_sqrt, sqrtsd + dasm_put(Dst, 4423, Dt1(->cframe), LUA_YIELD, Dt1(->status), 1+1, LJ_TISNUM, 1+1, LJ_TISNUM); +#line 1461 "vm_x64.dasc" + //|->fff_resxmm0: + //| mov PC, [BASE-8] + //| movsd qword [BASE-16], xmm0 + //| // fallthrough + //| + //|->fff_res1: + //| mov RDd, 1+1 + //|->fff_res: + //| mov MULTRES, RDd + //|->fff_res_: + //| test PCd, FRAME_TYPE + //| jnz >7 + //|5: + //| cmp PC_RB, RDL // More results expected? + //| ja >6 + //| // Adjust BASE. KBASE is assumed to be set for the calling frame. + //| movzx RAd, PC_RA + //| neg RA + //| lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8 + //| ins_next + //| + //|6: // Fill up results with nil. + //| mov aword [BASE+RD*8-24], LJ_TNIL + //| add RD, 1 + //| jmp <5 + //| + //|7: // Non-standard return case. + //| mov RA, -16 // Results start at BASE+RA = BASE-16. + //| jmp ->vm_return + //| + //|.macro math_round, func + //| .ffunc math_ .. func + //| checknumtp [BASE], ->fff_fallback + //| movsd xmm0, qword [BASE] + //| call ->vm_ .. func .. _sse + //| jmp ->fff_resxmm0 + //|.endmacro + //| + //| math_round floor + dasm_put(Dst, 4514, 1+1, FRAME_TYPE, LJ_TNIL); +#line 1500 "vm_x64.dasc" + //| math_round ceil + //| + //|.ffunc math_log + //| cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument. + //| checknumtp [BASE], ->fff_fallback + dasm_put(Dst, 4638, LJ_TISNUM, LJ_TISNUM, 1+1); +#line 1505 "vm_x64.dasc" + //| movsd xmm0, qword [BASE] + //| mov RB, BASE + //| call extern log + //| mov BASE, RB + //| jmp ->fff_resxmm0 + //| + //|.macro math_extern, func + //| .ffunc_n math_ .. func + //| mov RB, BASE + //| call extern func + //| mov BASE, RB + //| jmp ->fff_resxmm0 + //|.endmacro + //| + //|.macro math_extern2, func + //| .ffunc_nn math_ .. 
func + //| mov RB, BASE + //| call extern func + //| mov BASE, RB + //| jmp ->fff_resxmm0 + //|.endmacro + //| + //| math_extern log10 + //| math_extern exp + dasm_put(Dst, 4709, LJ_TISNUM, 1+1, LJ_TISNUM, 1+1, LJ_TISNUM); +#line 1529 "vm_x64.dasc" + //| math_extern sin + //| math_extern cos + dasm_put(Dst, 4817, 1+1, LJ_TISNUM, 1+1, LJ_TISNUM); +#line 1531 "vm_x64.dasc" + //| math_extern tan + //| math_extern asin + //| math_extern acos + dasm_put(Dst, 4916, 1+1, LJ_TISNUM, 1+1, LJ_TISNUM); +#line 1534 "vm_x64.dasc" + //| math_extern atan + //| math_extern sinh + dasm_put(Dst, 5033, 1+1, LJ_TISNUM, 1+1, LJ_TISNUM, 1+1); +#line 1536 "vm_x64.dasc" + //| math_extern cosh + //| math_extern tanh + dasm_put(Dst, 5132, LJ_TISNUM, 1+1, LJ_TISNUM, 1+1); +#line 1538 "vm_x64.dasc" + //| math_extern2 pow + //| math_extern2 atan2 + dasm_put(Dst, 5231, LJ_TISNUM, 2+1, LJ_TISNUM, LJ_TISNUM, 2+1); +#line 1540 "vm_x64.dasc" + //| math_extern2 fmod + dasm_put(Dst, 5346, LJ_TISNUM, LJ_TISNUM, 2+1, LJ_TISNUM, LJ_TISNUM); +#line 1541 "vm_x64.dasc" + //| + //|.ffunc_2 math_ldexp + //| checknumtp [BASE], ->fff_fallback + //| checknumtp [BASE+8], ->fff_fallback + //| fld qword [BASE+8] + //| fld qword [BASE] + //| fscale + //| fpop1 + //| mov PC, [BASE-8] + //| fstp qword [BASE-16] + //| jmp ->fff_res1 + //| + //|.ffunc_n math_frexp + dasm_put(Dst, 5457, 2+1, LJ_TISNUM, LJ_TISNUM, 1+1, LJ_TISNUM); +#line 1554 "vm_x64.dasc" + //| mov RB, BASE + //| lea CARG1, TMP1 + //| call extern frexp + //| mov BASE, RB + //| mov RBd, TMP1d + //| mov PC, [BASE-8] + //| movsd qword [BASE-16], xmm0 + //| cvtsi2sd xmm1, RBd + //| movsd qword [BASE-8], xmm1 + //| mov RDd, 1+2 + //| jmp ->fff_res + //| + //|.ffunc_n math_modf + //| mov RB, BASE + //| lea CARG1, [BASE-16] + //| call extern modf + //| mov BASE, RB + //| mov PC, [BASE-8] + //| movsd qword [BASE-8], xmm0 + //| mov RDd, 1+2 + //| jmp ->fff_res + //| + //|.macro math_minmax, name, cmovop, sseop + //| .ffunc name + //| mov RAd, 2 + //| checknumtp 
[BASE], ->fff_fallback + //| + //| movsd xmm0, qword [BASE] + //|5: // Handle numbers or integers. + //| cmp RAd, RDd; jae ->fff_resxmm0 + //| checknumtp [BASE+RA*8-8], ->fff_fallback + //|6: + //| movsd xmm1, qword [BASE+RA*8-8] + //|7: + //| sseop xmm0, xmm1 + //| add RAd, 1 + //| jmp <5 + //|.endmacro + //| + //| math_minmax math_min, cmovg, minsd + dasm_put(Dst, 5575, 1+2, 1+1, LJ_TISNUM, 1+2, LJ_TISNUM); +#line 1594 "vm_x64.dasc" + //| math_minmax math_max, cmovl, maxsd + dasm_put(Dst, 5723, LJ_TISNUM, LJ_TISNUM, LJ_TISNUM); +#line 1595 "vm_x64.dasc" + //| + //|//-- String library ----------------------------------------------------- + //| + //|.ffunc string_byte // Only handle the 1-arg case here. + //| cmp NARGS:RDd, 1+1; jne ->fff_fallback + //| mov STR:RB, [BASE] + //| checkstr STR:RB, ->fff_fallback + //| mov PC, [BASE-8] + //| cmp dword STR:RB->len, 1 + //| jb ->fff_res0 // Return no results for empty string. + //| movzx RBd, byte STR:RB[1] + //| cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0 + //| + //|.ffunc string_char // Only handle the 1-arg case here. + //| ffgccheck + dasm_put(Dst, 5840, 1+1, LJ_TSTR, Dt5(->len), Dt5([1]), DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold)); +#line 1610 "vm_x64.dasc" + //| cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg. + //| checknumtp [BASE], ->fff_fallback + //| cvttsd2si RBd, qword [BASE] + //| cmp RBd, 255; ja ->fff_fallback + //| mov TMP1d, RBd + //| mov TMPRd, 1 + //| lea RD, TMP1 // Points to stack. Little-endian. + //|->fff_newstr: + //| mov L:RB, SAVE_L + //| mov L:RB->base, BASE + //| mov CARG3d, TMPRd // Zero-extended to size_t. + //| mov CARG2, RD + //| mov CARG1, L:RB + //| mov SAVE_PC, PC + //| call extern lj_str_new // (lua_State *L, char *str, size_t l) + //|->fff_resstr: + //| // GCstr * returned in eax (RD). 
+ //| mov BASE, L:RB->base + //| mov PC, [BASE-8] + //| settp STR:RD, LJ_TSTR + dasm_put(Dst, 5939, 1+1, LJ_TISNUM, Dt1(->base), Dt1(->base), (unsigned int)(((uint64_t)LJ_TSTR<<47)), (unsigned int)((((uint64_t)LJ_TSTR<<47))>>32)); +#line 1630 "vm_x64.dasc" + //| mov [BASE-16], STR:RD + //| jmp ->fff_res1 + //| + //|.ffunc string_sub + //| ffgccheck + //| mov TMPRd, -1 + //| cmp NARGS:RDd, 1+2; jb ->fff_fallback + //| jna >1 + //| checknumtp [BASE+16], ->fff_fallback + //| cvttsd2si TMPRd, qword [BASE+16] + //|1: + //| mov STR:RB, [BASE] + dasm_put(Dst, 6049, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), 1+2, LJ_TISNUM); +#line 1642 "vm_x64.dasc" + //| checkstr STR:RB, ->fff_fallback + //| checknumtp [BASE+8], ->fff_fallback + //| cvttsd2si RAd, qword [BASE+8] + //| mov RCd, STR:RB->len + //| cmp RCd, TMPRd // len < end? (unsigned compare) + //| jb >5 + //|2: + //| test RAd, RAd // start <= 0? + //| jle >7 + //|3: + //| sub TMPRd, RAd // start > end? + //| jl ->fff_emptystr + //| lea RD, [STR:RB+RAd+#STR-1] + //| add TMPRd, 1 + //|4: + //| jmp ->fff_newstr + //| + //|5: // Negative end or overflow. + //| jl >6 + dasm_put(Dst, 6130, LJ_TSTR, LJ_TISNUM, Dt5(->len), sizeof(GCstr)-1); +#line 1661 "vm_x64.dasc" + //| lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1) + //| jmp <2 + //|6: // Overflow. + //| mov TMPRd, RCd // end = len + //| jmp <2 + //| + //|7: // Negative start or underflow. + //| je >8 + //| add RAd, RCd // start = start+(len+1) + //| add RAd, 1 + //| jg <3 // start > 0? + //|8: // Underflow. + //| mov RAd, 1 // start = 1 + //| jmp <3 + //| + //|->fff_emptystr: // Range underflow. + //| xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok. + //| jmp <4 + //| + //|.macro ffstring_op, name + //| .ffunc_1 string_ .. 
name + //| ffgccheck + //| mov STR:CARG2, [BASE] + //| checkstr STR:CARG2, ->fff_fallback + //| mov L:RB, SAVE_L + //| lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] + //| mov L:RB->base, BASE + //| mov RC, SBUF:CARG1->b + //| mov SBUF:CARG1->L, L:RB + //| mov SBUF:CARG1->p, RC + //| mov SAVE_PC, PC + //| call extern lj_buf_putstr_ .. name + //| mov CARG1, rax + //| call extern lj_buf_tostr + //| jmp ->fff_resstr + //|.endmacro + //| + //|ffstring_op reverse + dasm_put(Dst, 6230); +#line 1699 "vm_x64.dasc" + //|ffstring_op lower + dasm_put(Dst, 6290, 1+1, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), LJ_TSTR, DISPATCH_GL(tmpbuf), Dt1(->base), DtE(->b), DtE(->L), DtE(->p)); +#line 1700 "vm_x64.dasc" + //|ffstring_op upper + dasm_put(Dst, 6393, 1+1, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), LJ_TSTR, DISPATCH_GL(tmpbuf), Dt1(->base), DtE(->b), DtE(->L), DtE(->p)); +#line 1701 "vm_x64.dasc" + //| + //|//-- Bit library -------------------------------------------------------- + //| + //|.macro .ffunc_bit, name, kind, fdef + //| fdef name + //|.if kind == 2 + //| sseconst_tobit xmm1, RB + //|.endif + //| checknumtp [BASE], ->fff_fallback + //| movsd xmm0, qword [BASE] + //|.if kind < 2 + //| sseconst_tobit xmm1, RB + //|.endif + //| addsd xmm0, xmm1 + //| movd RBd, xmm0 + //|2: + //|.endmacro + //| + //|.macro .ffunc_bit, name, kind + //| .ffunc_bit name, kind, .ffunc_1 + //|.endmacro + //| + //|.ffunc_bit bit_tobit, 0 + dasm_put(Dst, 6496, 1+1, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), LJ_TSTR, DISPATCH_GL(tmpbuf), Dt1(->base), DtE(->b), DtE(->L), DtE(->p)); +#line 1724 "vm_x64.dasc" + //| jmp ->fff_resbit + //| + //|.macro .ffunc_bit_op, name, ins + //| .ffunc_bit name, 2 + //| mov TMPRd, NARGS:RDd // Save for fallback. 
+ //| lea RD, [BASE+NARGS:RD*8-16] + //|1: + //| cmp RD, BASE + //| jbe ->fff_resbit + //| checknumtp [RD], ->fff_fallback_bit_op + //| movsd xmm0, qword [RD] + //| addsd xmm0, xmm1 + //| movd RAd, xmm0 + //| ins RBd, RAd + //| sub RD, 8 + //| jmp <1 + //|.endmacro + //| + //|.ffunc_bit_op bit_band, and + dasm_put(Dst, 6599, 1+1, LJ_TISNUM, (unsigned int)(U64x(43380000,00000000)), (unsigned int)((U64x(43380000,00000000))>>32), 1+1, (unsigned int)(U64x(43380000,00000000)), (unsigned int)((U64x(43380000,00000000))>>32), LJ_TISNUM); +#line 1743 "vm_x64.dasc" + //|.ffunc_bit_op bit_bor, or + dasm_put(Dst, 6690, LJ_TISNUM, 1+1, (unsigned int)(U64x(43380000,00000000)), (unsigned int)((U64x(43380000,00000000))>>32), LJ_TISNUM); +#line 1744 "vm_x64.dasc" + //|.ffunc_bit_op bit_bxor, xor + dasm_put(Dst, 6818, LJ_TISNUM, 1+1, (unsigned int)(U64x(43380000,00000000)), (unsigned int)((U64x(43380000,00000000))>>32), LJ_TISNUM); +#line 1745 "vm_x64.dasc" + //| + //|.ffunc_bit bit_bswap, 1 + //| bswap RBd + //| jmp ->fff_resbit + //| + //|.ffunc_bit bit_bnot, 1 + dasm_put(Dst, 6941, LJ_TISNUM, 1+1, LJ_TISNUM, (unsigned int)(U64x(43380000,00000000)), (unsigned int)((U64x(43380000,00000000))>>32)); +#line 1751 "vm_x64.dasc" + //| not RBd + //|->fff_resbit: + //| cvtsi2sd xmm0, RBd + //| jmp ->fff_resxmm0 + //| + //|->fff_fallback_bit_op: + //| mov NARGS:RDd, TMPRd // Restore for fallback + //| jmp ->fff_fallback + //| + //|.macro .ffunc_bit_sh, name, ins + //| .ffunc_nn name + //| sseconst_tobit xmm2, RB + //| addsd xmm0, xmm2 + //| addsd xmm1, xmm2 + //| movd RBd, xmm0 + //| movd RAd, xmm1 + //| ins RBd, cl // Assumes RA is ecx. 
+ //| jmp ->fff_resbit + //|.endmacro + //| + //|.ffunc_bit_sh bit_lshift, shl + dasm_put(Dst, 7050, 1+1, LJ_TISNUM, (unsigned int)(U64x(43380000,00000000)), (unsigned int)((U64x(43380000,00000000))>>32), 2+1); +#line 1772 "vm_x64.dasc" + //|.ffunc_bit_sh bit_rshift, shr + dasm_put(Dst, 7134, LJ_TISNUM, LJ_TISNUM, (unsigned int)(U64x(43380000,00000000)), (unsigned int)((U64x(43380000,00000000))>>32), 2+1, LJ_TISNUM, LJ_TISNUM); +#line 1773 "vm_x64.dasc" + //|.ffunc_bit_sh bit_arshift, sar + //|.ffunc_bit_sh bit_rol, rol + dasm_put(Dst, 7259, (unsigned int)(U64x(43380000,00000000)), (unsigned int)((U64x(43380000,00000000))>>32), 2+1, LJ_TISNUM, LJ_TISNUM, (unsigned int)(U64x(43380000,00000000)), (unsigned int)((U64x(43380000,00000000))>>32)); +#line 1775 "vm_x64.dasc" + //|.ffunc_bit_sh bit_ror, ror + dasm_put(Dst, 7397, 2+1, LJ_TISNUM, LJ_TISNUM, (unsigned int)(U64x(43380000,00000000)), (unsigned int)((U64x(43380000,00000000))>>32), 2+1, LJ_TISNUM); +#line 1776 "vm_x64.dasc" + //| + //|//----------------------------------------------------------------------- + //| + //|->fff_fallback_2: + //| mov NARGS:RDd, 1+2 // Other args are ignored, anyway. + //| jmp ->fff_fallback + //|->fff_fallback_1: + //| mov NARGS:RDd, 1+1 // Other args are ignored, anyway. + //|->fff_fallback: // Call fast function fallback handler. + //| // BASE = new base, RD = nargs+1 + //| mov L:RB, SAVE_L + //| mov PC, [BASE-8] // Fallback may overwrite PC. + //| mov SAVE_PC, PC // Redundant (but a defined value). + //| mov L:RB->base, BASE + //| lea RD, [BASE+NARGS:RD*8-8] + //| lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler. + //| mov L:RB->top, RD + //| mov CFUNC:RD, [BASE-16] + //| cleartp CFUNC:RD + //| cmp RA, L:RB->maxstack + //| ja >5 // Need to grow stack. 
+ //| mov CARG1, L:RB + dasm_put(Dst, 7512, LJ_TISNUM, (unsigned int)(U64x(43380000,00000000)), (unsigned int)((U64x(43380000,00000000))>>32), 1+2, 1+1, Dt1(->base), 8*LUA_MINSTACK, Dt1(->top), Dt1(->maxstack)); +#line 1798 "vm_x64.dasc" + //| call aword CFUNC:RD->f // (lua_State *L) + //| mov BASE, L:RB->base + //| // Either throws an error, or recovers and returns -1, 0 or nresults+1. + //| test RDd, RDd; jg ->fff_res // Returned nresults+1? + //|1: + //| mov RA, L:RB->top + //| sub RA, BASE + //| shr RAd, 3 + //| test RDd, RDd + //| lea NARGS:RDd, [RAd+1] + //| mov LFUNC:RB, [BASE-16] + //| jne ->vm_call_tail // Returned -1? + //| cleartp LFUNC:RB + //| ins_callt // Returned 0: retry fast path. + //| + //|// Reconstruct previous base for vmeta_call during tailcall. + //|->vm_call_tail: + //| mov RA, BASE + //| test PCd, FRAME_TYPE + //| jnz >3 + //| movzx RBd, PC_RA + //| neg RB + //| lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8 + //| jmp ->vm_call_dispatch // Resolve again for tailcall. + //|3: + //| mov RB, PC + //| and RB, -8 + //| sub BASE, RB + //| jmp ->vm_call_dispatch // Resolve again for tailcall. + //| + //|5: // Grow stack for fallback handler. + //| mov CARG2d, LUA_MINSTACK + dasm_put(Dst, 7643, Dt8(->f), Dt1(->base), Dt1(->top), Dt7(->pc), FRAME_TYPE); +#line 1830 "vm_x64.dasc" + //| mov CARG1, L:RB + //| call extern lj_state_growstack // (lua_State *L, int n) + //| mov BASE, L:RB->base + //| xor RDd, RDd // Simulate a return 0. + //| jmp <1 // Dumb retry (goes through ff first). + //| + //|->fff_gcstep: // Call GC step function. + //| // BASE = new base, RD = nargs+1 + //| pop RB // Must keep stack at same level. + //| mov TMP1, RB // Save return address + //| mov L:RB, SAVE_L + //| mov SAVE_PC, PC // Redundant (but a defined value). 
+ //| mov L:RB->base, BASE + //| lea RD, [BASE+NARGS:RD*8-8] + //| mov CARG1, L:RB + //| mov L:RB->top, RD + //| call extern lj_gc_step // (lua_State *L) + //| mov BASE, L:RB->base + //| mov RD, L:RB->top + //| sub RD, BASE + //| shr RDd, 3 + //| add NARGS:RDd, 1 + //| mov RB, TMP1 + //| push RB // Restore return address. + //| ret + //| + //|//----------------------------------------------------------------------- + //|//-- Special dispatch targets ------------------------------------------- + //|//----------------------------------------------------------------------- + //| + //|->vm_record: // Dispatch target for recording phase. + //| // Decrement the hookcount for consistency, but always do the call. + //| test RDL, HOOK_ACTIVE + //| jnz >1 + //| test RDL, LUA_MASKLINE|LUA_MASKCOUNT + //| jz >1 + //| dec dword [DISPATCH+DISPATCH_GL(hookcount)] + //| jmp >1 + //| + //|->vm_rethook: // Dispatch target for return hooks. + //| movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] + dasm_put(Dst, 7774, LUA_MINSTACK, Dt1(->base), Dt1(->base), Dt1(->top), Dt1(->base), Dt1(->top), HOOK_ACTIVE, LUA_MASKLINE|LUA_MASKCOUNT, DISPATCH_GL(hookcount)); +#line 1871 "vm_x64.dasc" + //| test RDL, HOOK_ACTIVE // Hook already active? + //| jnz >5 + //| jmp >1 + //| + //|->vm_inshook: // Dispatch target for instr/line hooks. + //| movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] + //| test RDL, HOOK_ACTIVE // Hook already active? + //| jnz >5 + //| + //| test RDL, LUA_MASKLINE|LUA_MASKCOUNT + //| jz >5 + //| dec dword [DISPATCH+DISPATCH_GL(hookcount)] + //| jz >1 + //| test RDL, LUA_MASKLINE + //| jz >5 + //|1: + //| mov L:RB, SAVE_L + dasm_put(Dst, 7882, DISPATCH_GL(hookmask), HOOK_ACTIVE, DISPATCH_GL(hookmask), HOOK_ACTIVE, LUA_MASKLINE|LUA_MASKCOUNT, DISPATCH_GL(hookcount), LUA_MASKLINE); +#line 1888 "vm_x64.dasc" + //| mov L:RB->base, BASE + //| mov CARG2, PC // Caveat: CARG2 == BASE + //| mov CARG1, L:RB + //| // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 
+ //| call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) + //|3: + //| mov BASE, L:RB->base + //|4: + //| movzx RAd, PC_RA + //|5: + //| movzx OP, PC_OP + //| movzx RDd, PC_RD + //| jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins. + //| + //|->cont_hook: // Continue from hook yield. + //| add PC, 4 + //| mov RA, [RB-40] + //| mov MULTRES, RAd // Restore MULTRES for *M ins. + //| jmp <4 + //| + //|->vm_hotloop: // Hot loop counter underflow. + //| mov LFUNC:RB, [BASE-16] // Same as curr_topL(L). + //| cleartp LFUNC:RB + //| mov RB, LFUNC:RB->pc + //| movzx RDd, byte [RB+PC2PROTO(framesize)] + //| lea RD, [BASE+RD*8] + //| mov L:RB, SAVE_L + //| mov L:RB->base, BASE + //| mov L:RB->top, RD + //| mov CARG2, PC + //| lea CARG1, [DISPATCH+GG_DISP2J] + //| mov aword [DISPATCH+DISPATCH_J(L)], L:RB + //| mov SAVE_PC, PC + //| call extern lj_trace_hot // (jit_State *J, const BCIns *pc) + //| jmp <3 + //| + //|->vm_callhook: // Dispatch target for call hooks. + //| mov SAVE_PC, PC + dasm_put(Dst, 7934, Dt1(->base), Dt1(->base), GG_DISP2STATIC, Dt7(->pc), PC2PROTO(framesize), Dt1(->base), Dt1(->top), GG_DISP2J, DISPATCH_J(L)); +#line 1926 "vm_x64.dasc" + //| jmp >1 + //| + //|->vm_hotcall: // Hot call counter underflow. + //| mov SAVE_PC, PC + //| or PC, 1 // Marker for hot call. + //|1: + //| lea RD, [BASE+NARGS:RD*8-8] + //| mov L:RB, SAVE_L + //| mov L:RB->base, BASE + //| mov L:RB->top, RD + //| mov CARG2, PC + //| mov CARG1, L:RB + //| call extern lj_dispatch_call // (lua_State *L, const BCIns *pc) + //| // ASMFunction returned in eax/rax (RD). + //| mov SAVE_PC, 0 // Invalidate for subsequent line hook. + //| and PC, -2 + //| mov BASE, L:RB->base + //| mov RA, RD + //| mov RD, L:RB->top + //| sub RD, BASE + //| mov RB, RA + //| movzx RAd, PC_RA + //| shr RDd, 3 + //| add NARGS:RDd, 1 + //| jmp RB + //| + //|->cont_stitch: // Trace stitching. 
+ //| // BASE = base, RC = result, RB = mbase + //| mov TRACE:ITYPE, [RB-40] // Save previous trace. + //| cleartp TRACE:ITYPE + //| mov TMPRd, MULTRES + //| movzx RAd, PC_RA + //| lea RA, [BASE+RA*8] // Call base. + //| sub TMPRd, 1 + //| jz >2 + //|1: // Move results down. + //| mov RB, [RC] + //| mov [RA], RB + //| add RC, 8 + //| add RA, 8 + //| sub TMPRd, 1 + //| jnz <1 + //|2: + //| movzx RCd, PC_RA + //| movzx RBd, PC_RB + //| add RC, RB + //| lea RC, [BASE+RC*8-8] + //|3: + //| cmp RC, RA + //| ja >9 // More results wanted? + //| + //| test TRACE:ITYPE, TRACE:ITYPE + //| jz ->cont_nop + //| movzx RBd, word TRACE:ITYPE->traceno + dasm_put(Dst, 8072, Dt1(->base), Dt1(->top), Dt1(->base), Dt1(->top)); +#line 1980 "vm_x64.dasc" + //| movzx RDd, word TRACE:ITYPE->link + //| cmp RDd, RBd + //| je ->cont_nop // Blacklisted. + //| test RDd, RDd + //| jne =>BC_JLOOP // Jump to stitched trace. + //| + //| // Stitch a new trace to the previous trace. + //| mov [DISPATCH+DISPATCH_J(exitno)], RB + //| mov L:RB, SAVE_L + //| mov L:RB->base, BASE + //| mov CARG2, PC + //| lea CARG1, [DISPATCH+GG_DISP2J] + //| mov aword [DISPATCH+DISPATCH_J(L)], L:RB + //| call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + //| mov BASE, L:RB->base + //| jmp ->cont_nop + //| + //|9: // Fill up results with nil. + //| mov aword [RA], LJ_TNIL + //| add RA, 8 + //| jmp <3 + //| + //|//----------------------------------------------------------------------- + //|//-- Trace exit handler ------------------------------------------------- + //|//----------------------------------------------------------------------- + //| + //|// Called from an exit stub with the exit number on the stack. + //|// The 16 bit exit number is stored with two (sign-extended) push imm8. 
+ //|->vm_exit_handler: + //| push r13; push r12 + //| push r11; push r10; push r9; push r8 + //| push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp + //| push rbx; push rdx; push rcx; push rax + //| movzx RCd, byte [rbp-8] // Reconstruct exit number. + //| mov RCH, byte [rbp-16] + //| mov [rbp-8], r15; mov [rbp-16], r14 + //| // DISPATCH is preserved on-trace in LJ_GC64 mode. + //| mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. + //| set_vmstate EXIT + dasm_put(Dst, 8269, DtD(->traceno), DtD(->link), BC_JLOOP, DISPATCH_J(exitno), Dt1(->base), GG_DISP2J, DISPATCH_J(L), Dt1(->base), LJ_TNIL, DISPATCH_GL(vmstate)); +#line 2019 "vm_x64.dasc" + //| mov [DISPATCH+DISPATCH_J(exitno)], RCd + //| mov [DISPATCH+DISPATCH_J(parent)], RAd + //| mov dword [DISPATCH+DISPATCH_GL(lasttrace)], RAd + //| sub rsp, 16*8 // Room for SSE regs. + //| add rbp, -128 + //| movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14 + //| movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12 + //| movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10 + //| movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8 + //| movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6 + //| movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4 + //| movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2 + //| movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0 + //| // Caveat: RB is rbp. + //| mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] + //| mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] + //| mov aword [DISPATCH+DISPATCH_J(L)], L:RB + //| mov L:RB->base, BASE + //| mov CARG2, rsp + //| lea CARG1, [DISPATCH+GG_DISP2J] + //| mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 + //| call extern lj_trace_exit // (jit_State *J, ExitState *ex) + //| // MULTRES or negated error code returned in eax (RD). + //| mov RA, L:RB->cframe + //| and RA, CFRAME_RAWMASK + //| mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield). 
+ //| mov BASE, L:RB->base + //| mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC. + //| jmp >1 + //|->vm_exit_interp: + //| // Record which trace exited to the interpreter. + //| mov TMPRd, dword [DISPATCH+DISPATCH_GL(vmstate)] + dasm_put(Dst, 8391, DISPATCH_GL(vmstate), ~LJ_VMST_EXIT, DISPATCH_J(exitno), DISPATCH_J(parent), DISPATCH_GL(lasttrace), 16*8, DISPATCH_GL(cur_L), DISPATCH_GL(jit_base), DISPATCH_J(L), Dt1(->base), GG_DISP2J, DISPATCH_GL(jit_base), Dt1(->cframe), CFRAME_RAWMASK, CFRAME_OFS_L, Dt1(->base), CFRAME_OFS_PC); +#line 2051 "vm_x64.dasc" + //| mov dword [DISPATCH+DISPATCH_GL(lasttrace)], TMPRd + //|->vm_exit_interp_notrack: + //| // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. + //| // Restore additional callee-save registers only used in compiled code. + //| lea RA, [rsp+16] + //|1: + //| mov r13, [RA-8] + //| mov r12, [RA] + //| mov rsp, RA // Reposition stack to C frame. + //| test RDd, RDd; js >9 // Check for error from exit. + //| mov L:RB, SAVE_L + //| mov MULTRES, RDd + //| mov LFUNC:KBASE, [BASE-16] + //| cleartp LFUNC:KBASE + //| mov KBASE, LFUNC:KBASE->pc + //| mov KBASE, [KBASE+PC2PROTO(k)] + //| mov L:RB->base, BASE + //| mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 + //| mov TMPRd, dword [DISPATCH+DISPATCH_GL(vmstate)] + //| set_vmstate INTERP + //| // Modified copy of ins_next which handles function header dispatch, too. + //| mov RCd, [PC] + //| movzx RAd, RCH + //| movzx OP, RCL + //| add PC, 4 + //| shr RCd, 16 + //| cmp OP, BC_FUNCF // Function header? + //| jb >3 + //| cmp OP, BC_FUNCC+2 // Fast function? + //| jae >4 + //|2: + //| mov RCd, MULTRES // RC/RD holds nres+1. + dasm_put(Dst, 8585, DISPATCH_GL(vmstate), DISPATCH_GL(lasttrace), Dt7(->pc), PC2PROTO(k), Dt1(->base), DISPATCH_GL(jit_base), DISPATCH_GL(vmstate), DISPATCH_GL(vmstate), ~LJ_VMST_INTERP, BC_FUNCF, BC_FUNCC+2); +#line 2083 "vm_x64.dasc" + //|3: + //| jmp aword [DISPATCH+OP*8] + //| + //|4: // Check frame below fast function. 
+ //| mov RC, [BASE-8] + //| test RCd, FRAME_TYPE + //| jnz <2 // Trace stitching continuation? + //| // Otherwise set KBASE for Lua function below fast function. + //| movzx RCd, byte [RC-3] + //| neg RC + //| mov LFUNC:KBASE, [BASE+RC*8-32] + //| cleartp LFUNC:KBASE + //| mov KBASE, LFUNC:KBASE->pc + //| mov KBASE, [KBASE+PC2PROTO(k)] + //| jmp <2 + //| + //|9: // Rethrow error from the right C frame. + //| neg RD + //| mov CARG1, L:RB + //| mov CARG2, RD + //| call extern lj_err_throw // (lua_State *L, int errcode) + //| + //|//----------------------------------------------------------------------- + //|//-- Math helper functions ---------------------------------------------- + //|//----------------------------------------------------------------------- + //| + //|// FP value rounding. Called by math.floor/math.ceil fast functions + //|// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. + //|.macro vm_round, name, mode, cond + //|->name: + //|->name .. _sse: + //| sseconst_abs xmm2, RD + //| sseconst_2p52 xmm3, RD + //| movaps xmm1, xmm0 + //| andpd xmm1, xmm2 // |x| + //| ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|. + //| jbe >1 + //| andnpd xmm2, xmm0 // Isolate sign bit. + //|.if mode == 2 // trunc(x)? + //| movaps xmm0, xmm1 + //| addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 + //| subsd xmm1, xmm3 + //| sseconst_1 xmm3, RD + //| cmpsd xmm0, xmm1, 1 // |x| < result? + //| andpd xmm0, xmm3 + //| subsd xmm1, xmm0 // If yes, subtract 1. + //| orpd xmm1, xmm2 // Merge sign bit back in. + //|.else + //| addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 + //| subsd xmm1, xmm3 + //| orpd xmm1, xmm2 // Merge sign bit back in. + //| .if mode == 1 // ceil(x)? + //| sseconst_m1 xmm2, RD // Must subtract -1 to preserve -0. + //| cmpsd xmm0, xmm1, 6 // x > result? + //| .else // floor(x)? + //| sseconst_1 xmm2, RD + //| cmpsd xmm0, xmm1, 1 // x < result? + //| .endif + //| andpd xmm0, xmm2 + //| subsd xmm1, xmm0 // If yes, subtract +-1. 
+ //|.endif + //| movaps xmm0, xmm1 + //|1: + //| ret + //|.endmacro + //| + //| vm_round vm_floor, 0, 1 + dasm_put(Dst, 8704, FRAME_TYPE, Dt7(->pc), PC2PROTO(k), (unsigned int)(U64x(7fffffff,ffffffff)), (unsigned int)((U64x(7fffffff,ffffffff))>>32), (unsigned int)(U64x(43300000,00000000)), (unsigned int)((U64x(43300000,00000000))>>32), (unsigned int)(U64x(3ff00000,00000000)), (unsigned int)((U64x(3ff00000,00000000))>>32)); +#line 2150 "vm_x64.dasc" + //| vm_round vm_ceil, 1, JIT + //| vm_round vm_trunc, 2, JIT + dasm_put(Dst, 8840, (unsigned int)(U64x(7fffffff,ffffffff)), (unsigned int)((U64x(7fffffff,ffffffff))>>32), (unsigned int)(U64x(43300000,00000000)), (unsigned int)((U64x(43300000,00000000))>>32), (unsigned int)(U64x(bff00000,00000000)), (unsigned int)((U64x(bff00000,00000000))>>32), (unsigned int)(U64x(7fffffff,ffffffff)), (unsigned int)((U64x(7fffffff,ffffffff))>>32), (unsigned int)(U64x(43300000,00000000)), (unsigned int)((U64x(43300000,00000000))>>32)); +#line 2152 "vm_x64.dasc" + //| + //|// FP modulo x%y. Called by BC_MOD* and vm_arith. + //|->vm_mod: + //|// Args in xmm0/xmm1, return value in xmm0. + //|// Caveat: xmm0-xmm5 and RC (eax) modified! + //| movaps xmm5, xmm0 + //| divsd xmm0, xmm1 + //| sseconst_abs xmm2, RD + //| sseconst_2p52 xmm3, RD + //| movaps xmm4, xmm0 + //| andpd xmm4, xmm2 // |x/y| + //| ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|. + //| jbe >1 + //| andnpd xmm2, xmm0 // Isolate sign bit. + //| addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 + //| subsd xmm4, xmm3 + //| orpd xmm4, xmm2 // Merge sign bit back in. + //| sseconst_1 xmm2, RD + //| cmpsd xmm0, xmm4, 1 // x/y < result? + //| andpd xmm0, xmm2 + //| subsd xmm4, xmm0 // If yes, subtract 1.0. + //| movaps xmm0, xmm5 + //| mulsd xmm1, xmm4 + //| subsd xmm0, xmm1 + //| ret + //|1: + //| mulsd xmm1, xmm0 + //| movaps xmm0, xmm5 + //| subsd xmm0, xmm1 + //| ret + //| + //|// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. 
+ //|->vm_powi_sse: + //| cmp eax, 1; jle >6 // i<=1? + //| // Now 1 < (unsigned)i <= 0x80000000. + //|1: // Handle leading zeros. + //| test eax, 1; jnz >2 + //| mulsd xmm0, xmm0 + dasm_put(Dst, 8989, (unsigned int)(U64x(3ff00000,00000000)), (unsigned int)((U64x(3ff00000,00000000))>>32), (unsigned int)(U64x(7fffffff,ffffffff)), (unsigned int)((U64x(7fffffff,ffffffff))>>32), (unsigned int)(U64x(43300000,00000000)), (unsigned int)((U64x(43300000,00000000))>>32), (unsigned int)(U64x(3ff00000,00000000)), (unsigned int)((U64x(3ff00000,00000000))>>32)); +#line 2190 "vm_x64.dasc" + //| shr eax, 1 + //| jmp <1 + //|2: + //| shr eax, 1; jz >5 + //| movaps xmm1, xmm0 + //|3: // Handle trailing bits. + //| mulsd xmm0, xmm0 + //| shr eax, 1; jz >4 + //| jnc <3 + //| mulsd xmm1, xmm0 + //| jmp <3 + //|4: + //| mulsd xmm0, xmm1 + //|5: + //| ret + //|6: + //| je <5 // x^1 ==> x + //| jb >7 // x^0 ==> 1 + //| neg eax + dasm_put(Dst, 9177); +#line 2209 "vm_x64.dasc" + //| call <1 + //| sseconst_1 xmm1, RD + //| divsd xmm1, xmm0 + //| movaps xmm0, xmm1 + //| ret + //|7: + //| sseconst_1 xmm0, RD + //| ret + //| + //|//----------------------------------------------------------------------- + //|//-- Miscellaneous functions -------------------------------------------- + //|//----------------------------------------------------------------------- + //| + //|// int lj_vm_cpuid(uint32_t f, uint32_t res[4]) + //|->vm_cpuid: + //| mov eax, CARG1d + //| push rbx + //| xor ecx, ecx + //| cpuid + //| mov [rsi], eax + //| mov [rsi+4], ebx + //| mov [rsi+8], ecx + //| mov [rsi+12], edx + //| pop rbx + //| ret + //| + //|//----------------------------------------------------------------------- + //|//-- Assertions --------------------------------------------------------- + //|//----------------------------------------------------------------------- + //| + //|->assert_bad_for_arg_type: + dasm_put(Dst, 9246, (unsigned int)(U64x(3ff00000,00000000)), (unsigned int)((U64x(3ff00000,00000000))>>32), 
(unsigned int)(U64x(3ff00000,00000000)), (unsigned int)((U64x(3ff00000,00000000))>>32)); +#line 2240 "vm_x64.dasc" +#ifdef LUA_USE_ASSERT + //| int3 + dasm_put(Dst, 9308); +#line 2242 "vm_x64.dasc" +#endif + //| int3 + //| + //|//----------------------------------------------------------------------- + //|//-- FFI helper functions ----------------------------------------------- + //|//----------------------------------------------------------------------- + //| + //|// Handler for callback functions. Callback slot number in ah/al. + //|->vm_ffi_callback: + //|.type CTSTATE, CTState, PC +#define DtF(_V) (int)(ptrdiff_t)&(((CTState *)0)_V) +#line 2252 "vm_x64.dasc" + //| saveregs_ // ebp/rbp already saved. ebp now holds global_State *. + //| lea DISPATCH, [ebp+GG_G2DISP] + //| mov CTSTATE, GL:ebp->ctype_state + //| movzx eax, ax + //| mov CTSTATE->cb.slot, eax + //| mov CTSTATE->cb.gpr[0], CARG1 + //| mov CTSTATE->cb.gpr[1], CARG2 + //| mov CTSTATE->cb.gpr[2], CARG3 + //| mov CTSTATE->cb.gpr[3], CARG4 + //| movsd qword CTSTATE->cb.fpr[0], xmm0 + //| movsd qword CTSTATE->cb.fpr[1], xmm1 + //| movsd qword CTSTATE->cb.fpr[2], xmm2 + //| movsd qword CTSTATE->cb.fpr[3], xmm3 + //| lea rax, [rsp+CFRAME_SIZE] + //| mov CTSTATE->cb.gpr[4], CARG5 + //| mov CTSTATE->cb.gpr[5], CARG6 + //| movsd qword CTSTATE->cb.fpr[4], xmm4 + //| movsd qword CTSTATE->cb.fpr[5], xmm5 + //| movsd qword CTSTATE->cb.fpr[6], xmm6 + //| movsd qword CTSTATE->cb.fpr[7], xmm7 + //| mov CTSTATE->cb.stack, rax + dasm_put(Dst, 9310, GG_G2DISP, Dt2(->ctype_state), DtF(->cb.slot), DtF(->cb.gpr[0]), DtF(->cb.gpr[1]), DtF(->cb.gpr[2]), DtF(->cb.gpr[3]), DtF(->cb.fpr[0]), DtF(->cb.fpr[1]), DtF(->cb.fpr[2]), DtF(->cb.fpr[3]), CFRAME_SIZE, DtF(->cb.gpr[4]), DtF(->cb.gpr[5]), DtF(->cb.fpr[4]), DtF(->cb.fpr[5]), DtF(->cb.fpr[6]), DtF(->cb.fpr[7])); +#line 2273 "vm_x64.dasc" + //| mov CARG2, rsp + //| mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok. 
+ //| mov CARG1, CTSTATE + //| call extern lj_ccallback_enter // (CTState *cts, void *cf) + //| // lua_State * returned in eax (RD). + //| set_vmstate INTERP + //| mov BASE, L:RD->base + //| mov RD, L:RD->top + //| sub RD, BASE + //| mov LFUNC:RB, [BASE-16] + //| cleartp LFUNC:RB + //| shr RD, 3 + //| add RD, 1 + //| ins_callt + //| + //|->cont_ffi_callback: // Return from FFI callback. + //| mov L:RA, SAVE_L + //| mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] + //| mov aword CTSTATE->L, L:RA + //| mov L:RA->base, BASE + //| mov L:RA->top, RB + //| mov CARG1, CTSTATE + //| mov CARG2, RC + //| call extern lj_ccallback_leave // (CTState *cts, TValue *o) + //| mov rax, CTSTATE->cb.gpr[0] + //| movsd xmm0, qword CTSTATE->cb.fpr[0] + //| jmp ->vm_leave_unw + //| + //|->vm_ffi_call: // Call C function via FFI. + //| // Caveat: needs special frame unwinding, see below. + //| .type CCSTATE, CCallState, rbx +#define Dt10(_V) (int)(ptrdiff_t)&(((CCallState *)0)_V) +#line 2304 "vm_x64.dasc" + //| push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 + //| + //| // Readjust stack. + //| mov eax, CCSTATE->spadj + //| sub rsp, rax + //| + //| // Copy stack slots. 
+ //| movzx ecx, byte CCSTATE->nsp + //| sub ecx, 1 + //| js >2 + //|1: + //| mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] + dasm_put(Dst, 9420, DtF(->cb.stack), DISPATCH_GL(vmstate), ~LJ_VMST_INTERP, Dt1(->base), Dt1(->top), Dt7(->pc), DISPATCH_GL(ctype_state), DtF(->L), Dt1(->base), Dt1(->top), DtF(->cb.gpr[0]), DtF(->cb.fpr[0]), Dt10(->spadj), Dt10(->nsp)); +#line 2316 "vm_x64.dasc" + //| mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax + //| sub ecx, 1 + //| jns <1 + //|2: + //| + //| movzx eax, byte CCSTATE->nfpr + //| mov CARG1, CCSTATE->gpr[0] + //| mov CARG2, CCSTATE->gpr[1] + //| mov CARG3, CCSTATE->gpr[2] + //| mov CARG4, CCSTATE->gpr[3] + //| mov CARG5, CCSTATE->gpr[4] + //| mov CARG6, CCSTATE->gpr[5] + //| test eax, eax; jz >5 + //| movaps xmm0, CCSTATE->fpr[0] + //| movaps xmm1, CCSTATE->fpr[1] + //| movaps xmm2, CCSTATE->fpr[2] + //| movaps xmm3, CCSTATE->fpr[3] + //| cmp eax, 4; jbe >5 + //| movaps xmm4, CCSTATE->fpr[4] + dasm_put(Dst, 9579, offsetof(CCallState, stack), CCALL_SPS_EXTRA*8, Dt10(->nfpr), Dt10(->gpr[0]), Dt10(->gpr[1]), Dt10(->gpr[2]), Dt10(->gpr[3]), Dt10(->gpr[4]), Dt10(->gpr[5]), Dt10(->fpr[0]), Dt10(->fpr[1]), Dt10(->fpr[2]), Dt10(->fpr[3])); +#line 2335 "vm_x64.dasc" + //| movaps xmm5, CCSTATE->fpr[5] + //| movaps xmm6, CCSTATE->fpr[6] + //| movaps xmm7, CCSTATE->fpr[7] + //|5: + //| + //| call aword CCSTATE->func + //| + //| mov CCSTATE->gpr[0], rax + //| movaps CCSTATE->fpr[0], xmm0 + //| mov CCSTATE->gpr[1], rdx + //| movaps CCSTATE->fpr[1], xmm1 + //| + //| mov rbx, [rbp-8]; leave; ret + //|// Note: vm_ffi_call must be the last function in this object file! + //| + //|//----------------------------------------------------------------------- + dasm_put(Dst, 9660, Dt10(->fpr[4]), Dt10(->fpr[5]), Dt10(->fpr[6]), Dt10(->fpr[7]), Dt10(->func), Dt10(->gpr[0]), Dt10(->fpr[0]), Dt10(->gpr[1]), Dt10(->fpr[1])); +#line 2351 "vm_x64.dasc" +} + +/* Generate the code for a single instruction. 
*/ +static void build_ins(BuildCtx *ctx, BCOp op, int defop) +{ + int vk = 0; + //|// Note: aligning all instructions does not pay off. + //|=>defop: + dasm_put(Dst, 9706, defop); +#line 2359 "vm_x64.dasc" + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. */ + + //|.macro jmp_comp, lt, ge, le, gt, target + //||switch (op) { + //||case BC_ISLT: + //| lt target + //||break; + //||case BC_ISGE: + //| ge target + //||break; + //||case BC_ISLE: + //| le target + //||break; + //||case BC_ISGT: + //| gt target + //||break; + //||default: break; /* Shut up GCC. */ + //||} + //|.endmacro + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + //| // RA = src1, RD = src2, JMP with RD = target + //| ins_AD + //| mov ITYPE, [BASE+RA*8] + //| mov RB, [BASE+RD*8] + //| mov RA, ITYPE + //| mov RD, RB + //| sar ITYPE, 47 + //| sar RB, 47 + //| cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp + //| cmp RBd, LJ_TISNUM; jae ->vmeta_comp + //|1: + //| movd xmm0, RD + //|2: + //| movd xmm1, RA + //|3: + //| add PC, 4 + //| ucomisd xmm0, xmm1 + //| // Unordered: all of ZF CF PF set, ordered: PF clear. + //| // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. + //| jmp_comp jbe, ja, jb, jae, >1 + dasm_put(Dst, 9708, LJ_TISNUM, LJ_TISNUM); + switch (op) { + case BC_ISLT: + dasm_put(Dst, 8984); + break; + case BC_ISGE: + dasm_put(Dst, 9774); + break; + case BC_ISLE: + dasm_put(Dst, 2946); + break; + case BC_ISGT: + dasm_put(Dst, 9779); + break; + default: break; /* Shut up GCC. 
*/ + } +#line 2405 "vm_x64.dasc" + //| movzx RDd, PC_RD + //| branchPC RD + //|1: + //| ins_next + dasm_put(Dst, 9784, -BCBIAS_J*4); +#line 2409 "vm_x64.dasc" + break; + + case BC_ISEQV: case BC_ISNEV: + vk = op == BC_ISEQV; + //| ins_AD // RA = src1, RD = src2, JMP with RD = target + //| mov RB, [BASE+RD*8] + //| mov ITYPE, [BASE+RA*8] + //| add PC, 4 + //| mov RD, RB + //| mov RA, ITYPE + //| sar RB, 47 + //| sar ITYPE, 47 + //| cmp RBd, LJ_TISNUM; jae >5 + //| cmp ITYPEd, LJ_TISNUM; jae >5 + //| movd xmm1, RD + //|1: + //| movd xmm0, RA + //|2: + //| ucomisd xmm0, xmm1 + //|4: + dasm_put(Dst, 9819, LJ_TISNUM, LJ_TISNUM); +#line 2429 "vm_x64.dasc" + iseqne_fp: + if (vk) { + //| jp >2 // Unordered means not equal. + //| jne >2 + dasm_put(Dst, 9885); +#line 2433 "vm_x64.dasc" + } else { + //| jp >2 // Unordered means not equal. + //| je >1 + dasm_put(Dst, 9894); +#line 2436 "vm_x64.dasc" + } + iseqne_end: + if (vk) { + //|1: // EQ: Branch to the target. + //| movzx RDd, PC_RD + //| branchPC RD + //|2: // NE: Fallthrough to next instruction. + dasm_put(Dst, 9903, -BCBIAS_J*4); +#line 2443 "vm_x64.dasc" + } else { + //|2: // NE: Branch to the target. + //| movzx RDd, PC_RD + //| branchPC RD + //|1: // EQ: Fallthrough to next instruction. + dasm_put(Dst, 9919, -BCBIAS_J*4); +#line 2448 "vm_x64.dasc" + } + //| ins_next + //| + dasm_put(Dst, 9797); +#line 2451 "vm_x64.dasc" + if (op == BC_ISEQV || op == BC_ISNEV) { + //|5: // Either or both types are not numbers. + //| cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd + //| cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd + //| cmp RA, RD + //| je <1 // Same GCobjs or pvalues? + //| cmp RBd, ITYPEd + //| jne <2 // Not the same type? + //| cmp RBd, LJ_TISTABUD + //| ja <2 // Different objects and not table/ud? + //| + //| // Different tables or userdatas. Need to check __eq metamethod. + //| // Field metatable must be at same offset for GCtab and GCudata! 
+ //| cleartp TAB:RA + //| mov TAB:RB, TAB:RA->metatable + //| test TAB:RB, TAB:RB + //| jz <2 // No metatable? + //| test byte TAB:RB->nomm, 1<metatable), Dt6(->nomm), 1<vmeta_equal // Handle __eq metamethod. + dasm_put(Dst, 10018); +#line 2476 "vm_x64.dasc" + } else { + //|3: + //| cmp ITYPEd, LJ_TCDATA + //| jne <2 + //| jmp ->vmeta_equal_cd + dasm_put(Dst, 10023, LJ_TCDATA); +#line 2481 "vm_x64.dasc" + } + break; + case BC_ISEQS: case BC_ISNES: + vk = op == BC_ISEQS; + //| ins_AND // RA = src, RD = str const, JMP with RD = target + //| mov RB, [BASE+RA*8] + //| add PC, 4 + //| checkstr RB, >3 + //| cmp RB, [KBASE+RD*8] + dasm_put(Dst, 10039, LJ_TSTR); +#line 2490 "vm_x64.dasc" + if (vk) { + //| jne >2 + dasm_put(Dst, 9172); +#line 2492 "vm_x64.dasc" + } else { + //| je >1 + dasm_put(Dst, 4107); +#line 2494 "vm_x64.dasc" + } + goto iseqne_end; + case BC_ISEQN: case BC_ISNEN: + vk = op == BC_ISEQN; + //| ins_AD // RA = src, RD = num const, JMP with RD = target + //| mov RB, [BASE+RA*8] + //| add PC, 4 + //| checknum RB, >3 + //|1: + //| movsd xmm0, qword [KBASE+RD*8] + //|2: + //| ucomisd xmm0, qword [BASE+RA*8] + //|4: + dasm_put(Dst, 10083, LJ_TISNUM); +#line 2507 "vm_x64.dasc" + goto iseqne_fp; + case BC_ISEQP: case BC_ISNEP: + vk = op == BC_ISEQP; + //| ins_AND // RA = src, RD = primitive type (~), JMP with RD = target + //| mov RB, [BASE+RA*8] + //| sar RB, 47 + //| add PC, 4 + //| cmp RBd, RDd + dasm_put(Dst, 10128); +#line 2515 "vm_x64.dasc" + if (vk) { + //| jne >3 + //| movzx RDd, PC_RD + //| branchPC RD + //|2: + //| ins_next + //|3: + //| cmp RBd, LJ_TCDATA; jne <2 + //| jmp ->vmeta_equal_cd + dasm_put(Dst, 10148, -BCBIAS_J*4, LJ_TCDATA); +#line 2524 "vm_x64.dasc" + } else { + //| je >2 + //| cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd + //| movzx RDd, PC_RD + //| branchPC RD + //|2: + //| ins_next + dasm_put(Dst, 10201, LJ_TCDATA, -BCBIAS_J*4); +#line 2531 "vm_x64.dasc" + } + break; + + /* -- Unary test and copy ops 
------------------------------------------- */ + + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: + //| ins_AD // RA = dst or unused, RD = src, JMP with RD = target + //| mov ITYPE, [BASE+RD*8] + //| add PC, 4 + dasm_put(Dst, 10248); +#line 2540 "vm_x64.dasc" + if (op == BC_ISTC || op == BC_ISFC) { + //| mov RB, ITYPE + dasm_put(Dst, 10257); +#line 2542 "vm_x64.dasc" + } + //| sar ITYPE, 47 + //| cmp ITYPEd, LJ_TISTRUECOND + dasm_put(Dst, 4503, LJ_TISTRUECOND); +#line 2545 "vm_x64.dasc" + if (op == BC_IST || op == BC_ISTC) { + //| jae >1 + dasm_put(Dst, 9779); +#line 2547 "vm_x64.dasc" + } else { + //| jb >1 + dasm_put(Dst, 2946); +#line 2549 "vm_x64.dasc" + } + if (op == BC_ISTC || op == BC_ISFC) { + //| mov [BASE+RA*8], RB + dasm_put(Dst, 10261); +#line 2552 "vm_x64.dasc" + } + //| movzx RDd, PC_RD + //| branchPC RD + //|1: // Fallthrough to the next instruction. + //| ins_next + dasm_put(Dst, 9784, -BCBIAS_J*4); +#line 2557 "vm_x64.dasc" + break; + + case BC_ISTYPE: + //| ins_AD // RA = src, RD = -type + //| mov RB, [BASE+RA*8] + //| sar RB, 47 + //| add RBd, RDd + //| jne ->vmeta_istype + //| ins_next + dasm_put(Dst, 10266); +#line 2566 "vm_x64.dasc" + break; + case BC_ISNUM: + //| ins_AD // RA = src, RD = -(TISNUM-1) + //| checknumtp [BASE+RA*8], ->vmeta_istype + //| ins_next + dasm_put(Dst, 10303, LJ_TISNUM); +#line 2571 "vm_x64.dasc" + break; + + /* -- Unary ops --------------------------------------------------------- */ + + case BC_MOV: + //| ins_AD // RA = dst, RD = src + //| mov RB, [BASE+RD*8] + //| mov [BASE+RA*8], RB + //| ins_next_ + dasm_put(Dst, 10343); +#line 2580 "vm_x64.dasc" + break; + case BC_NOT: + //| ins_AD // RA = dst, RD = src + //| mov RB, [BASE+RD*8] + //| sar RB, 47 + //| mov RCd, 2 + //| cmp RB, LJ_TISTRUECOND + //| sbb RCd, 0 + //| shl RC, 47 + //| not RC + //| mov [BASE+RA*8], RC + //| ins_next + dasm_put(Dst, 10373, LJ_TISTRUECOND); +#line 2592 "vm_x64.dasc" + break; + case BC_UNM: + //| ins_AD // RA = dst, RD = src + //| 
mov RB, [BASE+RD*8] + //| checknum RB, ->vmeta_unm + //| mov64 RD, U64x(80000000,00000000) + //| xor RB, RD + //| mov [BASE+RA*8], RB + //| ins_next + dasm_put(Dst, 10429, LJ_TISNUM, (unsigned int)(U64x(80000000,00000000)), (unsigned int)((U64x(80000000,00000000))>>32)); +#line 2601 "vm_x64.dasc" + break; + case BC_LEN: + //| ins_AD // RA = dst, RD = src + //| mov RD, [BASE+RD*8] + //| checkstr RD, >2 + //| xorps xmm0, xmm0 + //| cvtsi2sd xmm0, dword STR:RD->len + //|1: + //| movsd qword [BASE+RA*8], xmm0 + //| ins_next + //|2: + //| cmp ITYPEd, LJ_TTAB; jne ->vmeta_len + //| mov TAB:CARG1, TAB:RD + dasm_put(Dst, 10484, LJ_TSTR, Dt5(->len), LJ_TTAB); +#line 2614 "vm_x64.dasc" +#if LJ_52 + //| mov TAB:RB, TAB:RD->metatable + //| cmp TAB:RB, 0 + //| jnz >9 + //|3: + dasm_put(Dst, 10566, Dt6(->metatable)); +#line 2619 "vm_x64.dasc" +#endif + //|->BC_LEN_Z: + //| mov RB, BASE // Save BASE. + //| call extern lj_tab_len // (GCtab *t) + //| // Length of table returned in eax (RD). + //| cvtsi2sd xmm0, RDd + //| mov BASE, RB // Restore BASE. + //| movzx RAd, PC_RA + //| jmp <1 + dasm_put(Dst, 10582); +#line 2628 "vm_x64.dasc" +#if LJ_52 + //|9: // Check for __len. + //| test byte TAB:RB->nomm, 1<vmeta_len // 'no __len' flag NOT set: check. 
+ dasm_put(Dst, 10610, Dt6(->nomm), 1<vmeta_arith_vn + //| movsd xmm0, qword [BASE+RB*8] + //| sseins ssereg, qword [KBASE+RC*8] + //|| break; + //||case 1: + //| checknumtp [BASE+RB*8], ->vmeta_arith_nv + //| movsd xmm0, qword [KBASE+RC*8] + //| sseins ssereg, qword [BASE+RB*8] + //|| break; + //||default: + //| checknumtp [BASE+RB*8], ->vmeta_arith_vv + //| checknumtp [BASE+RC*8], ->vmeta_arith_vv + //| movsd xmm0, qword [BASE+RB*8] + //| sseins ssereg, qword [BASE+RC*8] + //|| break; + //||} + //|.endmacro + //| + //|.macro ins_arithdn, intins + //| ins_ABC + //||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + //||switch (vk) { + //||case 0: + //| mov RB, [BASE+RB*8] + //| mov RC, [KBASE+RC*8] + //| checkint RB, ->vmeta_arith_vno + //| checkint RC, ->vmeta_arith_vno + //| intins RBd, RCd; jo ->vmeta_arith_vno + //|| break; + //||case 1: + //| mov RB, [BASE+RB*8] + //| mov RC, [KBASE+RC*8] + //| checkint RB, ->vmeta_arith_nvo + //| checkint RC, ->vmeta_arith_nvo + //| intins RCd, RBd; jo ->vmeta_arith_nvo + //|| break; + //||default: + //| mov RB, [BASE+RB*8] + //| mov RC, [BASE+RC*8] + //| checkint RB, ->vmeta_arith_vvo + //| checkint RC, ->vmeta_arith_vvo + //| intins RBd, RCd; jo ->vmeta_arith_vvo + //|| break; + //||} + //||if (vk == 1) { + //| setint RC + //| mov [BASE+RA*8], RC + //||} else { + //| setint RB + //| mov [BASE+RA*8], RB + //||} + //| ins_next + //|.endmacro + //| + //|.macro ins_arithpost + //| movsd qword [BASE+RA*8], xmm0 + //|.endmacro + //| + //|.macro ins_arith, sseins + //| ins_arithpre sseins, xmm0 + //| ins_arithpost + //| ins_next + //|.endmacro + //| + //|.macro ins_arith, intins, sseins + //| ins_arith, sseins + //|.endmacro + + //| // RA = dst, RB = src1 or num const, RC = src2 or num const + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + //| ins_arith add, addsd + dasm_put(Dst, 10626); + vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + switch (vk) { + case 0: + dasm_put(Dst, 10634, LJ_TISNUM); + break; + case 1: + 
dasm_put(Dst, 10668, LJ_TISNUM); + break; + default: + dasm_put(Dst, 10702, LJ_TISNUM, LJ_TISNUM); + break; + } + dasm_put(Dst, 10753); +#line 2714 "vm_x64.dasc" + break; + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: + //| ins_arith sub, subsd + dasm_put(Dst, 10626); + vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + switch (vk) { + case 0: + dasm_put(Dst, 10781, LJ_TISNUM); + break; + case 1: + dasm_put(Dst, 10815, LJ_TISNUM); + break; + default: + dasm_put(Dst, 10849, LJ_TISNUM, LJ_TISNUM); + break; + } + dasm_put(Dst, 10753); +#line 2717 "vm_x64.dasc" + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + //| ins_arith imul, mulsd + dasm_put(Dst, 10626); + vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + switch (vk) { + case 0: + dasm_put(Dst, 10900, LJ_TISNUM); + break; + case 1: + dasm_put(Dst, 10934, LJ_TISNUM); + break; + default: + dasm_put(Dst, 10968, LJ_TISNUM, LJ_TISNUM); + break; + } + dasm_put(Dst, 10753); +#line 2720 "vm_x64.dasc" + break; + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + //| ins_arith divsd + dasm_put(Dst, 10626); + vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + switch (vk) { + case 0: + dasm_put(Dst, 11019, LJ_TISNUM); + break; + case 1: + dasm_put(Dst, 11053, LJ_TISNUM); + break; + default: + dasm_put(Dst, 11087, LJ_TISNUM, LJ_TISNUM); + break; + } + dasm_put(Dst, 10753); +#line 2723 "vm_x64.dasc" + break; + case BC_MODVN: + //| ins_arithpre movsd, xmm1 + dasm_put(Dst, 10626); + vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + switch (vk) { + case 0: + dasm_put(Dst, 11138, LJ_TISNUM); + break; + case 1: + dasm_put(Dst, 11172, LJ_TISNUM); + break; + default: + dasm_put(Dst, 11206, LJ_TISNUM, LJ_TISNUM); + break; + } +#line 2726 "vm_x64.dasc" + //|->BC_MODVN_Z: + //| call ->vm_mod + //| ins_arithpost + //| ins_next + dasm_put(Dst, 11257); +#line 2730 "vm_x64.dasc" + break; + case BC_MODNV: case BC_MODVV: + //| ins_arithpre movsd, xmm1 + dasm_put(Dst, 10626); + vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + switch 
(vk) { + case 0: + dasm_put(Dst, 11138, LJ_TISNUM); + break; + case 1: + dasm_put(Dst, 11172, LJ_TISNUM); + break; + default: + dasm_put(Dst, 11206, LJ_TISNUM, LJ_TISNUM); + break; + } +#line 2733 "vm_x64.dasc" + //| jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. + dasm_put(Dst, 11290); +#line 2734 "vm_x64.dasc" + break; + case BC_POW: + //| ins_arithpre movsd, xmm1 + dasm_put(Dst, 10626); + vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + switch (vk) { + case 0: + dasm_put(Dst, 11138, LJ_TISNUM); + break; + case 1: + dasm_put(Dst, 11172, LJ_TISNUM); + break; + default: + dasm_put(Dst, 11206, LJ_TISNUM, LJ_TISNUM); + break; + } +#line 2737 "vm_x64.dasc" + //| mov RB, BASE + //| call extern pow + //| movzx RAd, PC_RA + //| mov BASE, RB + //| ins_arithpost + //| ins_next + dasm_put(Dst, 11295); +#line 2743 "vm_x64.dasc" + break; + + case BC_CAT: + //| ins_ABC // RA = dst, RB = src_start, RC = src_end + //| mov L:CARG1, SAVE_L + //| mov L:CARG1->base, BASE + //| lea CARG2, [BASE+RC*8] + //| mov CARG3d, RCd + //| sub CARG3d, RBd + //|->BC_CAT_Z: + //| mov L:RB, L:CARG1 + //| mov SAVE_PC, PC + //| call extern lj_meta_cat // (lua_State *L, TValue *top, int left) + //| // NULL (finished) or TValue * (metamethod) returned in eax (RC). + //| mov BASE, L:RB->base + //| test RC, RC + //| jnz ->vmeta_binop + //| movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB]. 
+ //| movzx RAd, PC_RA + //| mov RC, [BASE+RB*8] + //| mov [BASE+RA*8], RC + //| ins_next + dasm_put(Dst, 11339, Dt1(->base), Dt1(->base)); +#line 2765 "vm_x64.dasc" + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + //| ins_AND // RA = dst, RD = str const (~) + //| mov RD, [KBASE+RD*8] + //| settp RD, LJ_TSTR + //| mov [BASE+RA*8], RD + //| ins_next + dasm_put(Dst, 11431, (unsigned int)(((uint64_t)LJ_TSTR<<47)), (unsigned int)((((uint64_t)LJ_TSTR<<47))>>32)); +#line 2775 "vm_x64.dasc" + break; + case BC_KCDATA: + //| ins_AND // RA = dst, RD = cdata const (~) + //| mov RD, [KBASE+RD*8] + //| settp RD, LJ_TCDATA + //| mov [BASE+RA*8], RD + //| ins_next + dasm_put(Dst, 11431, (unsigned int)(((uint64_t)LJ_TCDATA<<47)), (unsigned int)((((uint64_t)LJ_TCDATA<<47))>>32)); +#line 2782 "vm_x64.dasc" + break; + case BC_KSHORT: + //| ins_AD // RA = dst, RD = signed int16 literal + //| movsx RDd, RDW // Sign-extend literal. + //| cvtsi2sd xmm0, RDd + //| movsd qword [BASE+RA*8], xmm0 + //| ins_next + dasm_put(Dst, 11472); +#line 2789 "vm_x64.dasc" + break; + case BC_KNUM: + //| ins_AD // RA = dst, RD = num const + //| movsd xmm0, qword [KBASE+RD*8] + //| movsd qword [BASE+RA*8], xmm0 + //| ins_next + dasm_put(Dst, 11508); +#line 2795 "vm_x64.dasc" + break; + case BC_KPRI: + //| ins_AD // RA = dst, RD = primitive type (~) + //| shl RD, 47 + //| not RD + //| mov [BASE+RA*8], RD + //| ins_next + dasm_put(Dst, 10395); +#line 2802 "vm_x64.dasc" + break; + case BC_KNIL: + //| ins_AD // RA = dst_start, RD = dst_end + //| lea RA, [BASE+RA*8+8] + //| lea RD, [BASE+RD*8] + //| mov RB, LJ_TNIL + //| mov [RA-8], RB // Sets minimum 2 slots. 
+ //|1: + //| mov [RA], RB + //| add RA, 8 + //| cmp RA, RD + //| jbe <1 + //| ins_next + dasm_put(Dst, 11543, LJ_TNIL); +#line 2815 "vm_x64.dasc" + break; + + /* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + //| ins_AD // RA = dst, RD = upvalue # + //| mov LFUNC:RB, [BASE-16] + //| cleartp LFUNC:RB + //| mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)] + //| mov RB, UPVAL:RB->v + //| mov RD, [RB] + //| mov [BASE+RA*8], RD + //| ins_next + dasm_put(Dst, 11599, offsetof(GCfuncL, uvptr), DtA(->v)); +#line 2828 "vm_x64.dasc" + break; + case BC_USETV: +#define TV2MARKOFS \ + ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) + //| ins_AD // RA = upvalue #, RD = src + //| mov LFUNC:RB, [BASE-16] + //| cleartp LFUNC:RB + //| mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] + //| cmp byte UPVAL:RB->closed, 0 + //| mov RB, UPVAL:RB->v + //| mov RA, [BASE+RD*8] + //| mov [RB], RA + //| jz >1 + //| // Check barrier for closed upvalue. + //| test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv) + //| jnz >2 + //|1: + //| ins_next + //| + //|2: // Upvalue is black. Check if new value is collectable and white. + //| mov RD, RA + //| sar RD, 47 + //| sub RDd, LJ_TISGCV + //| cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v) + //| jbe <1 + //| cleartp GCOBJ:RA + //| test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) + //| jz <1 + //| // Crossed a write barrier. Move the barrier forward. + //| mov CARG2, RB + dasm_put(Dst, 11653, offsetof(GCfuncL, uvptr), DtA(->closed), DtA(->v), TV2MARKOFS, LJ_GC_BLACK, LJ_TISGCV, LJ_TNUMX - LJ_TISGCV, Dt4(->gch.marked), LJ_GC_WHITES); +#line 2858 "vm_x64.dasc" + //| mov RB, BASE // Save BASE. + //| lea GL:CARG1, [DISPATCH+GG_DISP2G] + //| call extern lj_gc_barrieruv // (global_State *g, TValue *tv) + //| mov BASE, RB // Restore BASE. 
+ //| jmp <1 + dasm_put(Dst, 11765, GG_DISP2G); +#line 2863 "vm_x64.dasc" + break; +#undef TV2MARKOFS + case BC_USETS: + //| ins_AND // RA = upvalue #, RD = str const (~) + //| mov LFUNC:RB, [BASE-16] + //| cleartp LFUNC:RB + //| mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] + //| mov STR:RA, [KBASE+RD*8] + //| mov RD, UPVAL:RB->v + //| settp STR:ITYPE, STR:RA, LJ_TSTR + //| mov [RD], STR:ITYPE + //| test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) + //| jnz >2 + //|1: + //| ins_next + //| + //|2: // Check if string is white and ensure upvalue is closed. + //| test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) + //| jz <1 + //| cmp byte UPVAL:RB->closed, 0 + //| jz <1 + //| // Crossed a write barrier. Move the barrier forward. + //| mov RB, BASE // Save BASE (CARG2 == BASE). + //| mov CARG2, RD + //| lea GL:CARG1, [DISPATCH+GG_DISP2G] + //| call extern lj_gc_barrieruv // (global_State *g, TValue *tv) + //| mov BASE, RB // Restore BASE. + //| jmp <1 + dasm_put(Dst, 11789, offsetof(GCfuncL, uvptr), DtA(->v), (unsigned int)(((uint64_t)LJ_TSTR<<47)), (unsigned int)((((uint64_t)LJ_TSTR<<47))>>32), DtA(->marked), LJ_GC_BLACK, Dt4(->gch.marked), LJ_GC_WHITES, DtA(->closed), GG_DISP2G); +#line 2891 "vm_x64.dasc" + break; + case BC_USETN: + //| ins_AD // RA = upvalue #, RD = num const + //| mov LFUNC:RB, [BASE-16] + //| cleartp LFUNC:RB + //| movsd xmm0, qword [KBASE+RD*8] + //| mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] + //| mov RA, UPVAL:RB->v + //| movsd qword [RA], xmm0 + //| ins_next + dasm_put(Dst, 11905, offsetof(GCfuncL, uvptr), DtA(->v)); +#line 2901 "vm_x64.dasc" + break; + case BC_USETP: + //| ins_AD // RA = upvalue #, RD = primitive type (~) + //| mov LFUNC:RB, [BASE-16] + //| cleartp LFUNC:RB + //| mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] + //| shl RD, 47 + //| not RD + //| mov RA, UPVAL:RB->v + //| mov [RA], RD + //| ins_next + dasm_put(Dst, 11963, offsetof(GCfuncL, uvptr), DtA(->v)); +#line 2912 
"vm_x64.dasc" + break; + case BC_UCLO: + //| ins_AD // RA = level, RD = target + //| branchPC RD // Do this first to free RD. + //| mov L:RB, SAVE_L + //| cmp aword L:RB->openupval, 0 + //| je >1 + //| mov L:RB->base, BASE + //| lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE + //| mov L:CARG1, L:RB // Caveat: CARG1 == RA + //| call extern lj_func_closeuv // (lua_State *L, TValue *level) + //| mov BASE, L:RB->base + //|1: + //| ins_next + dasm_put(Dst, 12020, -BCBIAS_J*4, Dt1(->openupval), Dt1(->base), Dt1(->base)); +#line 2926 "vm_x64.dasc" + break; + + case BC_FNEW: + //| ins_AND // RA = dst, RD = proto const (~) (holding function prototype) + //| mov L:RB, SAVE_L + //| mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE. + //| mov CARG3, [BASE-16] + //| cleartp CARG3 + //| mov CARG2, [KBASE+RD*8] // Fetch GCproto *. + //| mov CARG1, L:RB + //| mov SAVE_PC, PC + //| // (lua_State *L, GCproto *pt, GCfuncL *parent) + //| call extern lj_func_newL_gc + //| // GCfuncL * returned in eax (RC). + //| mov BASE, L:RB->base + //| movzx RAd, PC_RA + //| settp LFUNC:RC, LJ_TFUNC + //| mov [BASE+RA*8], LFUNC:RC + //| ins_next + dasm_put(Dst, 12084, Dt1(->base), Dt1(->base), (unsigned int)(((uint64_t)LJ_TFUNC<<47)), (unsigned int)((((uint64_t)LJ_TFUNC<<47))>>32)); +#line 2945 "vm_x64.dasc" + break; + + /* -- Table ops --------------------------------------------------------- */ + + case BC_TNEW: + //| ins_AD // RA = dst, RD = hbits|asize + //| mov L:RB, SAVE_L + //| mov L:RB->base, BASE + //| mov RA, [DISPATCH+DISPATCH_GL(gc.total)] + //| cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] + //| mov SAVE_PC, PC + //| jae >5 + //|1: + //| mov CARG3d, RDd + //| and RDd, 0x7ff + //| shr CARG3d, 11 + //| cmp RDd, 0x7ff + //| je >3 + //|2: + //| mov L:CARG1, L:RB + //| mov CARG2d, RDd + //| call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) + //| // Table * returned in eax (RC). 
+ //| mov BASE, L:RB->base + //| movzx RAd, PC_RA + //| settp TAB:RC, LJ_TTAB + //| mov [BASE+RA*8], TAB:RC + //| ins_next + //|3: // Turn 0x7ff into 0x801. + //| mov RDd, 0x801 + //| jmp <2 + //|5: + //| mov L:CARG1, L:RB + //| call extern lj_gc_step_fixtop // (lua_State *L) + //| movzx RDd, PC_RD + //| jmp <1 + dasm_put(Dst, 12170, Dt1(->base), DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), Dt1(->base), (unsigned int)(((uint64_t)LJ_TTAB<<47)), (unsigned int)((((uint64_t)LJ_TTAB<<47))>>32)); +#line 2981 "vm_x64.dasc" + break; + case BC_TDUP: + //| ins_AND // RA = dst, RD = table const (~) (holding template table) + //| mov L:RB, SAVE_L + //| mov RA, [DISPATCH+DISPATCH_GL(gc.total)] + //| mov SAVE_PC, PC + //| cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] + //| mov L:RB->base, BASE + //| jae >3 + //|2: + //| mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE + //| mov L:CARG1, L:RB // Caveat: CARG1 == RA + //| call extern lj_tab_dup // (lua_State *L, Table *kt) + //| // Table * returned in eax (RC). + //| mov BASE, L:RB->base + //| movzx RAd, PC_RA + //| settp TAB:RC, LJ_TTAB + //| mov [BASE+RA*8], TAB:RC + //| ins_next + //|3: + //| mov L:CARG1, L:RB + //| call extern lj_gc_step_fixtop // (lua_State *L) + //| movzx RDd, PC_RD // Need to reload RD. 
+ //| not RD + //| jmp <2 + dasm_put(Dst, 12304, DISPATCH_GL(gc.total), DISPATCH_GL(gc.threshold), Dt1(->base), Dt1(->base), (unsigned int)(((uint64_t)LJ_TTAB<<47)), (unsigned int)((((uint64_t)LJ_TTAB<<47))>>32)); +#line 3006 "vm_x64.dasc" + break; + + case BC_GGET: + //| ins_AND // RA = dst, RD = str const (~) + //| mov LFUNC:RB, [BASE-16] + //| cleartp LFUNC:RB + //| mov TAB:RB, LFUNC:RB->env + //| mov STR:RC, [KBASE+RD*8] + //| jmp ->BC_TGETS_Z + dasm_put(Dst, 12413, Dt7(->env)); +#line 3015 "vm_x64.dasc" + break; + case BC_GSET: + //| ins_AND // RA = src, RD = str const (~) + //| mov LFUNC:RB, [BASE-16] + //| cleartp LFUNC:RB + //| mov TAB:RB, LFUNC:RB->env + //| mov STR:RC, [KBASE+RD*8] + //| jmp ->BC_TSETS_Z + dasm_put(Dst, 12444, Dt7(->env)); +#line 3023 "vm_x64.dasc" + break; + + case BC_TGETV: + //| ins_ABC // RA = dst, RB = table, RC = key + //| mov TAB:RB, [BASE+RB*8] + //| mov RC, [BASE+RC*8] + //| checktab TAB:RB, ->vmeta_tgetv + //| + //| // Integer key? + //| // Convert number to int and back and compare. + //| checknum RC, >5 + //| movd xmm0, RC + //| cvttsd2si RCd, xmm0 + //| cvtsi2sd xmm1, RCd + //| ucomisd xmm0, xmm1 + //| jne ->vmeta_tgetv // Generic numeric key? Use fallback. + //| cmp RCd, TAB:RB->asize // Takes care of unordered, too. + //| jae ->vmeta_tgetv // Not in array part? Use fallback. + //| shl RCd, 3 + //| add RC, TAB:RB->array + //| // Get array slot. + //| mov ITYPE, [RC] + //| cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath. + //| je >2 + //|1: + //| mov [BASE+RA*8], ITYPE + //| ins_next + //| + //|2: // Check for __index if table value is nil. + //| mov TAB:TMPR, TAB:RB->metatable + //| test TAB:TMPR, TAB:TMPR + //| jz <1 + //| test byte TAB:TMPR->nomm, 1<asize), Dt6(->array), LJ_TNIL, Dt6(->metatable)); +#line 3056 "vm_x64.dasc" + //| jz ->vmeta_tgetv // 'no __index' flag NOT set: check. + //| jmp <1 + //| + //|5: // String key? 
+ //| cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv + //| cleartp STR:RC + //| jmp ->BC_TGETS_Z + dasm_put(Dst, 12625, Dt6(->nomm), 1<vmeta_tgets + //|->BC_TGETS_Z: // RB = GCtab *, RC = GCstr * + //| mov TMPRd, TAB:RB->hmask + //| and TMPRd, STR:RC->hash + //| imul TMPRd, #NODE + //| add NODE:TMPR, TAB:RB->node + //| settp ITYPE, STR:RC, LJ_TSTR + //|1: + //| cmp NODE:TMPR->key, ITYPE + //| jne >4 + //| // Get node value. + //| mov ITYPE, NODE:TMPR->val + //| cmp ITYPE, LJ_TNIL + //| je >5 // Key found, but nil value? + //|2: + //| mov [BASE+RA*8], ITYPE + dasm_put(Dst, 12663, LJ_TTAB, Dt6(->hmask), Dt5(->hash), sizeof(Node), Dt6(->node), (unsigned int)(((uint64_t)LJ_TSTR<<47)), (unsigned int)((((uint64_t)LJ_TSTR<<47))>>32), DtB(->key), DtB(->val), LJ_TNIL); +#line 3085 "vm_x64.dasc" + //| ins_next + //| + //|4: // Follow hash chain. + //| mov NODE:TMPR, NODE:TMPR->next + //| test NODE:TMPR, NODE:TMPR + //| jnz <1 + //| // End of hash chain: key not found, nil result. + //| mov ITYPE, LJ_TNIL + //| + //|5: // Check for __index if table value is nil. + //| mov TAB:TMPR, TAB:RB->metatable + //| test TAB:TMPR, TAB:TMPR + //| jz <2 // No metatable: done. + //| test byte TAB:TMPR->nomm, 1<vmeta_tgets // Caveat: preserve STR:RC. + dasm_put(Dst, 12761, DtB(->next), LJ_TNIL, Dt6(->metatable), Dt6(->nomm), 1<vmeta_tgetb + //| cmp RCd, TAB:RB->asize + //| jae ->vmeta_tgetb + //| shl RCd, 3 + //| add RC, TAB:RB->array + //| // Get array slot. + //| mov ITYPE, [RC] + //| cmp ITYPE, LJ_TNIL + //| je >2 + //|1: + //| mov [BASE+RA*8], ITYPE + //| ins_next + //| + //|2: // Check for __index if table value is nil. + //| mov TAB:TMPR, TAB:RB->metatable + //| test TAB:TMPR, TAB:TMPR + //| jz <1 + //| test byte TAB:TMPR->nomm, 1<vmeta_tgetb // 'no __index' flag NOT set: check. + //| jmp <1 + dasm_put(Dst, 12831, LJ_TTAB, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->metatable), Dt6(->nomm), 1<asize + //| jae ->vmeta_tgetr // Not in array part? Use fallback. 
+ //| shl RCd, 3 + //| add RC, TAB:RB->array + //| // Get array slot. + //|->BC_TGETR_Z: + //| mov ITYPE, [RC] + //|->BC_TGETR2_Z: + //| mov [BASE+RA*8], ITYPE + //| ins_next + dasm_put(Dst, 12947, Dt6(->asize), Dt6(->array)); +#line 3141 "vm_x64.dasc" + break; + + case BC_TSETV: + //| ins_ABC // RA = src, RB = table, RC = key + //| mov TAB:RB, [BASE+RB*8] + //| mov RC, [BASE+RC*8] + //| checktab TAB:RB, ->vmeta_tsetv + //| + //| // Integer key? + //| // Convert number to int and back and compare. + //| checknum RC, >5 + //| movd xmm0, RC + //| cvttsd2si RCd, xmm0 + //| cvtsi2sd xmm1, RCd + //| ucomisd xmm0, xmm1 + //| jne ->vmeta_tsetv // Generic numeric key? Use fallback. + //| cmp RCd, TAB:RB->asize // Takes care of unordered, too. + //| jae ->vmeta_tsetv + //| shl RCd, 3 + //| add RC, TAB:RB->array + //| cmp aword [RC], LJ_TNIL + //| je >3 // Previous value is nil? + //|1: + //| test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) + //| jnz >7 + //|2: // Set array slot. + //| mov RB, [BASE+RA*8] + dasm_put(Dst, 13021, LJ_TTAB, LJ_TISNUM, Dt6(->asize), Dt6(->array), LJ_TNIL, Dt6(->marked), LJ_GC_BLACK); +#line 3168 "vm_x64.dasc" + //| mov [RC], RB + //| ins_next + //| + //|3: // Check for __newindex if previous value is nil. + //| mov TAB:TMPR, TAB:RB->metatable + //| test TAB:TMPR, TAB:TMPR + //| jz <1 + //| test byte TAB:TMPR->nomm, 1<vmeta_tsetv // 'no __newindex' flag NOT set: check. + //| jmp <1 + //| + //|5: // String key? + //| cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv + //| cleartp STR:RC + //| jmp ->BC_TSETS_Z + //| + //|7: // Possible table write barrier for the value. Skip valiswhite check. 
+ //| barrierback TAB:RB, TMPR + dasm_put(Dst, 13140, Dt6(->metatable), Dt6(->nomm), 1<marked), (uint8_t)~LJ_GC_BLACK); +#line 3186 "vm_x64.dasc" + //| jmp <2 + dasm_put(Dst, 13225, DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist)); +#line 3187 "vm_x64.dasc" + break; + case BC_TSETS: + //| ins_ABC // RA = src, RB = table, RC = str const (~) + //| mov TAB:RB, [BASE+RB*8] + //| not RC + //| mov STR:RC, [KBASE+RC*8] + //| checktab TAB:RB, ->vmeta_tsets + //|->BC_TSETS_Z: // RB = GCtab *, RC = GCstr * + //| mov TMPRd, TAB:RB->hmask + //| and TMPRd, STR:RC->hash + //| imul TMPRd, #NODE + //| mov byte TAB:RB->nomm, 0 // Clear metamethod cache. + //| add NODE:TMPR, TAB:RB->node + //| settp ITYPE, STR:RC, LJ_TSTR + //|1: + //| cmp NODE:TMPR->key, ITYPE + //| jne >5 + //| // Ok, key found. Assumes: offsetof(Node, val) == 0 + //| cmp aword [TMPR], LJ_TNIL + //| je >4 // Previous value is nil? + //|2: + //| test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) + dasm_put(Dst, 13242, LJ_TTAB, Dt6(->hmask), Dt5(->hash), sizeof(Node), Dt6(->nomm), Dt6(->node), (unsigned int)(((uint64_t)LJ_TSTR<<47)), (unsigned int)((((uint64_t)LJ_TSTR<<47))>>32), DtB(->key), LJ_TNIL); +#line 3209 "vm_x64.dasc" + //| jnz >7 + //|3: // Set node value. + //| mov ITYPE, [BASE+RA*8] + //| mov [TMPR], ITYPE + //| ins_next + //| + //|4: // Check for __newindex if previous value is nil. + //| mov TAB:ITYPE, TAB:RB->metatable + //| test TAB:ITYPE, TAB:ITYPE + //| jz <2 + //| test byte TAB:ITYPE->nomm, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. + //| jmp <2 + //| + //|5: // Follow hash chain. + //| mov NODE:TMPR, NODE:TMPR->next + //| test NODE:TMPR, NODE:TMPR + //| jnz <1 + //| // End of hash chain: key not found, add a new one. + //| + //| // But check for __newindex first. 
+ //| mov TAB:TMPR, TAB:RB->metatable + dasm_put(Dst, 13339, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable), Dt6(->nomm), 1<next)); +#line 3231 "vm_x64.dasc" + //| test TAB:TMPR, TAB:TMPR + //| jz >6 // No metatable: continue. + //| test byte TAB:TMPR->nomm, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. + //|6: + //| mov TMP1, ITYPE + //| mov L:CARG1, SAVE_L + //| mov L:CARG1->base, BASE + //| lea CARG3, TMP1 + //| mov CARG2, TAB:RB + //| mov SAVE_PC, PC + //| call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) + //| // Handles write barrier for the new key. TValue * returned in eax (RC). + //| mov L:CARG1, SAVE_L + //| mov BASE, L:CARG1->base + //| mov TMPR, rax + //| movzx RAd, PC_RA + //| jmp <2 // Must check write barrier for value. + //| + //|7: // Possible table write barrier for the value. Skip valiswhite check. + //| barrierback TAB:RB, ITYPE + //| jmp <3 + dasm_put(Dst, 13419, Dt6(->metatable), Dt6(->nomm), 1<base), Dt1(->base), Dt6(->marked), (uint8_t)~LJ_GC_BLACK, DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist)); +#line 3253 "vm_x64.dasc" + break; + case BC_TSETB: + //| ins_ABC // RA = src, RB = table, RC = byte literal + //| mov TAB:RB, [BASE+RB*8] + //| checktab TAB:RB, ->vmeta_tsetb + //| cmp RCd, TAB:RB->asize + //| jae ->vmeta_tsetb + //| shl RCd, 3 + //| add RC, TAB:RB->array + //| cmp aword [RC], LJ_TNIL + //| je >3 // Previous value is nil? + //|1: + //| test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) + //| jnz >7 + //|2: // Set array slot. + //| mov ITYPE, [BASE+RA*8] + //| mov [RC], ITYPE + //| ins_next + //| + //|3: // Check for __newindex if previous value is nil. + //| mov TAB:TMPR, TAB:RB->metatable + //| test TAB:TMPR, TAB:TMPR + //| jz <1 + //| test byte TAB:TMPR->nomm, 1<asize), Dt6(->array), LJ_TNIL, Dt6(->marked), LJ_GC_BLACK, Dt6(->metatable)); +#line 3277 "vm_x64.dasc" + //| jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check. 
+ //| jmp <1 + //| + //|7: // Possible table write barrier for the value. Skip valiswhite check. + //| barrierback TAB:RB, TMPR + //| jmp <2 + dasm_put(Dst, 13632, Dt6(->nomm), 1<marked), (uint8_t)~LJ_GC_BLACK, DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist)); +#line 3283 "vm_x64.dasc" + break; + case BC_TSETR: + //| ins_ABC // RA = src, RB = table, RC = key + //| mov TAB:RB, [BASE+RB*8] + //| cleartp TAB:RB + //| cvttsd2si RCd, qword [BASE+RC*8] + //| test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) + //| jnz >7 + //|2: + //| cmp RCd, TAB:RB->asize + //| jae ->vmeta_tsetr + //| shl RCd, 3 + //| add RC, TAB:RB->array + //| // Set array slot. + //|->BC_TSETR_Z: + //| mov ITYPE, [BASE+RA*8] + //| mov [RC], ITYPE + //| ins_next + //| + //|7: // Possible table write barrier for the value. Skip valiswhite check. + //| barrierback TAB:RB, TMPR + //| jmp <2 + dasm_put(Dst, 13669, Dt6(->marked), LJ_GC_BLACK, Dt6(->asize), Dt6(->array), Dt6(->marked), (uint8_t)~LJ_GC_BLACK, DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist)); +#line 3305 "vm_x64.dasc" + break; + + case BC_TSETM: + //| ins_AD // RA = base (table at base-1), RD = num const (start index) + //|1: + //| mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word. + //| lea RA, [BASE+RA*8] + //| mov TAB:RB, [RA-8] // Guaranteed to be a table. + //| cleartp TAB:RB + //| test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) + //| jnz >7 + //|2: + //| mov RDd, MULTRES + //| sub RDd, 1 + //| jz >4 // Nothing to copy? + //| add RDd, TMPRd // Compute needed size. + //| cmp RDd, TAB:RB->asize + //| ja >5 // Doesn't fit into array part? + //| sub RDd, TMPRd + //| shl TMPRd, 3 + //| add TMPR, TAB:RB->array + //|3: // Copy result slots to table. + //| mov RB, [RA] + //| add RA, 8 + //| mov [TMPR], RB + //| add TMPR, 8 + //| sub RDd, 1 + //| jnz <3 + //|4: + //| ins_next + //| + //|5: // Need to resize array part. 
+ //| mov L:CARG1, SAVE_L + //| mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE. + //| mov CARG2, TAB:RB + //| mov CARG3d, RDd + //| mov L:RB, L:CARG1 + //| mov SAVE_PC, PC + //| call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) + //| mov BASE, L:RB->base + //| movzx RAd, PC_RA // Restore RA. + dasm_put(Dst, 13774, Dt6(->marked), LJ_GC_BLACK, Dt6(->asize), Dt6(->array), Dt1(->base), Dt1(->base)); +#line 3346 "vm_x64.dasc" + //| movzx RDd, PC_RD // Restore RD. + //| jmp <1 // Retry. + //| + //|7: // Possible table write barrier for any value. Skip valiswhite check. + //| barrierback TAB:RB, RD + //| jmp <2 + dasm_put(Dst, 13921, Dt6(->marked), (uint8_t)~LJ_GC_BLACK, DISPATCH_GL(gc.grayagain), DISPATCH_GL(gc.grayagain), Dt6(->gclist)); +#line 3352 "vm_x64.dasc" + break; + + /* -- Calls and vararg handling ----------------------------------------- */ + + case BC_CALL: case BC_CALLM: + //| ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs + dasm_put(Dst, 10630); +#line 3358 "vm_x64.dasc" + if (op == BC_CALLM) { + //| add NARGS:RDd, MULTRES + dasm_put(Dst, 13958); +#line 3360 "vm_x64.dasc" + } + //| mov LFUNC:RB, [BASE+RA*8] + //| checkfunc LFUNC:RB, ->vmeta_call_ra + //| lea BASE, [BASE+RA*8+16] + //| ins_call + dasm_put(Dst, 13962, LJ_TFUNC, Dt7(->pc)); +#line 3365 "vm_x64.dasc" + break; + + case BC_CALLMT: + //| ins_AD // RA = base, RD = extra_nargs + //| add NARGS:RDd, MULTRES + //| // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. + dasm_put(Dst, 13958); +#line 3371 "vm_x64.dasc" + break; + case BC_CALLT: + //| ins_AD // RA = base, RD = nargs+1 + //| lea RA, [BASE+RA*8+16] + //| mov KBASE, BASE // Use KBASE for move + vmeta_call hint. + //| mov LFUNC:RB, [RA-16] + //| checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call + //|->BC_CALLT_Z: + //| mov PC, [BASE-8] + //| test PCd, FRAME_TYPE + //| jnz >7 + //|1: + //| mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below. 
+ //| mov MULTRES, NARGS:RDd + //| sub NARGS:RDd, 1 + //| jz >3 + //|2: // Move args down. + //| mov RB, [RA] + //| add RA, 8 + //| mov [KBASE], RB + //| add KBASE, 8 + //| sub NARGS:RDd, 1 + //| jnz <2 + //| + //| mov LFUNC:RB, [BASE-16] + //|3: + //| cleartp LFUNC:RB + //| mov NARGS:RDd, MULTRES + //| cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function? + //| ja >5 + //|4: + //| ins_callt + dasm_put(Dst, 14027, LJ_TFUNC, FRAME_TYPE, Dt7(->ffid), Dt7(->pc)); +#line 3403 "vm_x64.dasc" + //| + //|5: // Tailcall to a fast function. + //| test PCd, FRAME_TYPE // Lua frame below? + //| jnz <4 + //| movzx RAd, PC_RA + //| neg RA + //| mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE. + //| cleartp LFUNC:KBASE + //| mov KBASE, LFUNC:KBASE->pc + //| mov KBASE, [KBASE+PC2PROTO(k)] + //| jmp <4 + //| + //|7: // Tailcall from a vararg function. + //| sub PC, FRAME_VARG + //| test PCd, FRAME_TYPEP + //| jnz >8 // Vararg frame below? + //| sub BASE, PC // Need to relocate BASE/KBASE down. + //| mov KBASE, BASE + //| mov PC, [BASE-8] + //| jmp <1 + //|8: + //| add PCd, FRAME_VARG + //| jmp <1 + dasm_put(Dst, 14147, FRAME_TYPE, Dt7(->pc), PC2PROTO(k), FRAME_VARG, FRAME_TYPEP, FRAME_VARG); +#line 3426 "vm_x64.dasc" + break; + + case BC_ITERC: + //| ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) + //| lea RA, [BASE+RA*8+16] // fb = base+2 + //| mov RB, [RA-32] // Copy state. fb[0] = fb[-4]. + //| mov RC, [RA-24] // Copy control var. fb[1] = fb[-3]. + //| mov [RA], RB + //| mov [RA+8], RC + //| mov LFUNC:RB, [RA-40] // Copy callable. fb[-2] = fb[-5] + //| mov [RA-16], LFUNC:RB + //| mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call. + //| checkfunc LFUNC:RB, ->vmeta_call + //| mov BASE, RA + //| ins_call + dasm_put(Dst, 14251, 2+1, LJ_TFUNC, Dt7(->pc)); +#line 3441 "vm_x64.dasc" + break; + + case BC_ITERN: + //| ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + //| // NYI: add hotloop, record BC_ITERN. 
+ //| mov TAB:RB, [BASE+RA*8-16] + //| cleartp TAB:RB + //| mov RCd, [BASE+RA*8-8] // Get index from control var. + //| mov TMPRd, TAB:RB->asize + //| add PC, 4 + //| mov ITYPE, TAB:RB->array + //|1: // Traverse array part. + //| cmp RCd, TMPRd; jae >5 // Index points after array part? + //| cmp aword [ITYPE+RC*8], LJ_TNIL; je >4 + //| cvtsi2sd xmm0, RCd + //| // Copy array slot to returned value. + //| mov RB, [ITYPE+RC*8] + //| mov [BASE+RA*8+8], RB + //| // Return array index as a numeric key. + //| movsd qword [BASE+RA*8], xmm0 + //| add RCd, 1 + //| mov [BASE+RA*8-8], RCd // Update control var. + //|2: + //| movzx RDd, PC_RD // Get target from ITERL. + //| branchPC RD + //|3: + //| ins_next + //| + //|4: // Skip holes in array part. + //| add RCd, 1 + //| jmp <1 + //| + //|5: // Traverse hash part. + //| sub RCd, TMPRd + //|6: + //| cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1. + //| imul ITYPEd, RCd, #NODE + dasm_put(Dst, 14341, Dt6(->asize), Dt6(->array), LJ_TNIL, -BCBIAS_J*4, Dt6(->hmask)); +#line 3478 "vm_x64.dasc" + //| add NODE:ITYPE, TAB:RB->node + //| cmp aword NODE:ITYPE->val, LJ_TNIL; je >7 + //| lea TMPRd, [RCd+TMPRd+1] + //| // Copy key and value from hash slot. + //| mov RB, NODE:ITYPE->key + //| mov RC, NODE:ITYPE->val + //| mov [BASE+RA*8], RB + //| mov [BASE+RA*8+8], RC + //| mov [BASE+RA*8-8], TMPRd + //| jmp <2 + //| + //|7: // Skip holes in hash part. + //| add RCd, 1 + //| jmp <6 + dasm_put(Dst, 14480, sizeof(Node), Dt6(->node), DtB(->val), LJ_TNIL, DtB(->key), DtB(->val)); +#line 3492 "vm_x64.dasc" + break; + + case BC_ISNEXT: + //| ins_AD // RA = base, RD = target (points to ITERN) + //| mov CFUNC:RB, [BASE+RA*8-24] + //| checkfunc CFUNC:RB, >5 + //| checktptp [BASE+RA*8-16], LJ_TTAB, >5 + //| cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5 + //| cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 + //| branchPC RD + //| mov64 TMPR, U64x(fffe7fff, 00000000) + //| mov [BASE+RA*8-8], TMPR // Initialize control var. 
+ //|1: + //| ins_next + //|5: // Despecialize bytecode if any of the checks fail. + //| mov PC_OP, BC_JMP + //| branchPC RD + dasm_put(Dst, 14539, LJ_TFUNC, LJ_TTAB, LJ_TNIL, Dt8(->ffid), FF_next_N, -BCBIAS_J*4, (unsigned int)(U64x(fffe7fff, 00000000)), (unsigned int)((U64x(fffe7fff, 00000000))>>32), BC_JMP); +#line 3509 "vm_x64.dasc" + //| mov byte [PC], BC_ITERC + //| jmp <1 + dasm_put(Dst, 14658, -BCBIAS_J*4, BC_ITERC); +#line 3511 "vm_x64.dasc" + break; + + case BC_VARG: + //| ins_ABC // RA = base, RB = nresults+1, RC = numparams + //| lea TMPR, [BASE+RC*8+(16+FRAME_VARG)] + //| lea RA, [BASE+RA*8] + //| sub TMPR, [BASE-8] + //| // Note: TMPR may now be even _above_ BASE if nargs was < numparams. + //| test RB, RB + //| jz >5 // Copy all varargs? + //| lea RB, [RA+RB*8-8] + //| cmp TMPR, BASE // No vararg slots? + //| jnb >2 + //|1: // Copy vararg slots to destination slots. + //| mov RC, [TMPR-16] + //| add TMPR, 8 + //| mov [RA], RC + //| add RA, 8 + //| cmp RA, RB // All destination slots filled? + //| jnb >3 + //| cmp TMPR, BASE // No more vararg slots? + //| jb <1 + //|2: // Fill up remainder with nil. + //| mov aword [RA], LJ_TNIL + //| add RA, 8 + //| cmp RA, RB + //| jb <2 + //|3: + //| ins_next + //| + //|5: // Copy all varargs. + //| mov MULTRES, 1 // MULTRES = 0+1 + //| mov RC, BASE + //| sub RC, TMPR + //| jbe <3 // No vararg slots? + //| mov RBd, RCd + //| shr RBd, 3 + //| add RBd, 1 + //| mov MULTRES, RBd // MULTRES = #varargs+1 + //| mov L:RB, SAVE_L + //| add RC, RA + //| cmp RC, L:RB->maxstack + //| ja >7 // Need to grow stack? + dasm_put(Dst, 14672, (16+FRAME_VARG), LJ_TNIL, Dt1(->maxstack)); +#line 3554 "vm_x64.dasc" + //|6: // Copy all vararg slots. + //| mov RC, [TMPR-16] + //| add TMPR, 8 + //| mov [RA], RC + //| add RA, 8 + //| cmp TMPR, BASE // No more vararg slots? + //| jb <6 + //| jmp <3 + //| + //|7: // Grow stack for varargs. 
+ //| mov L:RB->base, BASE + //| mov L:RB->top, RA + //| mov SAVE_PC, PC + //| sub TMPR, BASE // Need delta, because BASE may change. + //| mov TMP1hi, TMPRd + //| mov CARG2d, MULTRES + //| sub CARG2d, 1 + //| mov CARG1, L:RB + //| call extern lj_state_growstack // (lua_State *L, int n) + //| mov BASE, L:RB->base + //| movsxd TMPR, TMP1hi + //| mov RA, L:RB->top + //| add TMPR, BASE + //| jmp <6 + dasm_put(Dst, 14834, Dt1(->base), Dt1(->top), Dt1(->base), Dt1(->top)); +#line 3578 "vm_x64.dasc" + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + //| ins_AD // RA = results, RD = extra_nresults + //| add RDd, MULTRES // MULTRES >=1, so RD >=1. + //| // Fall through. Assumes BC_RET follows and ins_AD is a no-op. + dasm_put(Dst, 13958); +#line 3586 "vm_x64.dasc" + break; + + case BC_RET: case BC_RET0: case BC_RET1: + //| ins_AD // RA = results, RD = nresults+1 + if (op != BC_RET0) { + //| shl RAd, 3 + dasm_put(Dst, 14926); +#line 3592 "vm_x64.dasc" + } + //|1: + //| mov PC, [BASE-8] + //| mov MULTRES, RDd // Save nresults+1. + //| test PCd, FRAME_TYPE // Check frame type marker. + //| jnz >7 // Not returning to a fixarg Lua func? + dasm_put(Dst, 14930, FRAME_TYPE); +#line 3598 "vm_x64.dasc" + switch (op) { + case BC_RET: + //|->BC_RET_Z: + //| mov KBASE, BASE // Use KBASE for result move. + //| sub RDd, 1 + //| jz >3 + //|2: // Move results down. + //| mov RB, [KBASE+RA] + //| mov [KBASE-16], RB + //| add KBASE, 8 + //| sub RDd, 1 + //| jnz <2 + //|3: + //| mov RDd, MULTRES // Note: MULTRES may be >255. + //| movzx RBd, PC_RB // So cannot compare with RDL! + //|5: + //| cmp RBd, RDd // More results expected? + //| ja >6 + dasm_put(Dst, 14949); +#line 3616 "vm_x64.dasc" + break; + case BC_RET1: + //| mov RB, [BASE+RA] + //| mov [BASE-16], RB + dasm_put(Dst, 15002); +#line 3620 "vm_x64.dasc" + /* fallthrough */ + case BC_RET0: + //|5: + //| cmp PC_RB, RDL // More results expected? 
+ //| ja >6 + dasm_put(Dst, 15012); +#line 3625 "vm_x64.dasc" + default: + break; + } + //| movzx RAd, PC_RA + //| neg RA + //| lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8 + //| mov LFUNC:KBASE, [BASE-16] + //| cleartp LFUNC:KBASE + //| mov KBASE, LFUNC:KBASE->pc + //| mov KBASE, [KBASE+PC2PROTO(k)] + //| ins_next + //| + //|6: // Fill up results with nil. + dasm_put(Dst, 15023, Dt7(->pc), PC2PROTO(k)); +#line 3638 "vm_x64.dasc" + if (op == BC_RET) { + //| mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base. + //| add KBASE, 8 + dasm_put(Dst, 15084, LJ_TNIL); +#line 3641 "vm_x64.dasc" + } else { + //| mov aword [BASE+RD*8-24], LJ_TNIL + dasm_put(Dst, 15095, LJ_TNIL); +#line 3643 "vm_x64.dasc" + } + //| add RD, 1 + //| jmp <5 + //| + //|7: // Non-standard return case. + //| lea RB, [PC-FRAME_VARG] + //| test RBd, FRAME_TYPEP + //| jnz ->vm_return + //| // Return from vararg function: relocate BASE down and RA up. + //| sub BASE, RB + dasm_put(Dst, 15102, -FRAME_VARG, FRAME_TYPEP); +#line 3653 "vm_x64.dasc" + if (op != BC_RET0) { + //| add RA, RB + dasm_put(Dst, 15129); +#line 3655 "vm_x64.dasc" + } + //| jmp <1 + dasm_put(Dst, 10605); +#line 3657 "vm_x64.dasc" + break; + + /* -- Loops and branches ------------------------------------------------ */ + + //|.define FOR_IDX, [RA] + //|.define FOR_STOP, [RA+8] + //|.define FOR_STEP, [RA+16] + //|.define FOR_EXT, [RA+24] + + case BC_FORL: + //| hotloop RBd + //| // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. 
+ dasm_put(Dst, 15134, HOTCOUNT_PCMASK, GG_DISP2HOT, HOTCOUNT_LOOP); +#line 3669 "vm_x64.dasc" + break; + + case BC_JFORI: + case BC_JFORL: + case BC_FORI: + case BC_IFORL: + vk = (op == BC_IFORL || op == BC_JFORL); + //| ins_AJ // RA = base, RD = target (after end of loop or start of loop) + //| lea RA, [BASE+RA*8] + dasm_put(Dst, 15155); +#line 3678 "vm_x64.dasc" + if (!vk) { + //| checknumtp FOR_IDX, ->vmeta_for + //| checknumtp FOR_STOP, ->vmeta_for + dasm_put(Dst, 15160, LJ_TISNUM, LJ_TISNUM); +#line 3681 "vm_x64.dasc" + } else { +#ifdef LUA_USE_ASSERT + //| checknumtp FOR_STOP, ->assert_bad_for_arg_type + //| checknumtp FOR_STEP, ->assert_bad_for_arg_type + dasm_put(Dst, 15196, LJ_TISNUM, LJ_TISNUM); +#line 3685 "vm_x64.dasc" +#endif + } + //| mov RB, FOR_STEP + dasm_put(Dst, 15233); +#line 3688 "vm_x64.dasc" + if (!vk) { + //| checknum RB, ->vmeta_for + dasm_put(Dst, 15238, LJ_TISNUM); +#line 3690 "vm_x64.dasc" + } + //| movsd xmm0, qword FOR_IDX + //| movsd xmm1, qword FOR_STOP + dasm_put(Dst, 15257); +#line 3693 "vm_x64.dasc" + if (vk) { + //| addsd xmm0, qword FOR_STEP + //| movsd qword FOR_IDX, xmm0 + //| test RB, RB; js >3 + dasm_put(Dst, 15269); +#line 3697 "vm_x64.dasc" + } else { + //| jl >3 + dasm_put(Dst, 15289); +#line 3699 "vm_x64.dasc" + } + //| ucomisd xmm1, xmm0 + //|1: + //| movsd qword FOR_EXT, xmm0 + dasm_put(Dst, 15294); +#line 3703 "vm_x64.dasc" + if (op == BC_FORI) { + //| jnb >2 + //| branchPC RD + dasm_put(Dst, 15307, -BCBIAS_J*4); +#line 3706 "vm_x64.dasc" + } else if (op == BC_JFORI) { + //| branchPC RD + //| movzx RDd, PC_RD + //| jnb =>BC_JLOOP + dasm_put(Dst, 15318, -BCBIAS_J*4, BC_JLOOP); +#line 3710 "vm_x64.dasc" + } else if (op == BC_IFORL) { + //| jb >2 + //| branchPC RD + dasm_put(Dst, 15333, -BCBIAS_J*4); +#line 3713 "vm_x64.dasc" + } else { + //| jnb =>BC_JLOOP + dasm_put(Dst, 15329, BC_JLOOP); +#line 3715 "vm_x64.dasc" + } + //|2: + //| ins_next + //| + //|3: // Invert comparison if step is negative. 
+ //| ucomisd xmm0, xmm1 + //| jmp <1 + dasm_put(Dst, 15344); +#line 3722 "vm_x64.dasc" + break; + + case BC_ITERL: + //| hotloop RBd + //| // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. + dasm_put(Dst, 15134, HOTCOUNT_PCMASK, GG_DISP2HOT, HOTCOUNT_LOOP); +#line 3727 "vm_x64.dasc" + break; + + case BC_JITERL: + case BC_IITERL: + //| ins_AJ // RA = base, RD = target + //| lea RA, [BASE+RA*8] + //| mov RB, [RA] + //| cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil. + dasm_put(Dst, 15378, LJ_TNIL); +#line 3735 "vm_x64.dasc" + if (op == BC_JITERL) { + //| mov [RA-8], RB + //| jmp =>BC_JLOOP + dasm_put(Dst, 15395, BC_JLOOP); +#line 3738 "vm_x64.dasc" + } else { + //| branchPC RD // Otherwise save control var + branch. + //| mov [RA-8], RB + dasm_put(Dst, 15404, -BCBIAS_J*4); +#line 3741 "vm_x64.dasc" + } + //|1: + //| ins_next + dasm_put(Dst, 9795); +#line 3744 "vm_x64.dasc" + break; + + case BC_LOOP: + //| ins_A // RA = base, RD = target (loop extent) + //| // Note: RA/RD is only used by trace recorder to determine scope/extent + //| // This opcode does NOT jump, it's only purpose is to detect a hot loop. + //| hotloop RBd + //| // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. + dasm_put(Dst, 15134, HOTCOUNT_PCMASK, GG_DISP2HOT, HOTCOUNT_LOOP); +#line 3752 "vm_x64.dasc" + break; + + case BC_ILOOP: + //| ins_A // RA = base, RD = target (loop extent) + //| ins_next + dasm_put(Dst, 9797); +#line 3757 "vm_x64.dasc" + break; + + case BC_JLOOP: + //| ins_AD // RA = base (ignored), RD = traceno + //| mov RA, [DISPATCH+DISPATCH_J(trace)] + //| mov TRACE:RD, [RA+RD*8] + //| mov RD, TRACE:RD->mcode + //| mov L:RB, SAVE_L + //| mov [DISPATCH+DISPATCH_GL(jit_base)], BASE + //| mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB + //| // Save additional callee-save registers only used in compiled code. 
+ //| sub rsp, 16 + //| mov [rsp+16], r12 + //| mov [rsp+8], r13 + //| jmp RD + dasm_put(Dst, 15416, DISPATCH_J(trace), DtD(->mcode), DISPATCH_GL(jit_base), DISPATCH_GL(tmpbuf.L)); +#line 3772 "vm_x64.dasc" + break; + + case BC_JMP: + //| ins_AJ // RA = unused, RD = target + //| branchPC RD + //| ins_next + dasm_put(Dst, 15460, -BCBIAS_J*4); +#line 3778 "vm_x64.dasc" + break; + + /* -- Function headers -------------------------------------------------- */ + + /* + ** Reminder: A function may be called with func/args above L->maxstack, + ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, + ** too. This means all FUNC* ops (including fast functions) must check + ** for stack overflow _before_ adding more slots! + */ + + case BC_FUNCF: + //| hotcall RBd + dasm_put(Dst, 15488, HOTCOUNT_PCMASK, GG_DISP2HOT, HOTCOUNT_CALL); +#line 3791 "vm_x64.dasc" + case BC_FUNCV: /* NYI: compiled vararg functions. */ + //| // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. + break; + + case BC_JFUNCF: + case BC_IFUNCF: + //| ins_AD // BASE = new base, RA = framesize, RD = nargs+1 + //| mov KBASE, [PC-4+PC2PROTO(k)] + //| mov L:RB, SAVE_L + //| lea RA, [BASE+RA*8] // Top of frame. + //| cmp RA, L:RB->maxstack + //| ja ->vm_growstack_f + //| movzx RAd, byte [PC-4+PC2PROTO(numparams)] + //| cmp NARGS:RDd, RAd // Check for missing parameters. + //| jbe >3 + //|2: + dasm_put(Dst, 15509, -4+PC2PROTO(k), Dt1(->maxstack), -4+PC2PROTO(numparams)); +#line 3807 "vm_x64.dasc" + if (op == BC_JFUNCF) { + //| movzx RDd, PC_RD + //| jmp =>BC_JLOOP + dasm_put(Dst, 15543, BC_JLOOP); +#line 3810 "vm_x64.dasc" + } else { + //| ins_next + dasm_put(Dst, 9797); +#line 3812 "vm_x64.dasc" + } + //| + //|3: // Clear missing parameters. 
+ //| mov aword [BASE+NARGS:RD*8-8], LJ_TNIL + //| add NARGS:RDd, 1 + //| cmp NARGS:RDd, RAd + //| jbe <3 + //| jmp <2 + dasm_put(Dst, 15552, LJ_TNIL); +#line 3820 "vm_x64.dasc" + break; + + case BC_JFUNCV: + //| int3 // NYI: compiled vararg functions + dasm_put(Dst, 9308); +#line 3824 "vm_x64.dasc" + break; /* NYI: compiled vararg functions. */ + + case BC_IFUNCV: + //| ins_AD // BASE = new base, RA = framesize, RD = nargs+1 + //| lea RBd, [NARGS:RD*8+FRAME_VARG+8] + //| lea RD, [BASE+NARGS:RD*8+8] + //| mov LFUNC:KBASE, [BASE-16] + //| mov [RD-8], RB // Store delta + FRAME_VARG. + //| mov [RD-16], LFUNC:KBASE // Store copy of LFUNC. + //| mov L:RB, SAVE_L + //| lea RA, [RD+RA*8] + //| cmp RA, L:RB->maxstack + //| ja ->vm_growstack_v // Need to grow stack. + //| mov RA, BASE + //| mov BASE, RD + //| movzx RBd, byte [PC-4+PC2PROTO(numparams)] + //| test RBd, RBd + //| jz >2 + //| add RA, 8 + //|1: // Copy fixarg slots up to new frame. + //| add RA, 8 + //| cmp RA, BASE + //| jnb >3 // Less args than parameters? + //| mov KBASE, [RA-16] + //| mov [RD], KBASE + //| add RD, 8 + //| mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC). + //| sub RBd, 1 + //| jnz <1 + //|2: + dasm_put(Dst, 15575, FRAME_VARG+8, Dt1(->maxstack), -4+PC2PROTO(numparams), LJ_TNIL); +#line 3854 "vm_x64.dasc" + if (op == BC_JFUNCV) { + //| movzx RDd, PC_RD + //| jmp =>BC_JLOOP + dasm_put(Dst, 15543, BC_JLOOP); +#line 3857 "vm_x64.dasc" + } else { + //| mov KBASE, [PC-4+PC2PROTO(k)] + //| ins_next + dasm_put(Dst, 15679, -4+PC2PROTO(k)); +#line 3860 "vm_x64.dasc" + } + //| + //|3: // Clear missing parameters. 
+ //| mov aword [RD], LJ_TNIL + //| add RD, 8 + //| sub RBd, 1 + //| jnz <3 + //| jmp <2 + dasm_put(Dst, 15705, LJ_TNIL); +#line 3868 "vm_x64.dasc" + break; + + case BC_FUNCC: + case BC_FUNCCW: + //| ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1 + //| mov CFUNC:RB, [BASE-16] + //| cleartp CFUNC:RB + //| mov KBASE, CFUNC:RB->f + //| mov L:RB, SAVE_L + //| lea RD, [BASE+NARGS:RD*8-8] + //| mov L:RB->base, BASE + //| lea RA, [RD+8*LUA_MINSTACK] + //| cmp RA, L:RB->maxstack + //| mov L:RB->top, RD + dasm_put(Dst, 15728, Dt8(->f), Dt1(->base), 8*LUA_MINSTACK, Dt1(->maxstack), Dt1(->top)); +#line 3882 "vm_x64.dasc" + if (op == BC_FUNCC) { + //| mov CARG1, L:RB // Caveat: CARG1 may be RA. + dasm_put(Dst, 15774); +#line 3884 "vm_x64.dasc" + } else { + //| mov CARG2, KBASE + //| mov CARG1, L:RB // Caveat: CARG1 may be RA. + dasm_put(Dst, 15779); +#line 3887 "vm_x64.dasc" + } + //| ja ->vm_growstack_c // Need to grow stack. + //| set_vmstate C + dasm_put(Dst, 15788, DISPATCH_GL(vmstate), ~LJ_VMST_C); +#line 3890 "vm_x64.dasc" + if (op == BC_FUNCC) { + //| call KBASE // (lua_State *L) + dasm_put(Dst, 15798); +#line 3892 "vm_x64.dasc" + } else { + //| // (lua_State *L, lua_CFunction f) + //| call aword [DISPATCH+DISPATCH_GL(wrapf)] + dasm_put(Dst, 15803, DISPATCH_GL(wrapf)); +#line 3895 "vm_x64.dasc" + } + //| // nresults returned in eax (RD). + //| mov BASE, L:RB->base + //| mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB + //| set_vmstate INTERP + //| lea RA, [BASE+RD*8] + //| neg RA + //| add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 + //| mov PC, [BASE-8] // Fetch PC of caller. 
+ //| jmp ->vm_returnc + dasm_put(Dst, 15809, Dt1(->base), DISPATCH_GL(cur_L), DISPATCH_GL(vmstate), ~LJ_VMST_INTERP, Dt1(->top)); +#line 3905 "vm_x64.dasc" + break; + + /* ---------------------------------------------------------------------- */ + + default: + fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); + exit(2); + break; + } +} + +static int build_backend(BuildCtx *ctx) +{ + int op; + dasm_growpc(Dst, BC__MAX); + build_subroutines(ctx); + //|.code_op + dasm_put(Dst, 15844); +#line 3922 "vm_x64.dasc" + for (op = 0; op < BC__MAX; op++) + build_ins(ctx, (BCOp)op, op); + return BC__MAX; +} + +/* Emit pseudo frame-info for all assembler functions. */ +static void emit_asm_debug(BuildCtx *ctx) +{ + int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); + fprintf(ctx->fp, + ".Lframe0:\n" + "\t.long .LECIE0-.LSCIE0\n" + ".LSCIE0:\n" + "\t.long 0xffffffff\n" + "\t.byte 0x1\n" + "\t.string \"\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 0x10\n" + "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n" + "\t.byte 0x80+0x10\n\t.uleb128 0x1\n" + "\t.align 8\n" + ".LECIE0:\n\n"); + fprintf(ctx->fp, + ".LSFDE0:\n" + "\t.long .LEFDE0-.LASFDE0\n" + ".LASFDE0:\n" + "\t.long .Lframe0\n" + "\t.quad .Lbegin\n" + "\t.quad %d\n" + "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ + "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ + "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ + "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ + "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ + "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */ + "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */ + "\t.align 8\n" + ".LEFDE0:\n\n", fcofs, CFRAME_SIZE); + fprintf(ctx->fp, + ".LSFDE1:\n" + "\t.long .LEFDE1-.LASFDE1\n" + ".LASFDE1:\n" + "\t.long .Lframe0\n" + "\t.quad lj_vm_ffi_call\n" + "\t.quad %d\n" + "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset 
*/ + "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ + "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ + "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ + "\t.align 8\n" + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); + break; + default: /* Difficult for other modes. */ + break; + } +} + diff --git a/src/reusevm/lj_bcdef.h b/src/reusevm/lj_bcdef.h new file mode 100644 index 0000000000..c6579f4145 --- /dev/null +++ b/src/reusevm/lj_bcdef.h @@ -0,0 +1,220 @@ +/* This is a generated file. DO NOT EDIT! */ + +LJ_DATADEF const uint16_t lj_bc_ofs[] = { +0, +92, +184, +276, +368, +518, +671, +751, +831, +909, +987, +1047, +1106, +1162, +1218, +1267, +1316, +1351, +1388, +1415, +1465, +1522, +1613, +1672, +1731, +1790, +1849, +1913, +1972, +2031, +2090, +2149, +2189, +2265, +2341, +2417, +2493, +2550, +2641, +2725, +2768, +2811, +2842, +2872, +2902, +2953, +3001, +3113, +3217, +3267, +3317, +3376, +3461, +3583, +3689, +3717, +3745, +3910, +4056, +4161, +4226, +4421, +4673, +4808, +4970, +5065, +5129, +5190, +5193, +5383, +5468, +5625, +5834, +5948, +5951, +6101, +6208, +6329, +6444, +6567, +6587, +6662, +6733, +6753, +6797, +6835, +6855, +6874, +6924, +6951, +6971, +7039, +7097, +7097, +7232, +7233, +7343, +9512, +9587, +10209, +10339, +10437, +10597, +9658, +9842, +9967, +10040, +10082, +10695, +10759, +11520, +10813, +11200, +11568, +11731, +11759, +11613, +11836, +11882, +11928, +11974, +12020, +12066, +12112, +12158, +12204, +12250, +12296, +12614, +12690, +11790, +12411, +12342, +12480, +12549, +12754, +12822, +13591, +14027, +13970, +14099, +14187, +14278, +14369, +14460, +13649, +13756, +13863, +12890, +12954, +13088, +13267, +13375, +13483 +}; + +LJ_DATADEF const uint16_t lj_bc_mode[] = {}; + diff --git a/src/reusevm/lj_ffdef.h b/src/reusevm/lj_ffdef.h new file mode 100644 index 0000000000..6a9505df69 --- /dev/null +++ b/src/reusevm/lj_ffdef.h @@ -0,0 +1,202 @@ +/* This is a generated file. DO NOT EDIT! 
*/ + +FFDEF(assert) +FFDEF(type) +FFDEF(next) +FFDEF(pairs) +FFDEF(ipairs_aux) +FFDEF(ipairs) +FFDEF(getmetatable) +FFDEF(setmetatable) +FFDEF(getfenv) +FFDEF(setfenv) +FFDEF(rawget) +FFDEF(rawset) +FFDEF(rawequal) +FFDEF(unpack) +FFDEF(select) +FFDEF(tonumber) +FFDEF(tostring) +FFDEF(error) +FFDEF(pcall) +FFDEF(xpcall) +FFDEF(loadfile) +FFDEF(load) +FFDEF(loadstring) +FFDEF(dofile) +FFDEF(gcinfo) +FFDEF(collectgarbage) +FFDEF(newproxy) +FFDEF(print) +FFDEF(coroutine_status) +FFDEF(coroutine_running) +FFDEF(coroutine_isyieldable) +FFDEF(coroutine_create) +FFDEF(coroutine_yield) +FFDEF(coroutine_resume) +FFDEF(coroutine_wrap_aux) +FFDEF(coroutine_wrap) +FFDEF(math_abs) +FFDEF(math_floor) +FFDEF(math_ceil) +FFDEF(math_sqrt) +FFDEF(math_log10) +FFDEF(math_exp) +FFDEF(math_sin) +FFDEF(math_cos) +FFDEF(math_tan) +FFDEF(math_asin) +FFDEF(math_acos) +FFDEF(math_atan) +FFDEF(math_sinh) +FFDEF(math_cosh) +FFDEF(math_tanh) +FFDEF(math_frexp) +FFDEF(math_modf) +FFDEF(math_log) +FFDEF(math_atan2) +FFDEF(math_pow) +FFDEF(math_fmod) +FFDEF(math_ldexp) +FFDEF(math_min) +FFDEF(math_max) +FFDEF(math_random) +FFDEF(math_randomseed) +FFDEF(bit_tobit) +FFDEF(bit_bnot) +FFDEF(bit_bswap) +FFDEF(bit_lshift) +FFDEF(bit_rshift) +FFDEF(bit_arshift) +FFDEF(bit_rol) +FFDEF(bit_ror) +FFDEF(bit_band) +FFDEF(bit_bor) +FFDEF(bit_bxor) +FFDEF(bit_tohex) +FFDEF(string_byte) +FFDEF(string_char) +FFDEF(string_sub) +FFDEF(string_rep) +FFDEF(string_reverse) +FFDEF(string_lower) +FFDEF(string_upper) +FFDEF(string_dump) +FFDEF(string_find) +FFDEF(string_match) +FFDEF(string_gmatch_aux) +FFDEF(string_gmatch) +FFDEF(string_gsub) +FFDEF(string_format) +FFDEF(table_maxn) +FFDEF(table_insert) +FFDEF(table_concat) +FFDEF(table_sort) +FFDEF(table_new) +FFDEF(table_clear) +FFDEF(io_method_close) +FFDEF(io_method_read) +FFDEF(io_method_write) +FFDEF(io_method_flush) +FFDEF(io_method_seek) +FFDEF(io_method_setvbuf) +FFDEF(io_method_lines) +FFDEF(io_method___gc) +FFDEF(io_method___tostring) +FFDEF(io_open) 
+FFDEF(io_popen) +FFDEF(io_tmpfile) +FFDEF(io_close) +FFDEF(io_read) +FFDEF(io_write) +FFDEF(io_flush) +FFDEF(io_input) +FFDEF(io_output) +FFDEF(io_lines) +FFDEF(io_type) +FFDEF(os_execute) +FFDEF(os_remove) +FFDEF(os_rename) +FFDEF(os_tmpname) +FFDEF(os_getenv) +FFDEF(os_exit) +FFDEF(os_clock) +FFDEF(os_date) +FFDEF(os_time) +FFDEF(os_difftime) +FFDEF(os_setlocale) +FFDEF(debug_getregistry) +FFDEF(debug_getmetatable) +FFDEF(debug_setmetatable) +FFDEF(debug_getfenv) +FFDEF(debug_setfenv) +FFDEF(debug_getinfo) +FFDEF(debug_getlocal) +FFDEF(debug_setlocal) +FFDEF(debug_getupvalue) +FFDEF(debug_setupvalue) +FFDEF(debug_upvalueid) +FFDEF(debug_upvaluejoin) +FFDEF(debug_sethook) +FFDEF(debug_gethook) +FFDEF(debug_debug) +FFDEF(debug_traceback) +FFDEF(jit_on) +FFDEF(jit_off) +FFDEF(jit_flush) +FFDEF(jit_auditlog) +FFDEF(jit_status) +FFDEF(jit_tracebarrier) +FFDEF(jit_opt_start) +FFDEF(jit_vmprofile_open) +FFDEF(jit_vmprofile_close) +FFDEF(jit_vmprofile_select) +FFDEF(jit_vmprofile_start) +FFDEF(jit_vmprofile_stop) +FFDEF(ffi_meta___index) +FFDEF(ffi_meta___newindex) +FFDEF(ffi_meta___eq) +FFDEF(ffi_meta___len) +FFDEF(ffi_meta___lt) +FFDEF(ffi_meta___le) +FFDEF(ffi_meta___concat) +FFDEF(ffi_meta___call) +FFDEF(ffi_meta___add) +FFDEF(ffi_meta___sub) +FFDEF(ffi_meta___mul) +FFDEF(ffi_meta___div) +FFDEF(ffi_meta___mod) +FFDEF(ffi_meta___pow) +FFDEF(ffi_meta___unm) +FFDEF(ffi_meta___tostring) +FFDEF(ffi_meta___pairs) +FFDEF(ffi_meta___ipairs) +FFDEF(ffi_clib___index) +FFDEF(ffi_clib___newindex) +FFDEF(ffi_clib___gc) +FFDEF(ffi_callback_free) +FFDEF(ffi_callback_set) +FFDEF(ffi_cdef) +FFDEF(ffi_new) +FFDEF(ffi_cast) +FFDEF(ffi_typeof) +FFDEF(ffi_typeinfo) +FFDEF(ffi_istype) +FFDEF(ffi_sizeof) +FFDEF(ffi_alignof) +FFDEF(ffi_offsetof) +FFDEF(ffi_errno) +FFDEF(ffi_string) +FFDEF(ffi_copy) +FFDEF(ffi_fill) +FFDEF(ffi_abi) +FFDEF(ffi_metatype) +FFDEF(ffi_gc) +FFDEF(ffi_load) + +#undef FFDEF + +#ifndef FF_NUM_ASMFUNC +#define FF_NUM_ASMFUNC 57 +#endif + diff --git 
a/src/reusevm/lj_folddef.h b/src/reusevm/lj_folddef.h new file mode 100644 index 0000000000..1e7f20bcc6 --- /dev/null +++ b/src/reusevm/lj_folddef.h @@ -0,0 +1,1154 @@ +/* This is a generated file. DO NOT EDIT! */ + +static const FoldFunc fold_func[] = { + fold_kfold_numarith, + fold_kfold_numabsneg, + fold_kfold_ldexp, + fold_kfold_fpmath, + fold_kfold_numpow, + fold_kfold_numcomp, + fold_kfold_intarith, + fold_kfold_intovarith, + fold_kfold_bnot, + fold_kfold_bswap, + fold_kfold_intcomp, + fold_kfold_intcomp0, + fold_kfold_int64arith, + fold_kfold_int64arith2, + fold_kfold_int64shift, + fold_kfold_bnot64, + fold_kfold_bswap64, + fold_kfold_int64comp, + fold_kfold_int64comp0, + fold_kfold_snew_kptr, + fold_kfold_snew_empty, + fold_kfold_strref, + fold_kfold_strref_snew, + fold_kfold_strcmp, + fold_bufput_append, + fold_bufput_kgc, + fold_bufstr_kfold_cse, + fold_bufput_kfold_op, + fold_bufput_kfold_rep, + fold_bufput_kfold_fmt, + fold_kfold_add_kgc, + fold_kfold_add_kptr, + fold_kfold_add_kright, + fold_kfold_tobit, + fold_kfold_conv_kint_num, + fold_kfold_conv_kintu32_num, + fold_kfold_conv_kint_ext, + fold_kfold_conv_kint_i64, + fold_kfold_conv_kint64_num_i64, + fold_kfold_conv_kint64_num_u64, + fold_kfold_conv_kint64_int_i64, + fold_kfold_conv_knum_int_num, + fold_kfold_conv_knum_u32_num, + fold_kfold_conv_knum_i64_num, + fold_kfold_conv_knum_u64_num, + fold_kfold_tostr_knum, + fold_kfold_tostr_kint, + fold_kfold_strto, + lj_opt_cse, + fold_kfold_kref, + fold_shortcut_round, + fold_shortcut_left, + fold_shortcut_dropleft, + fold_shortcut_leftleft, + fold_simplify_numadd_negx, + fold_simplify_numadd_xneg, + fold_simplify_numsub_k, + fold_simplify_numsub_negk, + fold_simplify_numsub_xneg, + fold_simplify_nummuldiv_k, + fold_simplify_nummuldiv_negk, + fold_simplify_nummuldiv_negneg, + fold_simplify_numpow_xk, + fold_simplify_numpow_kx, + fold_shortcut_conv_num_int, + fold_simplify_conv_int_num, + fold_simplify_conv_i64_num, + fold_simplify_conv_int_i64, + 
fold_simplify_conv_flt_num, + fold_simplify_tobit_conv, + fold_simplify_floor_conv, + fold_simplify_conv_sext, + fold_simplify_conv_narrow, + fold_cse_conv, + fold_narrow_convert, + fold_simplify_intadd_k, + fold_simplify_intmul_k, + fold_simplify_intsub_k, + fold_simplify_intsub_kleft, + fold_simplify_intadd_k64, + fold_simplify_intsub_k64, + fold_simplify_intmul_k32, + fold_simplify_intmul_k64, + fold_simplify_intmod_k, + fold_simplify_intmod_kleft, + fold_simplify_intsub, + fold_simplify_intsubadd_leftcancel, + fold_simplify_intsubsub_leftcancel, + fold_simplify_intsubsub_rightcancel, + fold_simplify_intsubadd_rightcancel, + fold_simplify_intsubaddadd_cancel, + fold_simplify_band_k, + fold_simplify_bor_k, + fold_simplify_bxor_k, + fold_simplify_shift_ik, + fold_simplify_shift_andk, + fold_simplify_shift1_ki, + fold_simplify_shift2_ki, + fold_simplify_shiftk_andk, + fold_simplify_andk_shiftk, + fold_simplify_andor_k, + fold_simplify_andor_k64, + fold_reassoc_intarith_k, + fold_reassoc_intarith_k64, + fold_reassoc_dup, + fold_reassoc_bxor, + fold_reassoc_shift, + fold_reassoc_minmax_k, + fold_reassoc_minmax_left, + fold_reassoc_minmax_right, + fold_abc_fwd, + fold_abc_k, + fold_abc_invar, + fold_comm_swap, + fold_comm_equal, + fold_comm_comp, + fold_comm_dup, + fold_comm_bxor, + fold_merge_eqne_snew_kgc, + lj_opt_fwd_aload, + fold_kfold_hload_kkptr, + lj_opt_fwd_hload, + lj_opt_fwd_uload, + lj_opt_fwd_tab_len, + fold_cse_uref, + lj_opt_fwd_hrefk, + fold_fwd_href_tnew, + fold_fwd_href_tdup, + fold_fload_tab_tnew_asize, + fold_fload_tab_tnew_hmask, + fold_fload_tab_tdup_asize, + fold_fload_tab_tdup_hmask, + fold_fload_tab_ah, + fold_fload_str_len_kgc, + fold_fload_str_len_snew, + fold_fload_str_len_tostr, + fold_fload_cdata_typeid_kgc, + fold_fload_cdata_int64_kgc, + fold_fload_cdata_typeid_cnew, + fold_fload_cdata_ptr_int64_cnew, + lj_opt_cse, + lj_opt_fwd_fload, + fold_fwd_sload, + fold_xload_kptr, + lj_opt_fwd_xload, + fold_barrier_tab, + fold_barrier_tnew_tdup, 
+ lj_opt_dse_ahstore, + lj_opt_dse_ustore, + lj_opt_dse_fstore, + lj_opt_dse_xstore, + lj_ir_emit +}; + +static const uint32_t fold_hash[992] = { +0xffffffff, +0x28b27295, +0xffffffff, +0x48b2a695, +0xffffffff, +0xffffffff, +0x7f754015, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x6542801c, +0x730bffff, +0xffffffff, +0x1108701c, +0x0a085816, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x1e505c16, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x6740801c, +0x7309ffff, +0x21b46c1b, +0x1106701c, +0x0a065816, +0xffffffff, +0xffffffff, +0x26b271d5, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x7307ffff, +0xffffffff, +0x1104701c, +0x0a045816, +0xffffffff, +0xffffffff, +0xffffffff, +0x92ad4000, +0x17bb8800, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x48b2a275, +0x5f4ffc20, +0xffffffff, +0x7305ffff, +0x25b25ad4, +0x1102701c, +0x0a025816, +0xffffffff, +0x9179ffff, +0x18a94c55, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x5f4dfc20, +0x2051fc17, +0x7303ffff, +0xffffffff, +0x1100701c, +0x0a005816, +0x6b66cc16, +0x4ab4a01b, +0x9777ffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x6a489016, +0xffffffff, +0xffffffff, +0xffffffff, +0x5f4bfc20, +0xffffffff, +0x7301ffff, +0xffffffff, +0xffffffff, +0x91adfc00, +0xffffffff, +0x8475ffff, +0x0e4e7016, +0x015e6c44, +0x120bfc1c, +0x3268d001, +0x5953fc28, +0x41b3666e, +0x3e5bfc16, +0xffffffff, +0xffffffff, +0x5f49fc20, +0xffffffff, +0xffffffff, +0x89885c11, +0x42b366ce, +0xffffffff, +0xffffffff, +0x7d73ffff, +0x0e4c7016, +0x015c6c44, +0xffffffff, +0xffffffff, +0xffffffff, +0x5359fc16, +0xffffffff, +0xffffffff, +0x1dbf880f, +0x5f47fc20, +0x8c89fc01, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x0e4a7016, +0xffffffff, +0x076e5816, +0xffffffff, +0x44b365ae, 
+0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x69448bff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x716fffff, +0x0e487016, +0xffffffff, +0x076c5816, +0xffffffff, +0xffffffff, +0x63409016, +0x5155fc16, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x87896c00, +0xffffffff, +0x556dffff, +0x0e467016, +0xffffffff, +0x076a5816, +0xffffffff, +0xffffffff, +0x4d53fc16, +0xffffffff, +0x25b25ad3, +0x4ab2a6ae, +0xffffffff, +0x2ab26e8e, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x43b36676, +0x6744881c, +0x716bffff, +0xffffffff, +0xffffffff, +0x48b2aa76, +0xffffffff, +0x4b51fc16, +0xffffffff, +0x46696400, +0xffffffff, +0xffffffff, +0xffffffff, +0x353c7800, +0xffffffff, +0xffffffff, +0x167f37ff, +0x97a1fc00, +0xffffffff, +0xffffffff, +0xffffffff, +0x06665816, +0x8489fc09, +0x8a89440e, +0x5e4ffc16, +0xffffffff, +0xffffffff, +0x83894009, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x7467ffff, +0xffffffff, +0xffffffff, +0x06645816, +0xffffffff, +0xffffffff, +0x5e4dfc16, +0xffffffff, +0x89885c10, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x85885c00, +0xffffffff, +0xffffffff, +0xffffffff, +0x7465ffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x1dbf880e, +0xffffffff, +0x5e4bfc16, +0x093e5800, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x5e49fc16, +0x083c5800, +0xffffffff, +0x3113fc1a, +0xffffffff, +0x1f506416, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x5e47fc16, +0xffffffff, +0xffffffff, +0x3111fc1a, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x8c89fc11, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x065c5816, +0xffffffff, +0xffffffff, 
+0x5d45fc16, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x43b36675, +0xffffffff, +0x6a4c9816, +0xffffffff, +0xffffffff, +0x48b2aa75, +0x02626c16, +0xffffffff, +0x0d5a701c, +0xffffffff, +0xffffffff, +0xffffffff, +0x5c43fc16, +0x7f75401b, +0x48b2a296, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x1cbf8816, +0x66428416, +0x8489fc08, +0x45b5641b, +0x0d58701c, +0x06585816, +0x82894008, +0x1e505c1c, +0x7bbffc1e, +0x5b41fc16, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x24b25a72, +0x4e5273ff, +0xffffffff, +0x5752a7ff, +0x64408416, +0xffffffff, +0xffffffff, +0x0d56701c, +0x89885c0f, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x6866cfff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x1dbf880d, +0xffffffff, +0xffffffff, +0x0c54701c, +0x06545816, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x6c64cfff, +0xffffffff, +0xffffffff, +0x614e73ff, +0x355cbbff, +0xffffffff, +0xffffffff, +0x7155ffff, +0x045a6c16, +0x0c52701c, +0x06525816, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x3268d000, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x614c73ff, +0xffffffff, +0xffffffff, +0xffffffff, +0x5553ffff, +0x3d56b82e, +0x0c50701c, +0x06505816, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x8c89fc10, +0xffffffff, +0xffffffff, +0x614a73ff, +0x23b259d4, +0x8c89fc00, +0xffffffff, +0x7151ffff, +0x3d54b82e, +0xffffffff, +0x064e5816, +0xffffffff, +0x3a53fc2e, +0xffffffff, +0x335ebc44, +0x05126c1b, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x604873ff, +0xffffffff, +0x7a87fc00, +0x41b3668e, +0xffffffff, +0x5a52a028, +0xffffffff, +0x064c5816, +0x48b2a295, +0x3751fc2e, +0x97c3ffff, +0xffffffff, +0x05106c1b, +0x31125c17, +0x1bbf8815, +0x8489fc07, +0xffffffff, +0x604673ff, +0xffffffff, +0x7985fc00, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x064a5816, +0xffffffff, +0x97c1ffff, 
+0xffffffff, +0x24b25a71, +0x050e6c1b, +0x31105c17, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x7783fc00, +0x88885c0e, +0xffffffff, +0xffffffff, +0x78846400, +0x06485816, +0xffffffff, +0x97bfffff, +0xffffffff, +0x5255fc1c, +0x050c6c1b, +0xffffffff, +0xffffffff, +0x1dbf880c, +0xffffffff, +0x3650bbff, +0x6b64c816, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x92ad3c00, +0x06465816, +0x7e753fff, +0x6a468c16, +0x97bdffff, +0x5053fc1c, +0x050a6c1b, +0x8b894811, +0x4ab2a66e, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x0c44701c, +0x06445816, +0x2bb26eae, +0xffffffff, +0xffffffff, +0x4f51fc1c, +0x05086c1b, +0x43b36696, +0xffffffff, +0x6b66cc1b, +0xffffffff, +0xffffffff, +0x48b2aa96, +0xffffffff, +0xffffffff, +0x7545ffff, +0xffffffff, +0x0c42701c, +0x06425816, +0x6650a016, +0x8c89fc0f, +0xffffffff, +0xffffffff, +0x05066c1b, +0x22b259d3, +0xffffffff, +0xffffffff, +0x03686fff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x7443ffff, +0xffffffff, +0x0c40701c, +0x06405816, +0xffffffff, +0x63408c16, +0xffffffff, +0xffffffff, +0x05046c1b, +0xffffffff, +0x684287ff, +0xffffffff, +0x86893400, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x7441ffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x1bbf8814, +0x8489fc06, +0x4ab2a2ae, +0x05026c1b, +0xffffffff, +0xffffffff, +0x149bfc16, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x3b57fc1b, +0x24b25a70, +0x48b2a676, +0x49b3ffff, +0xffffffff, +0xffffffff, +0x05006c1b, +0xffffffff, +0xffffffff, +0xffffffff, +0x1f50641c, +0xffffffff, +0x3c56b81b, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x3b55fc1b, +0xffffffff, +0xffffffff, +0xffffffff, +0x1dbf880b, +0xffffffff, +0x81893c09, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x3c54b81b, +0x624e8016, +0x54585bff, +0xffffffff, +0xffffffff, +0x3853fc1b, +0xffffffff, +0x8b894810, +0x91afffff, 
+0xffffffff, +0x5d45fc1c, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x3952b81b, +0x624c8016, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x43b36695, +0xffffffff, +0xffffffff, +0x5c43fc1c, +0xffffffff, +0x157e5c16, +0x48b2aa95, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x139a6416, +0x6742841c, +0x8c89fc0e, +0xffffffff, +0xffffffff, +0x6e15fc28, +0x1aabffff, +0x31126bff, +0x5b41fc1c, +0xffffffff, +0xffffffff, +0x1f506016, +0xffffffff, +0x3f5a6fff, +0xffffffff, +0xffffffff, +0x62488016, +0x4e525bff, +0x6540841c, +0xffffffff, +0xffffffff, +0x97b00000, +0xffffffff, +0x19a9ffff, +0x31106bff, +0x47b3feb3, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x62468016, +0xffffffff, +0x1bbf8813, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x97a7ffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x6654a816, +0xffffffff, +0xffffffff, +0xffffffff, +0x28b27275, +0x24b25a6f, +0x614e5bff, +0x48b2a675, +0xffffffff, +0x6d65fc33, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x6a4a9416, +0x00666c1b, +0xffffffff, +0x64428016, +0x614c5bff, +0xffffffff, +0x1dbf880a, +0xffffffff, +0x80893c08, +0xffffffff, +0x97a3ffff, +0x4ab4a41b, +0x6f15fc16, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x00646c1b, +0x5652a3ff, +0x66408016, +0x614a5bff, +0x8b89480f, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x6c66cbff, +0xffffffff, +0xffffffff, +0xffffffff, +0x00626c1b, +0xffffffff, +0x60485bff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x979fffff, +0xffffffff, +0xffffffff, +0x76133417, +0x25b25ab4, +0x6864cbff, +0xffffffff, +0xffffffff, +0x00606c1b, +0xffffffff, +0x60465bff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x979dffff, 
+0xffffffff, +0xffffffff, +0x76113417, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x7f754017, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x6d67fc32, +0x8c89fc04, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x2eb65bff, +0xffffffff, +0x9699ffff, +0x0b0bfc16, +0x345eb844, +0xffffffff, +0x42b366ae, +0xffffffff, +0xffffffff, +0xffffffff, +0x46696402, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x353e7c00, +0xffffffff, +0xffffffff, +0xffffffff, +0x9597ffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x1bbf8809, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x2fb85c00, +0xffffffff, +0xffffffff, +0x6750a01c, +0x9495ffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x8a89480e, +0xffffffff, +0x00566c1b, +0xffffffff, +0xffffffff, +0xffffffff, +0x3013101a, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x9393ffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x2051fc19, +0xffffffff, +0xffffffff, +0xffffffff, +0x00546c1b, +0xffffffff, +0xffffffff, +0xffffffff, +0x3011101a, +0x2db66fff, +0xffffffff, +0x25b25ab3, +0xffffffff, +0x9391ffff, +0x29b26e6e, +0xffffffff, +0x4ab2a26e, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x00526c1b, +0xffffffff, +0x2cb26ece, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x7c7a5fff, +0x66448816, +0x8c8fffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x40b365d3, +0xffffffff, +0x00506c1b, +0x48b2a696, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x7c785fff, +0xffffffff, +0x8e8dffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x6b64c81b, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x9717ffff, +0xffffffff, +0x6a4e9c16, +0x0a145816, +0xffffffff, +0x908bffff, 
+0xffffffff, +0xffffffff, +0x46696401, +0xffffffff, +0xffffffff, +0x27b271d6, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x7015ffff, +0xffffffff, +0x3112701c, +0x31125816, +0xffffffff, +0x8d89ffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x684083ff, +0x103e7000, +0x4c6ffc16, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x7213ffff, +0xffffffff, +0x3110701c, +0x48b2a276, +0x31105816, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x0f3c7000, +0x1f50601c, +0x4b6dfc16, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x7211ffff, +0xffffffff, +0x110e701c, +0x0a0e5816, +0x2051fc18, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x4b6bfc16, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x47b3fed3, +0x730fffff, +0xffffffff, +0x110c701c, +0x0a0c5816, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0xffffffff, +0x6754a81c, +0x8f8a67ff, +0xffffffff, +0x3268d002, +0x5853fc29, +0xffffffff, +0xffffffff, +0x730dffff, +0xffffffff, +0x110a701c, +0x0a0a5816, +0xffffffff, +0xffffffff +}; + +#define fold_hashkey(k) (lj_rol(lj_rol((k),26)-(k),25)%991) + diff --git a/src/reusevm/lj_libdef.h b/src/reusevm/lj_libdef.h new file mode 100644 index 0000000000..f6a64a0e6c --- /dev/null +++ b/src/reusevm/lj_libdef.h @@ -0,0 +1,401 @@ +/* This is a generated file. DO NOT EDIT! 
*/ + +#ifdef LJLIB_MODULE_base +#undef LJLIB_MODULE_base +static const lua_CFunction lj_lib_cf_base[] = { + lj_ffh_assert, + lj_ffh_next, + lj_ffh_pairs, + lj_ffh_ipairs_aux, + lj_ffh_ipairs, + lj_ffh_setmetatable, + lj_cf_getfenv, + lj_cf_setfenv, + lj_ffh_rawget, + lj_cf_rawset, + lj_cf_rawequal, + lj_cf_unpack, + lj_cf_select, + lj_ffh_tonumber, + lj_ffh_tostring, + lj_cf_error, + lj_ffh_pcall, + lj_cf_loadfile, + lj_cf_load, + lj_cf_loadstring, + lj_cf_dofile, + lj_cf_gcinfo, + lj_cf_collectgarbage, + lj_cf_newproxy, + lj_cf_print +}; +static const uint8_t lj_lib_init_base[] = { +2,0,28,70,97,115,115,101,114,116,195,110,105,108,199,98,111,111,108,101,97, +110,252,1,200,117,115,101,114,100,97,116,97,198,115,116,114,105,110,103,197, +117,112,118,97,108,198,116,104,114,101,97,100,197,112,114,111,116,111,200,102, +117,110,99,116,105,111,110,197,116,114,97,99,101,197,99,100,97,116,97,197,116, +97,98,108,101,252,9,198,110,117,109,98,101,114,132,116,121,112,101,68,110,101, +120,116,253,69,112,97,105,114,115,64,253,70,105,112,97,105,114,115,140,103, +101,116,109,101,116,97,116,97,98,108,101,76,115,101,116,109,101,116,97,116, +97,98,108,101,7,103,101,116,102,101,110,118,7,115,101,116,102,101,110,118,70, +114,97,119,103,101,116,6,114,97,119,115,101,116,8,114,97,119,101,113,117,97, +108,6,117,110,112,97,99,107,6,115,101,108,101,99,116,72,116,111,110,117,109, +98,101,114,72,116,111,115,116,114,105,110,103,5,101,114,114,111,114,69,112, +99,97,108,108,134,120,112,99,97,108,108,8,108,111,97,100,102,105,108,101,4, +108,111,97,100,10,108,111,97,100,115,116,114,105,110,103,6,100,111,102,105, +108,101,6,103,99,105,110,102,111,14,99,111,108,108,101,99,116,103,97,114,98, +97,103,101,252,2,8,110,101,119,112,114,111,120,121,200,116,111,115,116,114, +105,110,103,5,112,114,105,110,116,252,3,200,95,86,69,82,83,73,79,78,250,255 +}; +#endif + +#ifdef LJLIB_MODULE_coroutine +#undef LJLIB_MODULE_coroutine +static const lua_CFunction lj_lib_cf_coroutine[] = { + lj_cf_coroutine_status, + 
lj_cf_coroutine_running, + lj_cf_coroutine_isyieldable, + lj_cf_coroutine_create, + lj_ffh_coroutine_yield, + lj_ffh_coroutine_resume, + lj_cf_coroutine_wrap +}; +static const uint8_t lj_lib_init_coroutine[] = { +30,13,7,6,115,116,97,116,117,115,7,114,117,110,110,105,110,103,11,105,115,121, +105,101,108,100,97,98,108,101,6,99,114,101,97,116,101,69,121,105,101,108,100, +70,114,101,115,117,109,101,254,4,119,114,97,112,255 +}; +#endif + +#ifdef LJLIB_MODULE_math +#undef LJLIB_MODULE_math +static const lua_CFunction lj_lib_cf_math[] = { + lj_ffh_math_abs, + lj_ffh_math_sqrt, + lj_ffh_math_log, + lj_ffh_math_atan2, + lj_ffh_math_ldexp, + lj_ffh_math_min, + lj_cf_math_random, + lj_cf_math_randomseed +}; +static const uint8_t lj_lib_init_math[] = { +38,16,30,67,97,98,115,133,102,108,111,111,114,132,99,101,105,108,68,115,113, +114,116,133,108,111,103,49,48,131,101,120,112,131,115,105,110,131,99,111,115, +131,116,97,110,132,97,115,105,110,132,97,99,111,115,132,97,116,97,110,132,115, +105,110,104,132,99,111,115,104,132,116,97,110,104,133,102,114,101,120,112,132, +109,111,100,102,67,108,111,103,249,3,100,101,103,0,1,2,0,0,1,2,24,1,0,0,76, +1,2,0,241,135,158,166,3,220,203,178,130,4,249,3,114,97,100,0,1,2,0,0,1,2,24, +1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,69,97,116,97,110,50,131, +112,111,119,132,102,109,111,100,69,108,100,101,120,112,67,109,105,110,131,109, +97,120,251,24,45,68,84,251,33,9,64,194,112,105,250,251,0,0,0,0,0,0,240,127, +196,104,117,103,101,250,252,2,6,114,97,110,100,111,109,252,2,10,114,97,110, +100,111,109,115,101,101,100,255 +}; +#endif + +#ifdef LJLIB_MODULE_bit +#undef LJLIB_MODULE_bit +static const lua_CFunction lj_lib_cf_bit[] = { + lj_ffh_bit_tobit, + lj_ffh_bit_bnot, + lj_ffh_bit_bswap, + lj_ffh_bit_lshift, + lj_ffh_bit_band, + lj_cf_bit_tohex +}; +static const uint8_t lj_lib_init_bit[] = { +64,40,12,69,116,111,98,105,116,68,98,110,111,116,69,98,115,119,97,112,70,108, 
+115,104,105,102,116,134,114,115,104,105,102,116,135,97,114,115,104,105,102, +116,131,114,111,108,131,114,111,114,68,98,97,110,100,131,98,111,114,132,98, +120,111,114,5,116,111,104,101,120,255 +}; +#endif + +#ifdef LJLIB_MODULE_string +#undef LJLIB_MODULE_string +static const lua_CFunction lj_lib_cf_string[] = { + lj_ffh_string_byte, + lj_ffh_string_char, + lj_ffh_string_sub, + lj_cf_string_rep, + lj_ffh_string_reverse, + lj_cf_string_dump, + lj_cf_string_find, + lj_cf_string_match, + lj_cf_string_gmatch, + lj_cf_string_gsub, + lj_cf_string_format +}; +static const uint8_t lj_lib_init_string[] = { +76,51,14,249,3,108,101,110,0,1,2,0,0,0,3,16,0,5,0,21,1,0,0,76,1,2,0,68,98,121, +116,101,68,99,104,97,114,67,115,117,98,3,114,101,112,71,114,101,118,101,114, +115,101,133,108,111,119,101,114,133,117,112,112,101,114,4,100,117,109,112,4, +102,105,110,100,5,109,97,116,99,104,254,6,103,109,97,116,99,104,4,103,115,117, +98,6,102,111,114,109,97,116,255 +}; +#endif + +#ifdef LJLIB_MODULE_table +#undef LJLIB_MODULE_table +static const lua_CFunction lj_lib_cf_table[] = { + lj_cf_table_maxn, + lj_cf_table_insert, + lj_cf_table_concat, + lj_cf_table_sort +}; +static const uint8_t lj_lib_init_table[] = { +90,57,9,249,8,102,111,114,101,97,99,104,105,0,2,10,0,0,0,15,16,0,12,0,16,1, +9,0,41,2,1,0,21,3,0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3, +2,10,6,0,0,88,7,1,128,76,6,2,0,79,2,248,127,75,0,1,0,249,7,102,111,114,101, +97,99,104,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2,0,0,18,3,0,0,41,4,0,0,88, +5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7,0,0,88,8,1,128,76,7,2,0,70, +5,3,3,82,5,247,127,75,0,1,0,249,4,103,101,116,110,0,1,2,0,0,0,3,16,0,12,0,21, +1,0,0,76,1,2,0,4,109,97,120,110,6,105,110,115,101,114,116,249,6,114,101,109, +111,118,101,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,8,2,0,0, +88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,17,1,15,0,41,3, +1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,0,41,6, 
+1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,2,0,76, +3,2,0,75,0,1,0,0,2,249,4,109,111,118,101,0,5,12,0,0,0,35,16,0,12,0,17,1,15, +0,17,2,15,0,17,3,15,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24, +128,33,5,1,3,0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6, +1,0,18,7,2,0,41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127, +88,6,8,128,18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64, +11,10,4,79,6,252,127,76,4,2,0,6,99,111,110,99,97,116,4,115,111,114,116,254, +254,255 +}; +#endif + +#ifdef LJLIB_MODULE_io_method +#undef LJLIB_MODULE_io_method +static const lua_CFunction lj_lib_cf_io_method[] = { + lj_cf_io_method_close, + lj_cf_io_method_read, + lj_cf_io_method_write, + lj_cf_io_method_flush, + lj_cf_io_method_seek, + lj_cf_io_method_setvbuf, + lj_cf_io_method_lines, + lj_cf_io_method___gc, + lj_cf_io_method___tostring +}; +static const uint8_t lj_lib_init_io_method[] = { +96,57,10,5,99,108,111,115,101,4,114,101,97,100,5,119,114,105,116,101,5,102, +108,117,115,104,4,115,101,101,107,7,115,101,116,118,98,117,102,5,108,105,110, +101,115,4,95,95,103,99,10,95,95,116,111,115,116,114,105,110,103,252,1,199,95, +95,105,110,100,101,120,250,255 +}; +#endif + +#ifdef LJLIB_MODULE_io +#undef LJLIB_MODULE_io +static const lua_CFunction lj_lib_cf_io[] = { + lj_cf_io_open, + lj_cf_io_popen, + lj_cf_io_tmpfile, + lj_cf_io_close, + lj_cf_io_read, + lj_cf_io_write, + lj_cf_io_flush, + lj_cf_io_input, + lj_cf_io_output, + lj_cf_io_lines, + lj_cf_io_type +}; +static const uint8_t lj_lib_init_io[] = { +105,57,12,252,2,192,250,4,111,112,101,110,5,112,111,112,101,110,7,116,109,112, +102,105,108,101,5,99,108,111,115,101,4,114,101,97,100,5,119,114,105,116,101, +5,102,108,117,115,104,5,105,110,112,117,116,6,111,117,116,112,117,116,5,108, +105,110,101,115,4,116,121,112,101,255 +}; +#endif + +#ifdef LJLIB_MODULE_os +#undef LJLIB_MODULE_os +static const lua_CFunction lj_lib_cf_os[] = { + lj_cf_os_execute, + 
lj_cf_os_remove, + lj_cf_os_rename, + lj_cf_os_tmpname, + lj_cf_os_getenv, + lj_cf_os_exit, + lj_cf_os_clock, + lj_cf_os_date, + lj_cf_os_time, + lj_cf_os_difftime, + lj_cf_os_setlocale +}; +static const uint8_t lj_lib_init_os[] = { +116,57,11,7,101,120,101,99,117,116,101,6,114,101,109,111,118,101,6,114,101, +110,97,109,101,7,116,109,112,110,97,109,101,6,103,101,116,101,110,118,4,101, +120,105,116,5,99,108,111,99,107,4,100,97,116,101,4,116,105,109,101,8,100,105, +102,102,116,105,109,101,9,115,101,116,108,111,99,97,108,101,255 +}; +#endif + +#ifdef LJLIB_MODULE_debug +#undef LJLIB_MODULE_debug +static const lua_CFunction lj_lib_cf_debug[] = { + lj_cf_debug_getregistry, + lj_cf_debug_getmetatable, + lj_cf_debug_setmetatable, + lj_cf_debug_getfenv, + lj_cf_debug_setfenv, + lj_cf_debug_getinfo, + lj_cf_debug_getlocal, + lj_cf_debug_setlocal, + lj_cf_debug_getupvalue, + lj_cf_debug_setupvalue, + lj_cf_debug_upvalueid, + lj_cf_debug_upvaluejoin, + lj_cf_debug_sethook, + lj_cf_debug_gethook, + lj_cf_debug_debug, + lj_cf_debug_traceback +}; +static const uint8_t lj_lib_init_debug[] = { +127,57,16,11,103,101,116,114,101,103,105,115,116,114,121,12,103,101,116,109, +101,116,97,116,97,98,108,101,12,115,101,116,109,101,116,97,116,97,98,108,101, +7,103,101,116,102,101,110,118,7,115,101,116,102,101,110,118,7,103,101,116,105, +110,102,111,8,103,101,116,108,111,99,97,108,8,115,101,116,108,111,99,97,108, +10,103,101,116,117,112,118,97,108,117,101,10,115,101,116,117,112,118,97,108, +117,101,9,117,112,118,97,108,117,101,105,100,11,117,112,118,97,108,117,101, +106,111,105,110,7,115,101,116,104,111,111,107,7,103,101,116,104,111,111,107, +5,100,101,98,117,103,9,116,114,97,99,101,98,97,99,107,255 +}; +#endif + +#ifdef LJLIB_MODULE_jit +#undef LJLIB_MODULE_jit +static const lua_CFunction lj_lib_cf_jit[] = { + lj_cf_jit_on, + lj_cf_jit_off, + lj_cf_jit_flush, + lj_cf_jit_auditlog, + lj_cf_jit_status, + lj_cf_jit_tracebarrier +}; +static const uint8_t lj_lib_init_jit[] = { 
+143,57,10,2,111,110,3,111,102,102,5,102,108,117,115,104,8,97,117,100,105,116, +108,111,103,6,115,116,97,116,117,115,12,116,114,97,99,101,98,97,114,114,105, +101,114,252,5,194,111,115,250,252,4,196,97,114,99,104,250,252,3,203,118,101, +114,115,105,111,110,95,110,117,109,250,252,2,199,118,101,114,115,105,111,110, +250,255 +}; +#endif + +#ifdef LJLIB_MODULE_jit_opt +#undef LJLIB_MODULE_jit_opt +static const lua_CFunction lj_lib_cf_jit_opt[] = { + lj_cf_jit_opt_start +}; +static const uint8_t lj_lib_init_jit_opt[] = { +149,57,1,5,115,116,97,114,116,255 +}; +#endif + +#ifdef LJLIB_MODULE_jit_vmprofile +#undef LJLIB_MODULE_jit_vmprofile +static const lua_CFunction lj_lib_cf_jit_vmprofile[] = { + lj_cf_jit_vmprofile_open, + lj_cf_jit_vmprofile_close, + lj_cf_jit_vmprofile_select, + lj_cf_jit_vmprofile_start, + lj_cf_jit_vmprofile_stop +}; +static const uint8_t lj_lib_init_jit_vmprofile[] = { +150,57,5,4,111,112,101,110,5,99,108,111,115,101,6,115,101,108,101,99,116,5, +115,116,97,114,116,4,115,116,111,112,255 +}; +#endif + +#ifdef LJLIB_MODULE_ffi_meta +#undef LJLIB_MODULE_ffi_meta +static const lua_CFunction lj_lib_cf_ffi_meta[] = { + lj_cf_ffi_meta___index, + lj_cf_ffi_meta___newindex, + lj_cf_ffi_meta___eq, + lj_cf_ffi_meta___len, + lj_cf_ffi_meta___lt, + lj_cf_ffi_meta___le, + lj_cf_ffi_meta___concat, + lj_cf_ffi_meta___call, + lj_cf_ffi_meta___add, + lj_cf_ffi_meta___sub, + lj_cf_ffi_meta___mul, + lj_cf_ffi_meta___div, + lj_cf_ffi_meta___mod, + lj_cf_ffi_meta___pow, + lj_cf_ffi_meta___unm, + lj_cf_ffi_meta___tostring, + lj_cf_ffi_meta___pairs, + lj_cf_ffi_meta___ipairs +}; +static const uint8_t lj_lib_init_ffi_meta[] = { +155,57,19,7,95,95,105,110,100,101,120,10,95,95,110,101,119,105,110,100,101, +120,4,95,95,101,113,5,95,95,108,101,110,4,95,95,108,116,4,95,95,108,101,8,95, +95,99,111,110,99,97,116,6,95,95,99,97,108,108,5,95,95,97,100,100,5,95,95,115, +117,98,5,95,95,109,117,108,5,95,95,100,105,118,5,95,95,109,111,100,5,95,95, 
+112,111,119,5,95,95,117,110,109,10,95,95,116,111,115,116,114,105,110,103,7, +95,95,112,97,105,114,115,8,95,95,105,112,97,105,114,115,195,102,102,105,203, +95,95,109,101,116,97,116,97,98,108,101,250,255 +}; +#endif + +#ifdef LJLIB_MODULE_ffi_clib +#undef LJLIB_MODULE_ffi_clib +static const lua_CFunction lj_lib_cf_ffi_clib[] = { + lj_cf_ffi_clib___index, + lj_cf_ffi_clib___newindex, + lj_cf_ffi_clib___gc +}; +static const uint8_t lj_lib_init_ffi_clib[] = { +173,57,3,7,95,95,105,110,100,101,120,10,95,95,110,101,119,105,110,100,101,120, +4,95,95,103,99,255 +}; +#endif + +#ifdef LJLIB_MODULE_ffi_callback +#undef LJLIB_MODULE_ffi_callback +static const lua_CFunction lj_lib_cf_ffi_callback[] = { + lj_cf_ffi_callback_free, + lj_cf_ffi_callback_set +}; +static const uint8_t lj_lib_init_ffi_callback[] = { +176,57,3,4,102,114,101,101,3,115,101,116,252,1,199,95,95,105,110,100,101,120, +250,255 +}; +#endif + +#ifdef LJLIB_MODULE_ffi +#undef LJLIB_MODULE_ffi +static const lua_CFunction lj_lib_cf_ffi[] = { + lj_cf_ffi_cdef, + lj_cf_ffi_new, + lj_cf_ffi_cast, + lj_cf_ffi_typeof, + lj_cf_ffi_typeinfo, + lj_cf_ffi_istype, + lj_cf_ffi_sizeof, + lj_cf_ffi_alignof, + lj_cf_ffi_offsetof, + lj_cf_ffi_errno, + lj_cf_ffi_string, + lj_cf_ffi_copy, + lj_cf_ffi_fill, + lj_cf_ffi_abi, + lj_cf_ffi_metatype, + lj_cf_ffi_gc, + lj_cf_ffi_load +}; +static const uint8_t lj_lib_init_ffi[] = { +178,57,23,4,99,100,101,102,3,110,101,119,4,99,97,115,116,6,116,121,112,101, +111,102,8,116,121,112,101,105,110,102,111,6,105,115,116,121,112,101,6,115,105, +122,101,111,102,7,97,108,105,103,110,111,102,8,111,102,102,115,101,116,111, +102,5,101,114,114,110,111,6,115,116,114,105,110,103,4,99,111,112,121,4,102, +105,108,108,3,97,98,105,252,8,192,250,8,109,101,116,97,116,121,112,101,252, +7,192,250,2,103,99,252,5,192,250,4,108,111,97,100,252,4,193,67,250,252,3,194, +111,115,250,252,2,196,97,114,99,104,250,255 +}; +#endif + diff --git a/src/reusevm/lj_recdef.h b/src/reusevm/lj_recdef.h new file mode 100644 index 
0000000000..2c02001914 --- /dev/null +++ b/src/reusevm/lj_recdef.h @@ -0,0 +1,263 @@ +/* This is a generated file. DO NOT EDIT! */ + +static const uint16_t recff_idmap[] = { +0, +0x0100, +0x0200, +0x0300, +0, +0x0400+(0), +0x0500, +0x0400+(1), +0x0600, +0x0700, +0x0800, +0, +0x0900, +0x0a00, +0x0b00, +0, +0x0c00, +0x0d00, +0x0e00, +0, +0x0f00, +0x1000, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0x1100, +0x1200+(IRFPM_FLOOR), +0x1200+(IRFPM_CEIL), +0x1300+(IRFPM_SQRT), +0x1300+(IRFPM_LOG10), +0x1300+(IRFPM_EXP), +0x1300+(IRFPM_SIN), +0x1300+(IRFPM_COS), +0x1300+(IRFPM_TAN), +0x1400+(FF_math_asin), +0x1400+(FF_math_acos), +0x1400+(FF_math_atan), +0x1500+(IRCALL_sinh), +0x1500+(IRCALL_cosh), +0x1500+(IRCALL_tanh), +0, +0x1600, +0x1700, +0x1800, +0x1900, +0, +0x1a00, +0x1b00+(IR_MIN), +0x1b00+(IR_MAX), +0x1c00, +0, +0x1d00, +0x1e00+(IR_BNOT), +0x1e00+(IR_BSWAP), +0x1f00+(IR_BSHL), +0x1f00+(IR_BSHR), +0x1f00+(IR_BSAR), +0x1f00+(IR_BROL), +0x1f00+(IR_BROR), +0x2000+(IR_BAND), +0x2000+(IR_BOR), +0x2000+(IR_BXOR), +0x2100, +0x2200+(0), +0x2300, +0x2200+(1), +0x2400, +0x2500+(IRCALL_lj_buf_putstr_reverse), +0x2500+(IRCALL_lj_buf_putstr_lower), +0x2500+(IRCALL_lj_buf_putstr_upper), +0, +0x2600, +0, +0, +0, +0, +0x2700, +0, +0x2800, +0x2900, +0, +0x2a00, +0x2b00, +0, +0, +0x2c00+(0), +0x2d00+(0), +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0x2c00+(GCROOT_IO_OUTPUT), +0x2d00+(GCROOT_IO_OUTPUT), +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0x2e00, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0, +0x2f00+(0), +0x2f00+(1), +0x3000+(MM_eq), +0x3000+(MM_len), +0x3000+(MM_lt), +0x3000+(MM_le), +0x3000+(MM_concat), +0x3100, +0x3000+(MM_add), +0x3000+(MM_sub), +0x3000+(MM_mul), +0x3000+(MM_div), +0x3000+(MM_mod), +0x3000+(MM_pow), +0x3000+(MM_unm), +0, +0, +0, +0x3200+(1), +0x3200+(0), +0, +0, +0, +0, +0x3300, +0x3300, +0x3400, +0, +0x3500, +0x3600+(FF_ffi_sizeof), +0x3600+(FF_ffi_alignof), 
+0x3600+(FF_ffi_offsetof), +0x3700, +0x3800, +0x3900, +0x3a00, +0x3b00, +0, +0x3c00 +}; + +static const RecordFunc recff_func[] = { +recff_nyi, +recff_c, +recff_assert, +recff_type, +recff_xpairs, +recff_ipairs_aux, +recff_getmetatable, +recff_setmetatable, +recff_getfenv, +recff_rawget, +recff_rawset, +recff_rawequal, +recff_select, +recff_tonumber, +recff_tostring, +recff_pcall, +recff_xpcall, +recff_math_abs, +recff_math_round, +recff_math_unary, +recff_math_atrig, +recff_math_htrig, +recff_math_modf, +recff_math_log, +recff_math_atan2, +recff_math_pow, +recff_math_ldexp, +recff_math_minmax, +recff_math_random, +recff_bit_tobit, +recff_bit_unary, +recff_bit_shift, +recff_bit_nary, +recff_bit_tohex, +recff_string_range, +recff_string_char, +recff_string_rep, +recff_string_op, +recff_string_find, +recff_string_format, +recff_table_insert, +recff_table_concat, +recff_table_new, +recff_table_clear, +recff_io_write, +recff_io_flush, +recff_debug_getmetatable, +recff_cdata_index, +recff_cdata_arith, +recff_cdata_call, +recff_clib_index, +recff_ffi_new, +recff_ffi_typeof, +recff_ffi_istype, +recff_ffi_xof, +recff_ffi_errno, +recff_ffi_string, +recff_ffi_copy, +recff_ffi_fill, +recff_ffi_abi, +recff_ffi_gc +}; + diff --git a/src/reusevm/lj_vm.S b/src/reusevm/lj_vm.S new file mode 100644 index 0000000000..be74e0d3a1 --- /dev/null +++ b/src/reusevm/lj_vm.S @@ -0,0 +1,2768 @@ + .file "buildvm_x86.dasc" + .text + .p2align 4 + + .globl lj_vm_asm_begin + .hidden lj_vm_asm_begin + .type lj_vm_asm_begin, @object + .size lj_vm_asm_begin, 0 +lj_vm_asm_begin: +.Lbegin: + + .globl lj_BC_ISLT + .hidden lj_BC_ISLT + .type lj_BC_ISLT, @function + .size lj_BC_ISLT, 92 +lj_BC_ISLT: + .byte 76,139,28,202,72,139,44,194,76,137,217,72,137,232,73,193 + .byte 251,47,72,193,253,47,65,131,251,242,15,131,235,34,0,0 + .byte 131,253,242,15,131,226,34,0,0,102,72,15,110,192,102,72 + .byte 15,110,201,72,131,195,4,102,15,46,193,118,12,15,183,67 + .byte 
254,72,141,156,131,0,0,254,255,139,3,15,182,204,15,182 + .byte 232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_ISGE + .hidden lj_BC_ISGE + .type lj_BC_ISGE, @function + .size lj_BC_ISGE, 92 +lj_BC_ISGE: + .byte 76,139,28,202,72,139,44,194,76,137,217,72,137,232,73,193 + .byte 251,47,72,193,253,47,65,131,251,242,15,131,143,34,0,0 + .byte 131,253,242,15,131,134,34,0,0,102,72,15,110,192,102,72 + .byte 15,110,201,72,131,195,4,102,15,46,193,119,12,15,183,67 + .byte 254,72,141,156,131,0,0,254,255,139,3,15,182,204,15,182 + .byte 232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_ISLE + .hidden lj_BC_ISLE + .type lj_BC_ISLE, @function + .size lj_BC_ISLE, 92 +lj_BC_ISLE: + .byte 76,139,28,202,72,139,44,194,76,137,217,72,137,232,73,193 + .byte 251,47,72,193,253,47,65,131,251,242,15,131,51,34,0,0 + .byte 131,253,242,15,131,42,34,0,0,102,72,15,110,192,102,72 + .byte 15,110,201,72,131,195,4,102,15,46,193,114,12,15,183,67 + .byte 254,72,141,156,131,0,0,254,255,139,3,15,182,204,15,182 + .byte 232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_ISGT + .hidden lj_BC_ISGT + .type lj_BC_ISGT, @function + .size lj_BC_ISGT, 92 +lj_BC_ISGT: + .byte 76,139,28,202,72,139,44,194,76,137,217,72,137,232,73,193 + .byte 251,47,72,193,253,47,65,131,251,242,15,131,215,33,0,0 + .byte 131,253,242,15,131,206,33,0,0,102,72,15,110,192,102,72 + .byte 15,110,201,72,131,195,4,102,15,46,193,115,12,15,183,67 + .byte 254,72,141,156,131,0,0,254,255,139,3,15,182,204,15,182 + .byte 232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_ISEQV + .hidden lj_BC_ISEQV + .type lj_BC_ISEQV, @function + .size lj_BC_ISEQV, 150 +lj_BC_ISEQV: + .byte 72,139,44,194,76,139,28,202,72,131,195,4,72,137,232,76 + .byte 137,217,72,193,253,47,73,193,251,47,131,253,242,115,55,65 + .byte 131,251,242,115,49,102,72,15,110,200,102,72,15,110,193,102 + .byte 15,46,193,122,14,117,12,15,183,67,254,72,141,156,131,0 + .byte 0,254,255,139,3,15,182,204,15,182,232,72,131,195,4,193 + .byte 
232,16,65,255,36,238,131,253,245,15,132,229,33,0,0,65 + .byte 131,251,245,15,132,219,33,0,0,72,57,193,116,201,68,57 + .byte 221,117,208,131,253,244,119,203,72,193,225,17,72,193,233,17 + .byte 72,139,105,32,72,133,237,116,186,246,69,10,16,117,180,49 + .byte 237,233,130,33,0,0 + + .globl lj_BC_ISNEV + .hidden lj_BC_ISNEV + .type lj_BC_ISNEV, @function + .size lj_BC_ISNEV, 153 +lj_BC_ISNEV: + .byte 72,139,44,194,76,139,28,202,72,131,195,4,72,137,232,76 + .byte 137,217,72,193,253,47,73,193,251,47,131,253,242,115,55,65 + .byte 131,251,242,115,49,102,72,15,110,200,102,72,15,110,193,102 + .byte 15,46,193,122,2,116,12,15,183,67,254,72,141,156,131,0 + .byte 0,254,255,139,3,15,182,204,15,182,232,72,131,195,4,193 + .byte 232,16,65,255,36,238,131,253,245,15,132,79,33,0,0,65 + .byte 131,251,245,15,132,69,33,0,0,72,57,193,116,213,68,57 + .byte 221,117,196,131,253,244,119,191,72,193,225,17,72,193,233,17 + .byte 72,139,105,32,72,133,237,116,174,246,69,10,16,117,168,189 + .byte 1,0,0,0,233,233,32,0,0 + + .globl lj_BC_ISEQS + .hidden lj_BC_ISEQS + .type lj_BC_ISEQS, @function + .size lj_BC_ISEQS, 80 +lj_BC_ISEQS: + .byte 72,247,208,72,139,44,202,72,131,195,4,73,137,235,72,193 + .byte 229,17,72,193,237,17,73,193,251,47,65,131,251,251,117,37 + .byte 73,59,44,199,117,12,15,183,67,254,72,141,156,131,0,0 + .byte 254,255,139,3,15,182,204,15,182,232,72,131,195,4,193,232 + .byte 16,65,255,36,238,65,131,251,245,117,231,233,197,32,0,0 + + .globl lj_BC_ISNES + .hidden lj_BC_ISNES + .type lj_BC_ISNES, @function + .size lj_BC_ISNES, 80 +lj_BC_ISNES: + .byte 72,247,208,72,139,44,202,72,131,195,4,73,137,235,72,193 + .byte 229,17,72,193,237,17,73,193,251,47,65,131,251,251,117,37 + .byte 73,59,44,199,116,12,15,183,67,254,72,141,156,131,0,0 + .byte 254,255,139,3,15,182,204,15,182,232,72,131,195,4,193,232 + .byte 16,65,255,36,238,65,131,251,245,117,219,233,117,32,0,0 + + .globl lj_BC_ISEQN + .hidden lj_BC_ISEQN + .type lj_BC_ISEQN, @function + .size lj_BC_ISEQN, 78 +lj_BC_ISEQN: + .byte 
72,139,44,202,72,131,195,4,73,137,235,73,193,251,47,65 + .byte 131,251,242,115,46,242,65,15,16,4,199,102,15,46,4,202 + .byte 122,14,117,12,15,183,67,254,72,141,156,131,0,0,254,255 + .byte 139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65 + .byte 255,36,238,65,131,251,245,117,231,233,39,32,0,0 + + .globl lj_BC_ISNEN + .hidden lj_BC_ISNEN + .type lj_BC_ISNEN, @function + .size lj_BC_ISNEN, 78 +lj_BC_ISNEN: + .byte 72,139,44,202,72,131,195,4,73,137,235,73,193,251,47,65 + .byte 131,251,242,115,46,242,65,15,16,4,199,102,15,46,4,202 + .byte 122,2,116,12,15,183,67,254,72,141,156,131,0,0,254,255 + .byte 139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65 + .byte 255,36,238,65,131,251,245,117,219,233,217,31,0,0 + + .globl lj_BC_ISEQP + .hidden lj_BC_ISEQP + .type lj_BC_ISEQP, @function + .size lj_BC_ISEQP, 60 +lj_BC_ISEQP: + .byte 72,247,208,72,139,44,202,72,193,253,47,72,131,195,4,57 + .byte 197,117,31,15,183,67,254,72,141,156,131,0,0,254,255,139 + .byte 3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,255 + .byte 36,238,131,253,245,117,232,233,157,31,0,0 + + .globl lj_BC_ISNEP + .hidden lj_BC_ISNEP + .type lj_BC_ISNEP, @function + .size lj_BC_ISNEP, 59 +lj_BC_ISNEP: + .byte 72,247,208,72,139,44,202,72,193,253,47,72,131,195,4,57 + .byte 197,116,21,131,253,245,15,132,129,31,0,0,15,183,67,254 + .byte 72,141,156,131,0,0,254,255,139,3,15,182,204,15,182,232 + .byte 72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_ISTC + .hidden lj_BC_ISTC + .type lj_BC_ISTC, @function + .size lj_BC_ISTC, 56 +lj_BC_ISTC: + .byte 76,139,28,194,72,131,195,4,76,137,221,73,193,251,47,65 + .byte 131,251,254,115,16,72,137,44,202,15,183,67,254,72,141,156 + .byte 131,0,0,254,255,139,3,15,182,204,15,182,232,72,131,195 + .byte 4,193,232,16,65,255,36,238 + + .globl lj_BC_ISFC + .hidden lj_BC_ISFC + .type lj_BC_ISFC, @function + .size lj_BC_ISFC, 56 +lj_BC_ISFC: + .byte 76,139,28,194,72,131,195,4,76,137,221,73,193,251,47,65 + .byte 131,251,254,114,16,72,137,44,202,15,183,67,254,72,141,156 + 
.byte 131,0,0,254,255,139,3,15,182,204,15,182,232,72,131,195 + .byte 4,193,232,16,65,255,36,238 + + .globl lj_BC_IST + .hidden lj_BC_IST + .type lj_BC_IST, @function + .size lj_BC_IST, 49 +lj_BC_IST: + .byte 76,139,28,194,72,131,195,4,73,193,251,47,65,131,251,254 + .byte 115,12,15,183,67,254,72,141,156,131,0,0,254,255,139,3 + .byte 15,182,204,15,182,232,72,131,195,4,193,232,16,65,255,36 + .byte 238 + + .globl lj_BC_ISF + .hidden lj_BC_ISF + .type lj_BC_ISF, @function + .size lj_BC_ISF, 49 +lj_BC_ISF: + .byte 76,139,28,194,72,131,195,4,73,193,251,47,65,131,251,254 + .byte 114,12,15,183,67,254,72,141,156,131,0,0,254,255,139,3 + .byte 15,182,204,15,182,232,72,131,195,4,193,232,16,65,255,36 + .byte 238 + + .globl lj_BC_ISTYPE + .hidden lj_BC_ISTYPE + .type lj_BC_ISTYPE, @function + .size lj_BC_ISTYPE, 35 +lj_BC_ISTYPE: + .byte 72,139,44,202,72,193,253,47,1,197,15,133,162,30,0,0 + .byte 139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65 + .byte 255,36,238 + + .globl lj_BC_ISNUM + .hidden lj_BC_ISNUM + .type lj_BC_ISNUM, @function + .size lj_BC_ISNUM, 37 +lj_BC_ISNUM: + .byte 76,139,28,202,73,193,251,47,65,131,251,242,15,131,125,30 + .byte 0,0,139,3,15,182,204,15,182,232,72,131,195,4,193,232 + .byte 16,65,255,36,238 + + .globl lj_BC_MOV + .hidden lj_BC_MOV + .type lj_BC_MOV, @function + .size lj_BC_MOV, 27 +lj_BC_MOV: + .byte 72,139,44,194,72,137,44,202,139,3,15,182,204,15,182,232 + .byte 72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_NOT + .hidden lj_BC_NOT + .type lj_BC_NOT, @function + .size lj_BC_NOT, 50 +lj_BC_NOT: + .byte 72,139,44,194,72,193,253,47,184,2,0,0,0,72,131,253 + .byte 254,131,216,0,72,193,224,47,72,247,208,72,137,4,202,139 + .byte 3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,255 + .byte 36,238 + + .globl lj_BC_UNM + .hidden lj_BC_UNM + .type lj_BC_UNM, @function + .size lj_BC_UNM, 57 +lj_BC_UNM: + .byte 72,139,44,194,73,137,235,73,193,251,47,65,131,251,242,15 + .byte 131,62,30,0,0,72,184,0,0,0,0,0,0,0,128,72 + .byte 
49,197,72,137,44,202,139,3,15,182,204,15,182,232,72,131 + .byte 195,4,193,232,16,65,255,36,238 + + .globl lj_BC_LEN + .hidden lj_BC_LEN + .type lj_BC_LEN, @function + .size lj_BC_LEN, 91 +lj_BC_LEN: + .byte 72,139,4,194,73,137,195,72,193,224,17,72,193,232,17,73 + .byte 193,251,47,65,131,251,251,117,32,15,87,192,242,15,42,64 + .byte 16,242,15,17,4,202,139,3,15,182,204,15,182,232,72,131 + .byte 195,4,193,232,16,65,255,36,238,65,131,251,244,15,133,53 + .byte 30,0,0,72,137,199,72,137,213 + call lj_tab_len + .byte 242,15,42,192,72,137,234,15,182,75,253,235,198 + + .globl lj_BC_ADDVN + .hidden lj_BC_ADDVN + .type lj_BC_ADDVN, @function + .size lj_BC_ADDVN, 59 +lj_BC_ADDVN: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,148,29,0,0,242,15,16,4,234,242,65,15 + .byte 88,4,199,242,15,17,4,202,139,3,15,182,204,15,182,232 + .byte 72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_SUBVN + .hidden lj_BC_SUBVN + .type lj_BC_SUBVN, @function + .size lj_BC_SUBVN, 59 +lj_BC_SUBVN: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,89,29,0,0,242,15,16,4,234,242,65,15 + .byte 92,4,199,242,15,17,4,202,139,3,15,182,204,15,182,232 + .byte 72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_MULVN + .hidden lj_BC_MULVN + .type lj_BC_MULVN, @function + .size lj_BC_MULVN, 59 +lj_BC_MULVN: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,30,29,0,0,242,15,16,4,234,242,65,15 + .byte 89,4,199,242,15,17,4,202,139,3,15,182,204,15,182,232 + .byte 72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_DIVVN + .hidden lj_BC_DIVVN + .type lj_BC_DIVVN, @function + .size lj_BC_DIVVN, 59 +lj_BC_DIVVN: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,227,28,0,0,242,15,16,4,234,242,65,15 + .byte 94,4,199,242,15,17,4,202,139,3,15,182,204,15,182,232 + .byte 72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_MODVN + .hidden lj_BC_MODVN + .type 
lj_BC_MODVN, @function + .size lj_BC_MODVN, 64 +lj_BC_MODVN: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,168,28,0,0,242,15,16,4,234,242,65,15 + .byte 16,12,199,232,228,54,0,0,242,15,17,4,202,139,3,15 + .byte 182,204,15,182,232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_ADDNV + .hidden lj_BC_ADDNV + .type lj_BC_ADDNV, @function + .size lj_BC_ADDNV, 59 +lj_BC_ADDNV: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,110,28,0,0,242,65,15,16,4,199,242,15 + .byte 88,4,234,242,15,17,4,202,139,3,15,182,204,15,182,232 + .byte 72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_SUBNV + .hidden lj_BC_SUBNV + .type lj_BC_SUBNV, @function + .size lj_BC_SUBNV, 59 +lj_BC_SUBNV: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,51,28,0,0,242,65,15,16,4,199,242,15 + .byte 92,4,234,242,15,17,4,202,139,3,15,182,204,15,182,232 + .byte 72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_MULNV + .hidden lj_BC_MULNV + .type lj_BC_MULNV, @function + .size lj_BC_MULNV, 59 +lj_BC_MULNV: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,248,27,0,0,242,65,15,16,4,199,242,15 + .byte 89,4,234,242,15,17,4,202,139,3,15,182,204,15,182,232 + .byte 72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_DIVNV + .hidden lj_BC_DIVNV + .type lj_BC_DIVNV, @function + .size lj_BC_DIVNV, 59 +lj_BC_DIVNV: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,189,27,0,0,242,65,15,16,4,199,242,15 + .byte 94,4,234,242,15,17,4,202,139,3,15,182,204,15,182,232 + .byte 72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_MODNV + .hidden lj_BC_MODNV + .type lj_BC_MODNV, @function + .size lj_BC_MODNV, 40 +lj_BC_MODNV: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,130,27,0,0,242,65,15,16,4,199,242,15 + .byte 16,12,234,233,207,254,255,255 + + .globl lj_BC_ADDVV + 
.hidden lj_BC_ADDVV + .type lj_BC_ADDVV, @function + .size lj_BC_ADDVV, 76 +lj_BC_ADDVV: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,112,27,0,0,76,139,28,194,73,193,251,47 + .byte 65,131,251,242,15,131,94,27,0,0,242,15,16,4,234,242 + .byte 15,88,4,194,242,15,17,4,202,139,3,15,182,204,15,182 + .byte 232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_SUBVV + .hidden lj_BC_SUBVV + .type lj_BC_SUBVV, @function + .size lj_BC_SUBVV, 76 +lj_BC_SUBVV: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,36,27,0,0,76,139,28,194,73,193,251,47 + .byte 65,131,251,242,15,131,18,27,0,0,242,15,16,4,234,242 + .byte 15,92,4,194,242,15,17,4,202,139,3,15,182,204,15,182 + .byte 232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_MULVV + .hidden lj_BC_MULVV + .type lj_BC_MULVV, @function + .size lj_BC_MULVV, 76 +lj_BC_MULVV: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,216,26,0,0,76,139,28,194,73,193,251,47 + .byte 65,131,251,242,15,131,198,26,0,0,242,15,16,4,234,242 + .byte 15,89,4,194,242,15,17,4,202,139,3,15,182,204,15,182 + .byte 232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_DIVVV + .hidden lj_BC_DIVVV + .type lj_BC_DIVVV, @function + .size lj_BC_DIVVV, 76 +lj_BC_DIVVV: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,140,26,0,0,76,139,28,194,73,193,251,47 + .byte 65,131,251,242,15,131,122,26,0,0,242,15,16,4,234,242 + .byte 15,94,4,194,242,15,17,4,202,139,3,15,182,204,15,182 + .byte 232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_MODVV + .hidden lj_BC_MODVV + .type lj_BC_MODVV, @function + .size lj_BC_MODVV, 57 +lj_BC_MODVV: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,64,26,0,0,76,139,28,194,73,193,251,47 + .byte 65,131,251,242,15,131,46,26,0,0,242,15,16,4,234,242 + .byte 15,16,12,194,233,102,253,255,255 + + .globl lj_BC_POW + .hidden 
lj_BC_POW + .type lj_BC_POW, @function + .size lj_BC_POW, 91 +lj_BC_POW: + .byte 15,182,236,15,182,192,76,139,28,234,73,193,251,47,65,131 + .byte 251,242,15,131,7,26,0,0,76,139,28,194,73,193,251,47 + .byte 65,131,251,242,15,131,245,25,0,0,242,15,16,4,234,242 + .byte 15,16,12,194,72,137,213 + call pow@PLT + .byte 15,182,75,253,72,137,234,242,15,17,4,202,139,3,15,182 + .byte 204,15,182,232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_CAT + .hidden lj_BC_CAT + .type lj_BC_CAT, @function + .size lj_BC_CAT, 84 +lj_BC_CAT: + .byte 15,182,236,15,182,192,72,139,124,36,16,72,137,87,32,72 + .byte 141,52,194,137,194,41,234,72,137,253,72,137,92,36,24 + call lj_meta_cat + .byte 72,139,85,32,72,133,192,15,133,208,25,0,0,15,182,107 + .byte 255,15,182,75,253,72,139,4,234,72,137,4,202,139,3,15 + .byte 182,204,15,182,232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_KSTR + .hidden lj_BC_KSTR + .type lj_BC_KSTR, @function + .size lj_BC_KSTR, 86 +lj_BC_KSTR: + .byte 72,247,208,73,139,4,199,73,187,0,0,0,0,0,128,253 + .byte 255,76,9,216,72,137,4,202,139,3,15,182,204,15,182,232 + .byte 72,131,195,4,193,232,16,65,255,36,238,72,247,208,73,139 + .byte 4,199,73,187,0,0,0,0,0,128,250,255,76,9,216,72 + .byte 137,4,202,139,3,15,182,204,15,182,232,72,131,195,4,193 + .byte 232,16,65,255,36,238 + + .globl lj_BC_KSHORT + .hidden lj_BC_KSHORT + .type lj_BC_KSHORT, @function + .size lj_BC_KSHORT, 31 +lj_BC_KSHORT: + .byte 15,191,192,242,15,42,192,242,15,17,4,202,139,3,15,182 + .byte 204,15,182,232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_KNUM + .hidden lj_BC_KNUM + .type lj_BC_KNUM, @function + .size lj_BC_KNUM, 30 +lj_BC_KNUM: + .byte 242,65,15,16,4,199,242,15,17,4,202,139,3,15,182,204 + .byte 15,182,232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_KPRI + .hidden lj_BC_KPRI + .type lj_BC_KPRI, @function + .size lj_BC_KPRI, 30 +lj_BC_KPRI: + .byte 72,193,224,47,72,247,208,72,137,4,202,139,3,15,182,204 + .byte 15,182,232,72,131,195,4,193,232,16,65,255,36,238 
+ + .globl lj_BC_KNIL + .hidden lj_BC_KNIL + .type lj_BC_KNIL, @function + .size lj_BC_KNIL, 51 +lj_BC_KNIL: + .byte 72,141,76,202,8,72,141,4,194,72,199,197,255,255,255,255 + .byte 72,137,105,248,72,137,41,72,131,193,8,72,57,193,118,244 + .byte 139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65 + .byte 255,36,238 + + .globl lj_BC_UGET + .hidden lj_BC_UGET + .type lj_BC_UGET, @function + .size lj_BC_UGET, 48 +lj_BC_UGET: + .byte 72,139,106,240,72,193,229,17,72,193,237,17,72,139,108,197 + .byte 40,72,139,109,32,72,139,69,0,72,137,4,202,139,3,15 + .byte 182,204,15,182,232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_USETV + .hidden lj_BC_USETV + .type lj_BC_USETV, @function + .size lj_BC_USETV, 112 +lj_BC_USETV: + .byte 72,139,106,240,72,193,229,17,72,193,237,17,72,139,108,205 + .byte 40,128,125,10,0,72,139,109,32,72,139,12,194,72,137,77 + .byte 0,116,6,246,69,248,4,117,19,139,3,15,182,204,15,182 + .byte 232,72,131,195,4,193,232,16,65,255,36,238,72,137,200,72 + .byte 193,248,47,131,232,252,131,248,246,118,222,72,193,225,17,72 + .byte 193,233,17,246,65,8,3,116,208,72,137,238,72,137,213,73 + .byte 141,190,160,240,255,255 + call lj_gc_barrieruv + .byte 72,137,234,235,185 + + .globl lj_BC_USETS + .hidden lj_BC_USETS + .type lj_BC_USETS, @function + .size lj_BC_USETS, 104 +lj_BC_USETS: + .byte 72,247,208,72,139,106,240,72,193,229,17,72,193,237,17,72 + .byte 139,108,205,40,73,139,12,199,72,139,69,32,73,187,0,0 + .byte 0,0,0,128,253,255,73,9,203,76,137,24,246,69,8,4 + .byte 117,19,139,3,15,182,204,15,182,232,72,131,195,4,193,232 + .byte 16,65,255,36,238,246,65,8,3,116,231,128,125,10,0,116 + .byte 225,72,137,213,72,137,198,73,141,190,160,240,255,255 + call lj_gc_barrieruv + .byte 72,137,234,235,202 + + .globl lj_BC_USETN + .hidden lj_BC_USETN + .type lj_BC_USETN, @function + .size lj_BC_USETN, 50 +lj_BC_USETN: + .byte 72,139,106,240,72,193,229,17,72,193,237,17,242,65,15,16 + .byte 4,199,72,139,108,205,40,72,139,77,32,242,15,17,1,139 + .byte 
3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,255 + .byte 36,238 + + .globl lj_BC_USETP + .hidden lj_BC_USETP + .type lj_BC_USETP, @function + .size lj_BC_USETP, 50 +lj_BC_USETP: + .byte 72,139,106,240,72,193,229,17,72,193,237,17,72,139,108,205 + .byte 40,72,193,224,47,72,247,208,72,139,77,32,72,137,1,139 + .byte 3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,255 + .byte 36,238 + + .globl lj_BC_UCLO + .hidden lj_BC_UCLO + .type lj_BC_UCLO, @function + .size lj_BC_UCLO, 59 +lj_BC_UCLO: + .byte 72,141,156,131,0,0,254,255,72,139,108,36,16,72,131,125 + .byte 64,0,116,20,72,137,85,32,72,141,52,202,72,137,239 + call lj_func_closeuv + .byte 72,139,85,32,139,3,15,182,204,15,182,232,72,131,195,4 + .byte 193,232,16,65,255,36,238 + + .globl lj_BC_FNEW + .hidden lj_BC_FNEW + .type lj_BC_FNEW, @function + .size lj_BC_FNEW, 85 +lj_BC_FNEW: + .byte 72,247,208,72,139,108,36,16,72,137,85,32,72,139,82,240 + .byte 72,193,226,17,72,193,234,17,73,139,52,199,72,137,239,72 + .byte 137,92,36,24 + call lj_func_newL_gc + .byte 72,139,85,32,15,182,75,253,73,187,0,0,0,0,0,128 + .byte 251,255,76,9,216,72,137,4,202,139,3,15,182,204,15,182 + .byte 232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_TNEW + .hidden lj_BC_TNEW + .type lj_BC_TNEW, @function + .size lj_BC_TNEW, 122 +lj_BC_TNEW: + .byte 72,139,108,36,16,72,137,85,32,73,139,142,192,240,255,255 + .byte 73,59,142,200,240,255,255,72,137,92,36,24,115,78,137,194 + .byte 37,255,7,0,0,193,234,11,61,255,7,0,0,116,54,72 + .byte 137,239,137,198 + call lj_tab_new + .byte 72,139,85,32,15,182,75,253,73,187,0,0,0,0,0,0 + .byte 250,255,76,9,216,72,137,4,202,139,3,15,182,204,15,182 + .byte 232,72,131,195,4,193,232,16,65,255,36,238,184,1,8,0 + .byte 0,235,195,72,137,239 + call lj_gc_step_fixtop + .byte 15,183,67,254,235,164 + + .globl lj_BC_TDUP + .hidden lj_BC_TDUP + .type lj_BC_TDUP, @function + .size lj_BC_TDUP, 106 +lj_BC_TDUP: + .byte 72,247,208,72,139,108,36,16,73,139,142,192,240,255,255,72 + .byte 
137,92,36,24,73,59,142,200,240,255,255,72,137,85,32,115 + .byte 56,73,139,52,199,72,137,239 + call lj_tab_dup + .byte 72,139,85,32,15,182,75,253,73,187,0,0,0,0,0,0 + .byte 250,255,76,9,216,72,137,4,202,139,3,15,182,204,15,182 + .byte 232,72,131,195,4,193,232,16,65,255,36,238,72,137,239 + call lj_gc_step_fixtop + .byte 15,183,67,254,72,247,208,235,183 + + .globl lj_BC_GGET + .hidden lj_BC_GGET + .type lj_BC_GGET, @function + .size lj_BC_GGET, 28 +lj_BC_GGET: + .byte 72,247,208,72,139,106,240,72,193,229,17,72,193,237,17,72 + .byte 139,109,16,73,139,4,199,233,235,0,0,0 + + .globl lj_BC_GSET + .hidden lj_BC_GSET + .type lj_BC_GSET, @function + .size lj_BC_GSET, 28 +lj_BC_GSET: + .byte 72,247,208,72,139,106,240,72,193,229,17,72,193,237,17,72 + .byte 139,109,16,73,139,4,199,233,206,2,0,0 + + .globl lj_BC_TGETV + .hidden lj_BC_TGETV + .type lj_BC_TGETV, @function + .size lj_BC_TGETV, 165 +lj_BC_TGETV: + .byte 15,182,236,15,182,192,72,139,44,234,72,139,4,194,73,137 + .byte 235,72,193,229,17,72,193,237,17,73,193,251,47,65,131,251 + .byte 244,15,133,175,18,0,0,73,137,195,73,193,251,47,65,131 + .byte 251,242,115,93,102,72,15,110,192,242,15,44,192,242,15,42 + .byte 200,102,15,46,193,15,133,139,18,0,0,59,69,48,15,131 + .byte 130,18,0,0,193,224,3,72,3,69,16,76,139,24,73,131 + .byte 251,255,116,23,76,137,28,202,139,3,15,182,204,15,182,232 + .byte 72,131,195,4,193,232,16,65,255,36,238,76,139,85,32,77 + .byte 133,210,116,224,65,246,66,10,1,15,132,71,18,0,0,235 + .byte 211,65,131,251,251,15,133,59,18,0,0,72,193,224,17,72 + .byte 193,232,17,235,42 + + .globl lj_BC_TGETS + .hidden lj_BC_TGETS + .type lj_BC_TGETS, @function + .size lj_BC_TGETS, 146 +lj_BC_TGETS: + .byte 15,182,236,15,182,192,72,139,44,234,72,247,208,73,139,4 + .byte 199,73,137,235,72,193,229,17,72,193,237,17,73,193,251,47 + .byte 65,131,251,244,15,133,191,17,0,0,68,139,85,52,68,35 + .byte 80,12,69,107,210,24,76,3,85,40,73,187,0,0,0,0 + .byte 0,128,253,255,73,9,195,77,57,90,8,117,32,77,139,26 + .byte 
73,131,251,255,116,39,76,137,28,202,139,3,15,182,204,15 + .byte 182,232,72,131,195,4,193,232,16,65,255,36,238,77,139,82 + .byte 16,77,133,210,117,209,73,199,195,255,255,255,255,76,139,85 + .byte 32,77,133,210,116,208,65,246,66,10,1,117,201,233,87,17 + .byte 0,0 + + .globl lj_BC_TGETB + .hidden lj_BC_TGETB + .type lj_BC_TGETB, @function + .size lj_BC_TGETB, 105 +lj_BC_TGETB: + .byte 15,182,236,15,182,192,72,139,44,234,73,137,235,72,193,229 + .byte 17,72,193,237,17,73,193,251,47,65,131,251,244,15,133,105 + .byte 17,0,0,59,69,48,15,131,96,17,0,0,193,224,3,72 + .byte 3,69,16,76,139,24,73,131,251,255,116,23,76,137,28,202 + .byte 139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65 + .byte 255,36,238,76,139,85,32,77,133,210,116,224,65,246,66,10 + .byte 1,15,132,37,17,0,0,235,211 + + .globl lj_BC_TGETR + .hidden lj_BC_TGETR + .type lj_BC_TGETR, @function + .size lj_BC_TGETR, 65 +lj_BC_TGETR: + .byte 15,182,236,15,182,192,72,139,44,234,72,193,229,17,72,193 + .byte 237,17,242,15,44,4,194,59,69,48,15,131,142,17,0,0 + .byte 193,224,3,72,3,69,16,76,139,24,76,137,28,202,139,3 + .byte 15,182,204,15,182,232,72,131,195,4,193,232,16,65,255,36 + .byte 238 + + .globl lj_BC_TSETV + .hidden lj_BC_TSETV + .type lj_BC_TSETV, @function + .size lj_BC_TSETV, 195 +lj_BC_TSETV: + .byte 15,182,236,15,182,192,72,139,44,234,72,139,4,194,73,137 + .byte 235,72,193,229,17,72,193,237,17,73,193,251,47,65,131,251 + .byte 244,15,133,183,17,0,0,73,137,195,73,193,251,47,65,131 + .byte 251,242,115,99,102,72,15,110,192,242,15,44,192,242,15,42 + .byte 200,102,15,46,193,15,133,147,17,0,0,59,69,48,15,131 + .byte 138,17,0,0,193,224,3,72,3,69,16,72,131,56,255,116 + .byte 32,246,69,8,4,117,68,72,139,44,202,72,137,40,139,3 + .byte 15,182,204,15,182,232,72,131,195,4,193,232,16,65,255,36 + .byte 238,76,139,85,32,77,133,210,116,215,65,246,66,10,2,15 + .byte 132,73,17,0,0,235,202,65,131,251,251,15,133,61,17,0 + .byte 0,72,193,224,17,72,193,232,17,235,66,128,101,8,251,77 + .byte 
139,150,240,240,255,255,73,137,174,240,240,255,255,76,137,85 + .byte 24,235,164 + + .globl lj_BC_TSETS + .hidden lj_BC_TSETS + .type lj_BC_TSETS, @function + .size lj_BC_TSETS, 252 +lj_BC_TSETS: + .byte 15,182,236,15,182,192,72,139,44,234,72,247,208,73,139,4 + .byte 199,73,137,235,72,193,229,17,72,193,237,17,73,193,251,47 + .byte 65,131,251,244,15,133,169,16,0,0,68,139,85,52,68,35 + .byte 80,12,69,107,210,24,198,69,10,0,76,3,85,40,73,187 + .byte 0,0,0,0,0,128,253,255,73,9,195,77,57,90,8,117 + .byte 64,73,131,58,255,116,36,246,69,8,4,15,133,128,0,0 + .byte 0,76,139,28,202,77,137,26,139,3,15,182,204,15,182,232 + .byte 72,131,195,4,193,232,16,65,255,36,238,76,139,93,32,77 + .byte 133,219,116,211,65,246,67,10,2,15,132,68,16,0,0,235 + .byte 198,77,139,82,16,77,133,210,117,177,76,139,85,32,77,133 + .byte 210,116,11,65,246,66,10,2,15,132,37,16,0,0,76,137 + .byte 28,36,72,139,124,36,16,72,137,87,32,72,141,20,36,72 + .byte 137,238,72,137,92,36,24 + call lj_tab_newkey + .byte 72,139,124,36,16,72,139,87,32,73,137,194,15,182,75,253 + .byte 233,118,255,255,255,128,101,8,251,77,139,158,240,240,255,255 + .byte 73,137,174,240,240,255,255,76,137,93,24,233,101,255,255,255 + + .globl lj_BC_TSETB + .hidden lj_BC_TSETB + .type lj_BC_TSETB, @function + .size lj_BC_TSETB, 135 +lj_BC_TSETB: + .byte 15,182,236,15,182,192,72,139,44,234,73,137,235,72,193,229 + .byte 17,72,193,237,17,73,193,251,47,65,131,251,244,15,133,233 + .byte 15,0,0,59,69,48,15,131,224,15,0,0,193,224,3,72 + .byte 3,69,16,72,131,56,255,116,32,246,69,8,4,117,48,76 + .byte 139,28,202,76,137,24,139,3,15,182,204,15,182,232,72,131 + .byte 195,4,193,232,16,65,255,36,238,76,139,85,32,77,133,210 + .byte 116,215,65,246,66,10,2,15,132,159,15,0,0,235,202,128 + .byte 101,8,251,77,139,150,240,240,255,255,73,137,174,240,240,255 + .byte 255,76,137,85,24,235,184 + + .globl lj_BC_TSETM + .hidden lj_BC_TSETM + .type lj_BC_TSETM, @function + .size lj_BC_TSETM, 162 +lj_BC_TSETM: + .byte 
69,139,20,199,72,141,12,202,72,139,105,248,72,193,229,17 + .byte 72,193,237,17,246,69,8,4,117,109,139,4,36,131,232,1 + .byte 116,38,68,1,208,59,69,48,119,49,68,41,208,65,193,226 + .byte 3,76,3,85,16,72,139,41,72,131,193,8,73,137,42,73 + .byte 131,194,8,131,232,1,117,237,139,3,15,182,204,15,182,232 + .byte 72,131,195,4,193,232,16,65,255,36,238,72,139,124,36,16 + .byte 72,137,87,32,72,137,238,137,194,72,137,253,72,137,92,36 + .byte 24 + call lj_tab_reasize + .byte 72,139,85,32,15,182,75,253,15,183,67,254,233,121,255,255 + .byte 255,128,101,8,251,73,139,134,240,240,255,255,73,137,174,240 + .byte 240,255,255,72,137,69,24,233,120,255,255,255 + + .globl lj_BC_TSETR + .hidden lj_BC_TSETR + .type lj_BC_TSETR, @function + .size lj_BC_TSETR, 95 +lj_BC_TSETR: + .byte 15,182,236,15,182,192,72,139,44,234,72,193,229,17,72,193 + .byte 237,17,242,15,44,4,194,246,69,8,4,117,42,59,69,48 + .byte 15,131,84,15,0,0,193,224,3,72,3,69,16,76,139,28 + .byte 202,76,137,24,139,3,15,182,204,15,182,232,72,131,195,4 + .byte 193,232,16,65,255,36,238,128,101,8,251,77,139,150,240,240 + .byte 255,255,73,137,174,240,240,255,255,76,137,85,24,235,190 + + .globl lj_BC_CALLM + .hidden lj_BC_CALLM + .type lj_BC_CALLM, @function + .size lj_BC_CALLM, 64 +lj_BC_CALLM: + .byte 15,182,192,3,4,36,72,139,44,202,73,137,235,72,193,229 + .byte 17,72,193,237,17,73,193,251,47,65,131,251,247,15,133,162 + .byte 16,0,0,72,141,84,202,16,72,137,90,248,72,139,93,32 + .byte 139,11,15,182,233,15,182,205,72,131,195,4,65,255,36,238 + + .globl lj_BC_CALL + .hidden lj_BC_CALL + .type lj_BC_CALL, @function + .size lj_BC_CALL, 61 +lj_BC_CALL: + .byte 15,182,192,72,139,44,202,73,137,235,72,193,229,17,72,193 + .byte 237,17,73,193,251,47,65,131,251,247,15,133,101,16,0,0 + .byte 72,141,84,202,16,72,137,90,248,72,139,93,32,139,11,15 + .byte 182,233,15,182,205,72,131,195,4,65,255,36,238 + + .globl lj_BC_CALLMT + .hidden lj_BC_CALLMT + .type lj_BC_CALLMT, @function + .size lj_BC_CALLMT, 3 +lj_BC_CALLMT: + .byte 3,4,36 + + .globl 
lj_BC_CALLT + .hidden lj_BC_CALLT + .type lj_BC_CALLT, @function + .size lj_BC_CALLT, 190 +lj_BC_CALLT: + .byte 72,141,76,202,16,73,137,215,72,139,105,240,73,137,235,73 + .byte 193,251,47,65,131,251,247,15,133,45,16,0,0,72,139,90 + .byte 248,247,195,3,0,0,0,15,133,110,0,0,0,72,137,106 + .byte 240,137,4,36,131,232,1,116,23,72,139,41,72,131,193,8 + .byte 73,137,47,73,131,199,8,131,232,1,117,237,72,139,106,240 + .byte 72,193,229,17,72,193,237,17,139,4,36,128,125,10,1,119 + .byte 20,72,139,93,32,139,11,15,182,233,15,182,205,72,131,195 + .byte 4,65,255,36,238,247,195,3,0,0,0,117,228,15,182,75 + .byte 253,72,247,217,76,139,124,202,224,73,193,231,17,73,193,239 + .byte 17,77,139,127,32,77,139,127,176,235,198,72,131,235,3,247 + .byte 195,7,0,0,0,117,15,72,41,218,73,137,215,72,139,90 + .byte 248,233,119,255,255,255,131,195,3,233,111,255,255,255 + + .globl lj_BC_ITERC + .hidden lj_BC_ITERC + .type lj_BC_ITERC, @function + .size lj_BC_ITERC, 85 +lj_BC_ITERC: + .byte 72,141,76,202,16,72,139,105,224,72,139,65,232,72,137,41 + .byte 72,137,65,8,72,139,105,216,72,137,105,240,184,3,0,0 + .byte 0,73,137,235,72,193,229,17,72,193,237,17,73,193,251,47 + .byte 65,131,251,247,15,133,82,15,0,0,72,137,202,72,137,90 + .byte 248,72,139,93,32,139,11,15,182,233,15,182,205,72,131,195 + .byte 4,65,255,36,238 + + .globl lj_BC_ITERN + .hidden lj_BC_ITERN + .type lj_BC_ITERN, @function + .size lj_BC_ITERN, 157 +lj_BC_ITERN: + .byte 72,139,108,202,240,72,193,229,17,72,193,237,17,139,68,202 + .byte 248,68,139,85,48,72,131,195,4,76,139,93,16,68,57,208 + .byte 115,68,73,131,60,195,255,116,56,242,15,42,192,73,139,44 + .byte 195,72,137,108,202,8,242,15,17,4,202,131,192,1,137,68 + .byte 202,248,15,183,67,254,72,141,156,131,0,0,254,255,139,3 + .byte 15,182,204,15,182,232,72,131,195,4,193,232,16,65,255,36 + .byte 238,131,192,1,235,183,68,41,208,59,69,52,119,224,68,107 + .byte 216,24,76,3,93,40,73,131,59,255,116,28,70,141,84,16 + .byte 1,73,139,107,8,73,139,3,72,137,44,202,72,137,68,202 + .byte 
8,68,137,84,202,248,235,170,131,192,1,235,204 + + .globl lj_BC_VARG + .hidden lj_BC_VARG + .type lj_BC_VARG, @function + .size lj_BC_VARG, 209 +lj_BC_VARG: + .byte 15,182,236,15,182,192,76,141,84,194,19,72,141,12,202,76 + .byte 43,82,248,72,133,237,116,70,72,141,108,233,248,73,57,210 + .byte 115,25,73,139,66,240,73,131,194,8,72,137,1,72,131,193 + .byte 8,72,57,233,115,21,73,57,210,114,231,72,199,1,255,255 + .byte 255,255,72,131,193,8,72,57,233,114,240,139,3,15,182,204 + .byte 15,182,232,72,131,195,4,193,232,16,65,255,36,238,199,4 + .byte 36,1,0,0,0,72,137,208,76,41,208,118,222,137,197,193 + .byte 237,3,131,197,1,137,44,36,72,139,108,36,16,72,1,200 + .byte 72,59,69,48,119,22,73,139,66,240,73,131,194,8,72,137 + .byte 1,72,131,193,8,73,57,210,114,236,235,175,72,137,85,32 + .byte 72,137,77,40,72,137,92,36,24,73,41,210,68,137,84,36 + .byte 4,139,52,36,131,238,1,72,137,239 + call lj_state_growstack + .byte 72,139,85,32,76,99,84,36,4,72,139,77,40,73,1,210 + .byte 235,181 + + .globl lj_BC_ISNEXT + .hidden lj_BC_ISNEXT + .type lj_BC_ISNEXT, @function + .size lj_BC_ISNEXT, 114 +lj_BC_ISNEXT: + .byte 72,139,108,202,232,73,137,235,72,193,229,17,72,193,237,17 + .byte 73,193,251,47,65,131,251,247,117,71,76,139,92,202,240,73 + .byte 193,251,47,65,131,251,244,117,56,72,131,124,202,248,255,117 + .byte 48,128,125,10,4,117,42,72,141,156,131,0,0,254,255,73 + .byte 186,0,0,0,0,255,127,254,255,76,137,84,202,248,139,3 + .byte 15,182,204,15,182,232,72,131,195,4,193,232,16,65,255,36 + .byte 238,198,67,252,88,72,141,156,131,0,0,254,255,198,3,69 + .byte 235,220 + + .globl lj_BC_RETM + .hidden lj_BC_RETM + .type lj_BC_RETM, @function + .size lj_BC_RETM, 3 +lj_BC_RETM: + .byte 3,4,36 + + .globl lj_BC_RET + .hidden lj_BC_RET + .type lj_BC_RET, @function + .size lj_BC_RET, 150 +lj_BC_RET: + .byte 193,225,3,72,139,90,248,137,4,36,247,195,3,0,0,0 + .byte 117,105,73,137,215,131,232,1,116,17,73,139,44,15,73,137 + .byte 111,240,73,131,199,8,131,232,1,117,239,139,4,36,15,182 + .byte 
107,255,57,197,119,51,15,182,75,253,72,247,217,72,141,84 + .byte 202,240,76,139,122,240,73,193,231,17,73,193,239,17,77,139 + .byte 127,32,77,139,127,176,139,3,15,182,204,15,182,232,72,131 + .byte 195,4,193,232,16,65,255,36,238,73,199,71,240,255,255,255 + .byte 255,73,131,199,8,72,131,192,1,235,183,72,141,107,253,247 + .byte 197,7,0,0,0,15,133,154,5,0,0,72,41,234,72,1 + .byte 233,233,109,255,255,255 + + .globl lj_BC_RET0 + .hidden lj_BC_RET0 + .type lj_BC_RET0, @function + .size lj_BC_RET0, 107 +lj_BC_RET0: + .byte 72,139,90,248,137,4,36,247,195,3,0,0,0,117,71,56 + .byte 67,255,119,51,15,182,75,253,72,247,217,72,141,84,202,240 + .byte 76,139,122,240,73,193,231,17,73,193,239,17,77,139,127,32 + .byte 77,139,127,176,139,3,15,182,204,15,182,232,72,131,195,4 + .byte 193,232,16,65,255,36,238,72,199,68,194,232,255,255,255,255 + .byte 72,131,192,1,235,185,72,141,107,253,247,197,7,0,0,0 + .byte 15,133,41,5,0,0,72,41,234,235,149 + + .globl lj_BC_RET1 + .hidden lj_BC_RET1 + .type lj_BC_RET1, @function + .size lj_BC_RET1, 121 +lj_BC_RET1: + .byte 193,225,3,72,139,90,248,137,4,36,247,195,3,0,0,0 + .byte 117,79,72,139,44,10,72,137,106,240,56,67,255,119,51,15 + .byte 182,75,253,72,247,217,72,141,84,202,240,76,139,122,240,73 + .byte 193,231,17,73,193,239,17,77,139,127,32,77,139,127,176,139 + .byte 3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,255 + .byte 36,238,72,199,68,194,232,255,255,255,255,72,131,192,1,235 + .byte 185,72,141,107,253,247,197,7,0,0,0,15,133,179,4,0 + .byte 0,72,41,234,72,1,233,235,138 + + .globl lj_BC_FORI + .hidden lj_BC_FORI + .type lj_BC_FORI, @function + .size lj_BC_FORI, 115 +lj_BC_FORI: + .byte 72,141,12,202,76,139,25,73,193,251,47,65,131,251,242,15 + .byte 131,41,12,0,0,76,139,89,8,73,193,251,47,65,131,251 + .byte 242,15,131,23,12,0,0,72,139,105,16,73,137,235,73,193 + .byte 251,47,65,131,251,242,15,131,2,12,0,0,242,15,16,1 + .byte 242,15,16,73,8,124,38,102,15,46,200,242,15,17,65,24 + .byte 115,8,72,141,156,131,0,0,254,255,139,3,15,182,204,15 + .byte 
182,232,72,131,195,4,193,232,16,65,255,36,238,102,15,46 + .byte 193,235,216 + + .globl lj_BC_JFORI + .hidden lj_BC_JFORI + .type lj_BC_JFORI, @function + .size lj_BC_JFORI, 123 +lj_BC_JFORI: + .byte 72,141,12,202,76,139,25,73,193,251,47,65,131,251,242,15 + .byte 131,182,11,0,0,76,139,89,8,73,193,251,47,65,131,251 + .byte 242,15,131,164,11,0,0,72,139,105,16,73,137,235,73,193 + .byte 251,47,65,131,251,242,15,131,143,11,0,0,242,15,16,1 + .byte 242,15,16,73,8,124,46,102,15,46,200,242,15,17,65,24 + .byte 72,141,156,131,0,0,254,255,15,183,67,254,15,131,76,1 + .byte 0,0,139,3,15,182,204,15,182,232,72,131,195,4,193,232 + .byte 16,65,255,36,238,102,15,46,193,235,208 + + .globl lj_BC_FORL + .hidden lj_BC_FORL + .type lj_BC_FORL, @function + .size lj_BC_FORL, 20 +lj_BC_FORL: + .byte 137,221,209,237,131,229,126,102,65,131,108,46,128,2,15,130 + .byte 131,32,0,0 + + .globl lj_BC_IFORL + .hidden lj_BC_IFORL + .type lj_BC_IFORL, @function + .size lj_BC_IFORL, 75 +lj_BC_IFORL: + .byte 72,141,12,202,72,139,105,16,242,15,16,1,242,15,16,73 + .byte 8,242,15,88,65,16,242,15,17,1,72,133,237,120,38,102 + .byte 15,46,200,242,15,17,65,24,114,8,72,141,156,131,0,0 + .byte 254,255,139,3,15,182,204,15,182,232,72,131,195,4,193,232 + .byte 16,65,255,36,238,102,15,46,193,235,216 + + .globl lj_BC_JFORL + .hidden lj_BC_JFORL + .type lj_BC_JFORL, @function + .size lj_BC_JFORL, 71 +lj_BC_JFORL: + .byte 72,141,12,202,72,139,105,16,242,15,16,1,242,15,16,73 + .byte 8,242,15,88,65,16,242,15,17,1,72,133,237,120,34,102 + .byte 15,46,200,242,15,17,65,24,15,131,166,0,0,0,139,3 + .byte 15,182,204,15,182,232,72,131,195,4,193,232,16,65,255,36 + .byte 238,102,15,46,193,235,220 + + .globl lj_BC_ITERL + .hidden lj_BC_ITERL + .type lj_BC_ITERL, @function + .size lj_BC_ITERL, 20 +lj_BC_ITERL: + .byte 137,221,209,237,131,229,126,102,65,131,108,46,128,2,15,130 + .byte 221,31,0,0 + + .globl lj_BC_IITERL + .hidden lj_BC_IITERL + .type lj_BC_IITERL, @function + .size lj_BC_IITERL, 44 +lj_BC_IITERL: + .byte 
72,141,12,202,72,139,41,72,131,253,255,116,12,72,141,156 + .byte 131,0,0,254,255,72,137,105,248,139,3,15,182,204,15,182 + .byte 232,72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_JITERL + .hidden lj_BC_JITERL + .type lj_BC_JITERL, @function + .size lj_BC_JITERL, 38 +lj_BC_JITERL: + .byte 72,141,12,202,72,139,41,72,131,253,255,116,6,72,137,105 + .byte 248,235,58,139,3,15,182,204,15,182,232,72,131,195,4,193 + .byte 232,16,65,255,36,238 + + .globl lj_BC_LOOP + .hidden lj_BC_LOOP + .type lj_BC_LOOP, @function + .size lj_BC_LOOP, 20 +lj_BC_LOOP: + .byte 137,221,209,237,131,229,126,102,65,131,108,46,128,2,15,130 + .byte 119,31,0,0 + + .globl lj_BC_ILOOP + .hidden lj_BC_ILOOP + .type lj_BC_ILOOP, @function + .size lj_BC_ILOOP, 19 +lj_BC_ILOOP: + .byte 139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65 + .byte 255,36,238 + + .globl lj_BC_JLOOP + .hidden lj_BC_JLOOP + .type lj_BC_JLOOP, @function + .size lj_BC_JLOOP, 50 +lj_BC_JLOOP: + .byte 73,139,142,232,244,255,255,72,139,4,193,72,139,64,88,72 + .byte 139,108,36,16,73,137,150,8,242,255,255,73,137,174,72,241 + .byte 255,255,72,131,236,16,76,137,100,36,16,76,137,108,36,8 + .byte 255,224 + + .globl lj_BC_JMP + .hidden lj_BC_JMP + .type lj_BC_JMP, @function + .size lj_BC_JMP, 27 +lj_BC_JMP: + .byte 72,141,156,131,0,0,254,255,139,3,15,182,204,15,182,232 + .byte 72,131,195,4,193,232,16,65,255,36,238 + + .globl lj_BC_FUNCF + .hidden lj_BC_FUNCF + .type lj_BC_FUNCF, @function + .size lj_BC_FUNCF, 20 +lj_BC_FUNCF: + .byte 137,221,209,237,131,229,126,102,65,131,108,46,128,1,15,130 + .byte 76,31,0,0 + + .globl lj_BC_IFUNCF + .hidden lj_BC_IFUNCF + .type lj_BC_IFUNCF, @function + .size lj_BC_IFUNCF, 68 +lj_BC_IFUNCF: + .byte 76,139,123,172,72,139,108,36,16,72,141,12,202,72,59,77 + .byte 48,15,135,81,3,0,0,15,182,75,150,57,200,118,19,139 + .byte 3,15,182,204,15,182,232,72,131,195,4,193,232,16,65,255 + .byte 36,238,72,199,68,194,248,255,255,255,255,131,192,1,57,200 + .byte 118,240,235,219 + + .globl lj_BC_JFUNCF + .hidden 
lj_BC_JFUNCF + .type lj_BC_JFUNCF, @function + .size lj_BC_JFUNCF, 58 +lj_BC_JFUNCF: + .byte 76,139,123,172,72,139,108,36,16,72,141,12,202,72,59,77 + .byte 48,15,135,13,3,0,0,15,182,75,150,57,200,118,9,15 + .byte 183,67,254,233,51,255,255,255,72,199,68,194,248,255,255,255 + .byte 255,131,192,1,57,200,118,240,235,229 + + .globl lj_BC_FUNCV + .hidden lj_BC_FUNCV + .type lj_BC_FUNCV, @function + .size lj_BC_FUNCV, 0 +lj_BC_FUNCV: + + .globl lj_BC_IFUNCV + .hidden lj_BC_IFUNCV + .type lj_BC_IFUNCV, @function + .size lj_BC_IFUNCV, 135 +lj_BC_IFUNCV: + .byte 141,44,197,11,0,0,0,72,141,68,194,8,76,139,122,240 + .byte 72,137,104,248,76,137,120,240,72,139,108,36,16,72,141,12 + .byte 200,72,59,77,48,15,135,185,2,0,0,72,137,209,72,137 + .byte 194,15,182,107,150,133,237,116,37,72,131,193,8,72,131,193 + .byte 8,72,57,209,115,47,76,139,121,240,76,137,56,72,131,192 + .byte 8,72,199,65,240,255,255,255,255,131,237,1,117,223,76,139 + .byte 123,172,139,3,15,182,204,15,182,232,72,131,195,4,193,232 + .byte 16,65,255,36,238,72,199,0,255,255,255,255,72,131,192,8 + .byte 131,237,1,117,240,235,215 + + .globl lj_BC_JFUNCV + .hidden lj_BC_JFUNCV + .type lj_BC_JFUNCV, @function + .size lj_BC_JFUNCV, 1 +lj_BC_JFUNCV: + .byte 204 + + .globl lj_BC_FUNCC + .hidden lj_BC_FUNCC + .type lj_BC_FUNCC, @function + .size lj_BC_FUNCC, 110 +lj_BC_FUNCC: + .byte 72,139,106,240,72,193,229,17,72,193,237,17,76,139,125,40 + .byte 72,139,108,36,16,72,141,68,194,248,72,137,85,32,72,141 + .byte 136,160,0,0,0,72,59,77,48,72,137,69,40,72,137,239 + .byte 15,135,31,2,0,0,65,199,134,32,241,255,255,254,255,255 + .byte 255,65,255,215,72,139,85,32,73,137,174,0,242,255,255,65 + .byte 199,134,32,241,255,255,255,255,255,255,72,141,12,194,72,247 + .byte 217,72,3,77,40,72,139,90,248,233,156,0,0,0 + + .globl lj_BC_FUNCCW + .hidden lj_BC_FUNCCW + .type lj_BC_FUNCCW, @function + .size lj_BC_FUNCCW, 114 +lj_BC_FUNCCW: + .byte 72,139,106,240,72,193,229,17,72,193,237,17,76,139,125,40 + .byte 
72,139,108,36,16,72,141,68,194,248,72,137,85,32,72,141 + .byte 136,160,0,0,0,72,59,77,48,72,137,69,40,76,137,254 + .byte 72,137,239,15,135,174,1,0,0,65,199,134,32,241,255,255 + .byte 254,255,255,255,65,255,150,232,241,255,255,72,139,85,32,73 + .byte 137,174,0,242,255,255,65,199,134,32,241,255,255,255,255,255 + .byte 255,72,141,12,194,72,247,217,72,3,77,40,72,139,90,248 + .byte 235,42 + + .globl lj_vm_returnp + .hidden lj_vm_returnp + .type lj_vm_returnp, @function + .size lj_vm_returnp, 42 +lj_vm_returnp: + .byte 247,195,4,0,0,0,15,132,124,3,0,0,72,131,227,248 + .byte 72,41,218,72,141,76,25,248,72,139,90,248,73,187,255,255 + .byte 255,255,255,255,254,255,76,137,28,10 + + .globl lj_vm_returnc + .hidden lj_vm_returnc + .type lj_vm_returnc, @function + .size lj_vm_returnc, 25 +lj_vm_returnc: + .byte 131,192,1,15,132,186,0,0,0,137,4,36,72,247,195,3 + .byte 0,0,0,15,132,237,249,255,255 + + .globl lj_vm_return + .hidden lj_vm_return + .type lj_vm_return, @function + .size lj_vm_return, 83 +lj_vm_return: + .byte 72,131,243,1,247,195,3,0,0,0,117,177,65,199,134,32 + .byte 241,255,255,254,255,255,255,72,131,227,248,72,41,211,72,247 + .byte 219,131,232,1,116,17,72,139,44,10,72,137,106,240,72,131 + .byte 194,8,131,232,1,117,239,72,139,108,36,16,72,137,93,32 + .byte 139,4,36,139,76,36,8,57,193,117,34,72,131,234,16,72 + .byte 137,85,40 + + .globl lj_vm_leave_cp + .hidden lj_vm_leave_cp + .type lj_vm_leave_cp, @function + .size lj_vm_leave_cp, 11 +lj_vm_leave_cp: + .byte 72,139,76,36,32,72,137,77,80,49,192 + + .globl lj_vm_leave_unw + .hidden lj_vm_leave_unw + .type lj_vm_leave_unw, @function + .size lj_vm_leave_unw, 76 +lj_vm_leave_unw: + .byte 72,131,196,40,65,92,65,93,65,94,65,95,91,93,195,114 + .byte 23,72,59,85,48,119,30,72,199,66,240,255,255,255,255,72 + .byte 131,194,8,131,192,1,235,193,133,201,116,193,72,41,193,72 + .byte 141,20,202,235,184,72,137,85,40,137,4,36,137,206,72,137 + .byte 239 + call lj_state_growstack + .byte 72,139,85,40,235,150 + + .globl lj_vm_unwind_yield 
+ .hidden lj_vm_unwind_yield + .type lj_vm_unwind_yield, @function + .size lj_vm_unwind_yield, 4 +lj_vm_unwind_yield: + .byte 176,1,235,5 + + .globl lj_vm_unwind_c + .hidden lj_vm_unwind_c + .type lj_vm_unwind_c, @function + .size lj_vm_unwind_c, 5 +lj_vm_unwind_c: + .byte 137,240,72,137,252 + + .globl lj_vm_unwind_c_eh + .hidden lj_vm_unwind_c_eh + .type lj_vm_unwind_c_eh, @function + .size lj_vm_unwind_c_eh, 21 +lj_vm_unwind_c_eh: + .byte 72,139,108,36,16,72,139,109,16,199,133,128,0,0,0,254 + .byte 255,255,255,235,150 + + .globl lj_vm_unwind_rethrow + .hidden lj_vm_unwind_rethrow + .type lj_vm_unwind_rethrow, @function + .size lj_vm_unwind_rethrow, 26 +lj_vm_unwind_rethrow: + .byte 72,139,124,36,16,137,198,72,131,196,40,65,92,65,93,65 + .byte 94,65,95,91,93 + jmp lj_err_throw + + .globl lj_vm_unwind_ff + .hidden lj_vm_unwind_ff + .type lj_vm_unwind_ff, @function + .size lj_vm_unwind_ff, 7 +lj_vm_unwind_ff: + .byte 72,131,231,252,72,137,252 + + .globl lj_vm_unwind_ff_eh + .hidden lj_vm_unwind_ff_eh + .type lj_vm_unwind_ff_eh, @function + .size lj_vm_unwind_ff_eh, 73 +lj_vm_unwind_ff_eh: + .byte 72,139,108,36,16,184,2,0,0,0,72,139,85,32,76,139 + .byte 117,16,73,129,198,96,15,0,0,72,139,90,248,72,185,255 + .byte 255,255,255,255,127,255,255,72,139,42,72,137,74,240,72,137 + .byte 106,248,72,199,193,240,255,255,255,65,199,134,32,241,255,255 + .byte 255,255,255,255,233,181,254,255,255 + + .globl lj_vm_growstack_c + .hidden lj_vm_growstack_c + .type lj_vm_growstack_c, @function + .size lj_vm_growstack_c, 7 +lj_vm_growstack_c: + .byte 190,20,0,0,0,235,35 + + .globl lj_vm_growstack_v + .hidden lj_vm_growstack_v + .type lj_vm_growstack_v, @function + .size lj_vm_growstack_v, 6 +lj_vm_growstack_v: + .byte 72,131,232,16,235,5 + + .globl lj_vm_growstack_f + .hidden lj_vm_growstack_f + .type lj_vm_growstack_f, @function + .size lj_vm_growstack_f, 86 +lj_vm_growstack_f: + .byte 72,141,68,194,248,15,182,75,151,72,131,195,4,72,137,85 + .byte 
32,72,137,69,40,72,137,92,36,24,72,137,206,72,137,239 + call lj_state_growstack + .byte 72,139,85,32,72,139,69,40,72,139,106,240,72,193,229,17 + .byte 72,193,237,17,72,41,208,193,232,3,131,192,1,72,139,93 + .byte 32,139,11,15,182,233,15,182,205,72,131,195,4,65,255,36 + .byte 238 + + .globl lj_vm_resume + .hidden lj_vm_resume + .type lj_vm_resume, @function + .size lj_vm_resume, 144 +lj_vm_resume: + .byte 85,83,65,87,65,86,65,85,65,84,72,131,236,40,72,137 + .byte 253,72,137,124,36,16,72,137,241,187,5,0,0,0,49,192 + .byte 76,141,124,36,1,76,139,117,16,73,129,198,96,15,0,0 + .byte 72,137,68,36,24,72,137,68,36,32,137,68,36,8,137,68 + .byte 36,12,76,137,125,80,56,69,11,15,132,153,0,0,0,73 + .byte 137,174,0,242,255,255,65,199,134,32,241,255,255,255,255,255 + .byte 255,136,69,11,72,139,85,32,72,139,69,40,72,41,200,193 + .byte 232,3,131,192,1,72,41,209,72,139,90,248,137,4,36,247 + .byte 195,3,0,0,0,15,132,205,247,255,255,233,219,253,255,255 + + .globl lj_vm_pcall + .hidden lj_vm_pcall + .type lj_vm_pcall, @function + .size lj_vm_pcall, 25 +lj_vm_pcall: + .byte 85,83,65,87,65,86,65,85,65,84,72,131,236,40,187,5 + .byte 0,0,0,137,76,36,12,235,19 + + .globl lj_vm_call + .hidden lj_vm_call + .type lj_vm_call, @function + .size lj_vm_call, 104 +lj_vm_call: + .byte 85,83,65,87,65,86,65,85,65,84,72,131,236,40,187,1 + .byte 0,0,0,137,84,36,8,72,137,253,72,137,124,36,16,72 + .byte 137,241,76,139,117,16,76,139,125,80,76,137,124,36,32,72 + .byte 137,108,36,24,73,129,198,96,15,0,0,72,137,101,80,73 + .byte 137,174,0,242,255,255,65,199,134,32,241,255,255,255,255,255 + .byte 255,72,139,85,32,72,1,203,72,41,211,72,139,69,40,72 + .byte 41,200,193,232,3,131,192,1 + + .globl lj_vm_call_dispatch + .hidden lj_vm_call_dispatch + .type lj_vm_call_dispatch, @function + .size lj_vm_call_dispatch, 29 +lj_vm_call_dispatch: + .byte 72,139,105,240,73,137,235,72,193,229,17,72,193,237,17,73 + .byte 193,251,47,65,131,251,247,15,133,108,4,0,0 + + .globl lj_vm_call_dispatch_f + .hidden lj_vm_call_dispatch_f 
+ .type lj_vm_call_dispatch_f, @function + .size lj_vm_call_dispatch_f, 27 +lj_vm_call_dispatch_f: + .byte 72,137,202,72,137,90,248,72,139,93,32,139,11,15,182,233 + .byte 15,182,205,72,131,195,4,65,255,36,238 + + .globl lj_vm_cpcall + .hidden lj_vm_cpcall + .type lj_vm_cpcall, @function + .size lj_vm_cpcall, 103 +lj_vm_cpcall: + .byte 85,83,65,87,65,86,65,85,65,84,72,131,236,40,72,137 + .byte 253,72,137,124,36,16,72,137,108,36,24,76,139,125,56,76 + .byte 43,125,40,76,139,117,16,199,68,36,12,0,0,0,0,68 + .byte 137,124,36,8,73,129,198,96,15,0,0,76,139,125,80,76 + .byte 137,124,36,32,72,137,101,80,73,137,174,0,242,255,255,255 + .byte 209,72,133,192,15,132,27,253,255,255,72,137,193,187,5,0 + .byte 0,0,233,56,255,255,255 + + .globl lj_cont_dispatch + .hidden lj_cont_dispatch + .type lj_cont_dispatch, @function + .size lj_cont_dispatch, 81 +lj_cont_dispatch: + .byte 72,1,209,72,131,227,248,72,137,213,72,41,218,72,199,68 + .byte 193,248,255,255,255,255,72,137,200,72,139,93,232,72,139,77 + .byte 224,72,131,249,1,118,22,76,139,122,240,73,193,231,17,73 + .byte 193,239,17,77,139,127,32,77,139,127,176,255,225,15,132,19 + .byte 31,0,0,72,41,213,193,237,3,141,69,253,233,84,24,0 + .byte 0 + + .globl lj_cont_cat + .hidden lj_cont_cat + .type lj_cont_cat, @function + .size lj_cont_cat, 53 +lj_cont_cat: + .byte 15,182,75,255,72,131,237,32,72,141,12,202,72,41,233,15 + .byte 132,157,0,0,0,72,247,217,193,233,3,72,139,124,36,16 + .byte 72,137,87,32,137,202,72,139,8,72,137,77,0,72,137,238 + .byte 233,57,233,255,255 + + .globl lj_vmeta_tgets + .hidden lj_vmeta_tgets + .type lj_vmeta_tgets, @function + .size lj_vmeta_tgets, 53 +lj_vmeta_tgets: + .byte 73,187,0,0,0,0,0,128,253,255,76,9,216,72,137,4 + .byte 36,72,141,4,36,128,123,252,54,117,53,72,185,0,0,0 + .byte 0,0,0,250,255,72,9,233,73,141,174,128,241,255,255,72 + .byte 137,77,0,235,35 + + .globl lj_vmeta_tgetb + .hidden lj_vmeta_tgetb + .type lj_vmeta_tgetb, @function + .size lj_vmeta_tgetb, 19 +lj_vmeta_tgetb: + .byte 
15,182,67,254,242,15,42,192,242,15,17,4,36,72,141,4 + .byte 36,235,8 + + .globl lj_vmeta_tgetv + .hidden lj_vmeta_tgetv + .type lj_vmeta_tgetv, @function + .size lj_vmeta_tgetv, 53 +lj_vmeta_tgetv: + .byte 15,182,67,254,72,141,4,194,15,182,107,255,72,141,44,234 + .byte 72,139,124,36,16,72,137,87,32,72,137,238,72,137,194,72 + .byte 137,253,72,137,92,36,24 + call lj_meta_tget + .byte 72,139,85,32,72,133,192,116,30 + + .globl lj_cont_ra + .hidden lj_cont_ra + .type lj_cont_ra, @function + .size lj_cont_ra, 67 +lj_cont_ra: + .byte 15,182,75,253,72,139,40,72,137,44,202,139,3,15,182,204 + .byte 15,182,232,72,131,195,4,193,232,16,65,255,36,238,72,139 + .byte 77,40,72,137,89,232,72,141,89,2,72,41,211,72,139,105 + .byte 240,184,3,0,0,0,72,193,229,17,72,193,237,17,233,56 + .byte 254,255,255 + + .globl lj_vmeta_tgetr + .hidden lj_vmeta_tgetr + .type lj_vmeta_tgetr, @function + .size lj_vmeta_tgetr, 41 +lj_vmeta_tgetr: + .byte 72,137,239,72,137,213,137,198 + call lj_tab_getinth + .byte 15,182,75,253,72,137,234,72,133,192,15,133,92,238,255,255 + .byte 73,199,195,255,255,255,255,233,83,238,255,255 + + .globl lj_vmeta_tsets + .hidden lj_vmeta_tsets + .type lj_vmeta_tsets, @function + .size lj_vmeta_tsets, 53 +lj_vmeta_tsets: + .byte 73,187,0,0,0,0,0,128,253,255,76,9,216,72,137,4 + .byte 36,72,141,4,36,128,123,252,55,117,53,72,185,0,0,0 + .byte 0,0,0,250,255,72,9,233,73,141,174,128,241,255,255,72 + .byte 137,77,0,235,35 + + .globl lj_vmeta_tsetb + .hidden lj_vmeta_tsetb + .type lj_vmeta_tsetb, @function + .size lj_vmeta_tsetb, 19 +lj_vmeta_tsetb: + .byte 15,182,67,254,242,15,42,192,242,15,17,4,36,72,141,4 + .byte 36,235,8 + + .globl lj_vmeta_tsetv + .hidden lj_vmeta_tsetv + .type lj_vmeta_tsetv, @function + .size lj_vmeta_tsetv, 64 +lj_vmeta_tsetv: + .byte 15,182,67,254,72,141,4,194,15,182,107,255,72,141,44,234 + .byte 72,139,124,36,16,72,137,87,32,72,137,238,72,137,194,72 + .byte 137,253,72,137,92,36,24 + call lj_meta_tset + .byte 
72,139,85,32,72,133,192,116,30,15,182,75,253,72,139,44 + .byte 202,72,137,40 + + .globl lj_cont_nop + .hidden lj_cont_nop + .type lj_cont_nop, @function + .size lj_cont_nop, 68 +lj_cont_nop: + .byte 139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65 + .byte 255,36,238,72,139,77,40,72,137,89,232,15,182,67,253,72 + .byte 139,44,194,72,137,105,16,72,141,89,2,72,41,211,72,139 + .byte 105,240,184,4,0,0,0,72,193,229,17,72,193,237,17,233 + .byte 67,253,255,255 + + .globl lj_vmeta_tsetr + .hidden lj_vmeta_tsetr + .type lj_vmeta_tsetr, @function + .size lj_vmeta_tsetr, 39 +lj_vmeta_tsetr: + .byte 72,139,124,36,16,72,137,238,72,137,87,32,72,137,213,137 + .byte 194,72,137,92,36,24 + call lj_tab_setinth + .byte 15,182,75,253,72,137,234,233,140,240,255,255 + + .globl lj_vmeta_comp + .hidden lj_vmeta_comp + .type lj_vmeta_comp, @function + .size lj_vmeta_comp, 93 +lj_vmeta_comp: + .byte 15,183,67,254,15,182,75,253,72,139,108,36,16,72,137,85 + .byte 32,72,141,52,202,72,141,20,194,72,137,239,15,182,75,252 + .byte 72,137,92,36,24 + call lj_meta_comp + .byte 72,139,85,32,72,131,248,1,15,135,15,1,0,0,72,141 + .byte 91,4,114,12,15,183,67,254,72,141,156,131,0,0,254,255 + .byte 139,3,15,182,204,15,182,232,72,131,195,4,193,232,16,65 + .byte 255,36,238 + + .globl lj_cont_condt + .hidden lj_cont_condt + .type lj_cont_condt, @function + .size lj_cont_condt, 19 +lj_cont_condt: + .byte 72,131,195,4,76,139,24,73,193,251,47,65,131,251,254,114 + .byte 208,235,218 + + .globl lj_cont_condf + .hidden lj_cont_condf + .type lj_cont_condf, @function + .size lj_cont_condf, 13 +lj_cont_condf: + .byte 76,139,24,73,193,251,47,65,131,251,254,235,187 + + .globl lj_vmeta_equal + .hidden lj_vmeta_equal + .type lj_vmeta_equal, @function + .size lj_vmeta_equal, 44 +lj_vmeta_equal: + .byte 72,193,224,17,72,193,232,17,72,131,235,4,72,137,206,137 + .byte 233,72,139,108,36,16,72,137,85,32,72,137,194,72,137,239 + .byte 72,137,92,36,24 + call lj_meta_equal + .byte 235,129 + + .globl lj_vmeta_equal_cd + .hidden 
lj_vmeta_equal_cd + .type lj_vmeta_equal_cd, @function + .size lj_vmeta_equal_cd, 34 +lj_vmeta_equal_cd: + .byte 72,131,235,4,72,139,108,36,16,72,137,85,32,72,137,239 + .byte 139,115,252,72,137,92,36,24 + call lj_meta_equal_cd + .byte 233,95,255,255,255 + + .globl lj_vmeta_istype + .hidden lj_vmeta_istype + .type lj_vmeta_istype, @function + .size lj_vmeta_istype, 35 +lj_vmeta_istype: + .byte 72,139,108,36,16,72,137,85,32,137,206,137,194,72,137,239 + .byte 72,137,92,36,24 + call lj_meta_istype + .byte 72,139,85,32,233,92,255,255,255 + + .globl lj_vmeta_arith_vno + .hidden lj_vmeta_arith_vno + .type lj_vmeta_arith_vno, @function + .size lj_vmeta_arith_vno, 0 +lj_vmeta_arith_vno: + + .globl lj_vmeta_arith_vn + .hidden lj_vmeta_arith_vn + .type lj_vmeta_arith_vn, @function + .size lj_vmeta_arith_vn, 6 +lj_vmeta_arith_vn: + .byte 73,141,4,199,235,26 + + .globl lj_vmeta_arith_nvo + .hidden lj_vmeta_arith_nvo + .type lj_vmeta_arith_nvo, @function + .size lj_vmeta_arith_nvo, 0 +lj_vmeta_arith_nvo: + + .globl lj_vmeta_arith_nv + .hidden lj_vmeta_arith_nv + .type lj_vmeta_arith_nv, @function + .size lj_vmeta_arith_nv, 13 +lj_vmeta_arith_nv: + .byte 77,141,20,199,72,141,4,234,76,137,213,235,17 + + .globl lj_vmeta_unm + .hidden lj_vmeta_unm + .type lj_vmeta_unm, @function + .size lj_vmeta_unm, 9 +lj_vmeta_unm: + .byte 72,141,4,194,72,137,197,235,8 + + .globl lj_vmeta_arith_vvo + .hidden lj_vmeta_arith_vvo + .type lj_vmeta_arith_vvo, @function + .size lj_vmeta_arith_vvo, 0 +lj_vmeta_arith_vvo: + + .globl lj_vmeta_arith_vv + .hidden lj_vmeta_arith_vv + .type lj_vmeta_arith_vv, @function + .size lj_vmeta_arith_vv, 61 +lj_vmeta_arith_vv: + .byte 72,141,4,194,72,141,44,234,72,141,12,202,68,15,182,67 + .byte 252,72,137,206,72,137,193,72,139,124,36,16,72,137,87,32 + .byte 72,137,234,72,137,253,72,137,92,36,24 + call lj_meta_arith + .byte 72,139,85,32,72,133,192,15,132,78,254,255,255 + + .globl lj_vmeta_binop + .hidden lj_vmeta_binop + .type lj_vmeta_binop, @function + .size 
lj_vmeta_binop, 24 +lj_vmeta_binop: + .byte 72,137,193,72,41,208,72,137,89,232,72,141,88,2,184,3 + .byte 0,0,0,233,160,251,255,255 + + .globl lj_vmeta_len + .hidden lj_vmeta_len + .type lj_vmeta_len, @function + .size lj_vmeta_len, 36 +lj_vmeta_len: + .byte 15,183,67,254,72,139,108,36,16,72,137,85,32,72,141,52 + .byte 194,72,137,239,72,137,92,36,24 + call lj_meta_len + .byte 72,139,85,32,235,196 + + .globl lj_vmeta_call_ra + .hidden lj_vmeta_call_ra + .type lj_vmeta_call_ra, @function + .size lj_vmeta_call_ra, 5 +lj_vmeta_call_ra: + .byte 72,141,76,202,16 + + .globl lj_vmeta_call + .hidden lj_vmeta_call + .type lj_vmeta_call, @function + .size lj_vmeta_call, 100 +lj_vmeta_call: + .byte 137,4,36,72,137,205,72,139,124,36,16,72,137,87,32,72 + .byte 141,113,240,72,141,84,193,248,72,137,92,36,24 + call lj_meta_call + .byte 72,137,233,72,139,108,36,16,72,139,85,32,139,4,36,72 + .byte 139,105,240,131,192,1,73,57,215,15,132,146,239,255,255,72 + .byte 193,229,17,72,193,237,17,72,137,202,72,137,90,248,72,139 + .byte 93,32,139,11,15,182,233,15,182,205,72,131,195,4,65,255 + .byte 36,238 + + .globl lj_vmeta_for + .hidden lj_vmeta_for + .type lj_vmeta_for, @function + .size lj_vmeta_for, 49 +lj_vmeta_for: + .byte 72,139,108,36,16,72,137,85,32,72,137,206,72,137,239,72 + .byte 137,92,36,24 + call lj_meta_for + .byte 72,139,85,32,139,67,252,15,182,204,15,182,232,193,232,16 + .byte 65,255,164,238,208,4,0,0 + + .globl lj_ff_assert + .hidden lj_ff_assert + .type lj_ff_assert, @function + .size lj_ff_assert, 75 +lj_ff_assert: + .byte 131,248,2,15,130,166,19,0,0,76,139,26,76,137,221,73 + .byte 193,251,47,65,131,251,254,15,131,146,19,0,0,72,139,90 + .byte 248,137,4,36,72,139,42,72,137,106,240,131,232,2,116,19 + .byte 72,137,209,72,131,193,8,72,139,41,72,137,105,240,131,232 + .byte 1,117,240,139,4,36,233,25,8,0,0 + + .globl lj_ff_type + .hidden lj_ff_type + .type lj_ff_type, @function + .size lj_ff_type, 71 +lj_ff_type: + .byte 131,248,2,15,130,91,19,0,0,72,139,2,72,193,248,47 + .byte 
189,242,255,255,255,57,232,15,66,197,247,208,72,139,106,240 + .byte 72,193,229,17,72,193,237,17,72,139,68,197,48,72,139,90 + .byte 248,73,187,0,0,0,0,0,128,253,255,76,9,216,72,137 + .byte 66,240,233,202,7,0,0 + + .globl lj_ff_getmetatable + .hidden lj_ff_getmetatable + .type lj_ff_getmetatable, @function + .size lj_ff_getmetatable, 184 +lj_ff_getmetatable: + .byte 131,248,2,15,130,20,19,0,0,72,139,42,72,139,90,248 + .byte 73,137,235,72,193,229,17,72,193,237,17,73,193,251,47,65 + .byte 131,251,244,117,113,72,139,109,32,72,133,237,72,199,66,240 + .byte 255,255,255,255,15,132,144,7,0,0,72,184,0,0,0,0 + .byte 0,0,250,255,72,9,232,72,137,66,240,73,139,134,160,242 + .byte 255,255,139,77,52,35,72,12,73,187,0,0,0,0,0,128 + .byte 253,255,76,9,216,107,201,24,72,3,77,40,72,57,65,8 + .byte 116,14,72,139,73,16,72,133,201,117,241,233,74,7,0,0 + .byte 72,139,41,72,131,253,255,15,132,61,7,0,0,72,137,106 + .byte 240,233,52,7,0,0,65,131,251,243,116,137,65,131,251,242 + .byte 119,6,65,187,242,255,255,255,65,247,211,75,139,172,222,200 + .byte 242,255,255,233,113,255,255,255 + + .globl lj_ff_setmetatable + .hidden lj_ff_setmetatable + .type lj_ff_setmetatable, @function + .size lj_ff_setmetatable, 125 +lj_ff_setmetatable: + .byte 131,248,3,15,130,92,18,0,0,72,139,42,73,137,234,73 + .byte 137,235,72,193,229,17,72,193,237,17,73,193,251,47,65,131 + .byte 251,244,15,133,61,18,0,0,72,131,125,32,0,15,133,50 + .byte 18,0,0,72,139,74,8,73,137,203,72,193,225,17,72,193 + .byte 233,17,73,193,251,47,65,131,251,244,15,133,21,18,0,0 + .byte 72,137,77,32,72,139,90,248,76,137,82,240,246,69,8,4 + .byte 116,22,128,101,8,251,73,139,134,240,240,255,255,73,137,174 + .byte 240,240,255,255,72,137,69,24,233,149,6,0,0 + + .globl lj_ff_rawget + .hidden lj_ff_rawget + .type lj_ff_rawget, @function + .size lj_ff_rawget, 73 +lj_ff_rawget: + .byte 131,248,3,15,130,223,17,0,0,72,139,50,73,137,243,72 + .byte 193,230,17,72,193,238,17,73,193,251,47,65,131,251,244,15 + .byte 
133,195,17,0,0,72,137,213,72,141,82,8,72,139,124,36 + .byte 16 + call lj_tab_get + .byte 72,137,234,72,139,40,72,139,90,248,72,137,106,240,233,76 + .byte 6,0,0 + + .globl lj_ff_tonumber + .hidden lj_ff_tonumber + .type lj_ff_tonumber, @function + .size lj_ff_tonumber, 42 +lj_ff_tonumber: + .byte 131,248,2,15,133,150,17,0,0,72,139,42,73,137,235,73 + .byte 193,251,47,65,131,251,242,15,135,130,17,0,0,72,139,90 + .byte 248,72,137,106,240,233,34,6,0,0 + + .globl lj_ff_tostring + .hidden lj_ff_tostring + .type lj_ff_tostring, @function + .size lj_ff_tostring, 127 +lj_ff_tostring: + .byte 131,248,2,15,130,108,17,0,0,72,139,90,248,72,139,42 + .byte 73,137,235,73,193,251,47,65,131,251,251,117,9,72,137,106 + .byte 240,233,252,5,0,0,65,131,251,242,15,135,64,17,0,0 + .byte 73,131,190,48,243,255,255,0,15,133,55,17,0,0,73,139 + .byte 174,192,240,255,255,73,59,174,200,240,255,255,114,5,232,217 + .byte 17,0,0,72,139,108,36,16,72,137,85,32,72,137,92,36 + .byte 24,72,137,214,72,137,239 + call lj_strfmt_num + .byte 72,139,85,32,72,189,0,0,0,0,0,128,253,255,72,9 + .byte 197,235,158 + + .globl lj_ff_next + .hidden lj_ff_next + .type lj_ff_next, @function + .size lj_ff_next, 97 +lj_ff_next: + .byte 131,248,2,15,130,237,16,0,0,116,96,72,139,50,73,137 + .byte 243,72,193,230,17,72,193,238,17,73,193,251,47,65,131,251 + .byte 244,15,133,207,16,0,0,72,139,108,36,16,72,137,85,32 + .byte 72,137,85,40,72,139,90,248,72,141,82,8,72,137,239,72 + .byte 137,92,36,24 + call lj_tab_next + .byte 72,139,85,32,133,192,116,36,72,139,106,8,72,139,66,16 + .byte 72,137,106,240,72,137,66,248 + + .globl lj_fff_res2 + .hidden lj_fff_res2 + .type lj_fff_res2, @function + .size lj_fff_res2, 33 +lj_fff_res2: + .byte 184,3,0,0,0,233,61,5,0,0,72,199,66,8,255,255 + .byte 255,255,235,150,72,199,66,240,255,255,255,255,233,33,5,0 + .byte 0 + + .globl lj_ff_pairs + .hidden lj_ff_pairs + .type lj_ff_pairs, @function + .size lj_ff_pairs, 98 +lj_ff_pairs: + .byte 131,248,2,15,130,107,16,0,0,72,139,42,73,137,234,73 + .byte 
137,235,72,193,229,17,72,193,237,17,73,193,251,47,65,131 + .byte 251,244,15,133,76,16,0,0,72,139,66,240,72,193,224,17 + .byte 72,193,232,17,72,139,64,48,73,187,0,0,0,0,0,128 + .byte 251,255,76,9,216,72,139,90,248,72,137,66,240,76,137,82 + .byte 248,72,199,2,255,255,255,255,184,4,0,0,0,233,196,4 + .byte 0,0 + + .globl lj_ff_ipairs_aux + .hidden lj_ff_ipairs_aux + .type lj_ff_ipairs_aux, @function + .size lj_ff_ipairs_aux, 150 +lj_ff_ipairs_aux: + .byte 131,248,3,15,130,9,16,0,0,72,139,42,73,137,235,72 + .byte 193,229,17,72,193,237,17,73,193,251,47,65,131,251,244,15 + .byte 133,237,15,0,0,76,139,90,8,73,193,251,47,65,131,251 + .byte 242,15,131,219,15,0,0,242,15,16,66,8,72,139,90,248 + .byte 73,186,0,0,0,0,0,0,240,63,102,73,15,110,202,242 + .byte 15,88,193,242,15,44,200,242,15,17,66,240,59,77,48,115 + .byte 26,72,139,69,16,72,141,4,200,72,131,56,255,116,39,72 + .byte 139,40,72,137,106,248,233,2,255,255,255,131,125,52,0,116 + .byte 21,72,137,239,72,137,213,137,206 + call lj_tab_getinth + .byte 72,137,234,72,133,192,117,211 + + .globl lj_fff_res0 + .hidden lj_fff_res0 + .type lj_fff_res0, @function + .size lj_fff_res0, 10 +lj_fff_res0: + .byte 184,1,0,0,0,233,36,4,0,0 + + .globl lj_ff_ipairs + .hidden lj_ff_ipairs + .type lj_ff_ipairs, @function + .size lj_ff_ipairs, 98 +lj_ff_ipairs: + .byte 131,248,2,15,130,105,15,0,0,72,139,42,73,137,234,73 + .byte 137,235,72,193,229,17,72,193,237,17,73,193,251,47,65,131 + .byte 251,244,15,133,74,15,0,0,72,139,66,240,72,193,224,17 + .byte 72,193,232,17,72,139,64,48,73,187,0,0,0,0,0,128 + .byte 251,255,76,9,216,72,139,90,248,72,137,66,240,76,137,82 + .byte 248,72,199,2,0,0,0,0,184,4,0,0,0,233,194,3 + .byte 0,0 + + .globl lj_ff_pcall + .hidden lj_ff_pcall + .type lj_ff_pcall, @function + .size lj_ff_pcall, 64 +lj_ff_pcall: + .byte 131,248,2,15,130,7,15,0,0,72,141,74,16,131,232,1 + .byte 187,22,0,0,0,65,15,182,174,105,241,255,255,72,193,237 + .byte 4,72,131,229,1,72,1,235,73,137,199,74,139,108,249,232 + .byte 
74,137,108,249,240,73,131,239,1,119,240,233,3,246,255,255 + + .globl lj_ff_xpcall + .hidden lj_ff_xpcall + .type lj_ff_xpcall, @function + .size lj_ff_xpcall, 54 +lj_ff_xpcall: + .byte 131,248,3,15,130,199,14,0,0,72,139,74,8,73,137,203 + .byte 73,193,251,47,65,131,251,247,15,133,178,14,0,0,72,139 + .byte 42,72,137,10,72,137,106,8,72,141,74,24,131,232,2,187 + .byte 30,0,0,0,235,159 + + .globl lj_ff_coroutine_resume + .hidden lj_ff_coroutine_resume + .type lj_ff_coroutine_resume, @function + .size lj_ff_coroutine_resume, 387 +lj_ff_coroutine_resume: + .byte 131,248,2,15,130,145,14,0,0,72,139,42,72,193,229,17 + .byte 72,193,237,17,72,139,90,248,72,137,92,36,24,72,137,44 + .byte 36,76,139,26,73,193,251,47,65,131,251,249,15,133,104,14 + .byte 0,0,72,131,125,80,0,15,133,93,14,0,0,128,125,11 + .byte 1,15,135,83,14,0,0,72,139,77,40,116,21,72,59,77 + .byte 32,15,132,67,14,0,0,72,139,89,248,72,137,25,72,131 + .byte 193,8,72,141,92,193,240,72,59,93,48,15,135,41,14,0 + .byte 0,72,137,93,40,72,139,108,36,16,72,137,85,32,72,131 + .byte 194,8,72,137,85,40,72,141,108,194,232,72,41,221,72,57 + .byte 203,116,17,72,139,4,43,72,137,67,248,72,131,235,8,72 + .byte 57,203,117,239,72,137,206,72,139,60,36,232,12,244,255,255 + .byte 72,139,108,36,16,72,139,28,36,72,139,85,32,73,137,174 + .byte 0,242,255,255,65,199,134,32,241,255,255,255,255,255,255,131 + .byte 248,1,119,104,72,139,75,32,76,139,123,40,72,137,75,40 + .byte 76,137,251,72,41,203,116,35,72,141,4,26,193,235,3,72 + .byte 59,69,48,119,110,72,137,213,72,41,205,72,139,1,72,137 + .byte 4,41,72,131,193,8,76,57,249,117,240,141,67,2,73,187 + .byte 255,255,255,255,255,255,254,255,76,137,90,248,72,139,92,36 + .byte 24,137,4,36,72,199,193,248,255,255,255,247,195,3,0,0 + .byte 0,15,132,221,235,255,255,233,235,241,255,255,73,187,255,255 + .byte 255,255,255,127,255,255,76,137,90,248,72,139,75,40,72,131 + .byte 233,8,72,137,75,40,72,139,1,72,137,2,184,3,0,0 + .byte 0,235,185,72,139,12,36,76,137,121,40,72,137,222,72,137 + .byte 239 + call 
lj_state_growstack + .byte 72,139,28,36,72,139,85,32,233,81,255,255,255 + + .globl lj_ff_coroutine_wrap_aux + .hidden lj_ff_coroutine_wrap_aux + .type lj_ff_coroutine_wrap_aux, @function + .size lj_ff_coroutine_wrap_aux, 320 +lj_ff_coroutine_wrap_aux: + .byte 72,139,106,240,72,193,229,17,72,193,237,17,72,139,109,48 + .byte 72,193,229,17,72,193,237,17,72,139,90,248,72,137,92,36 + .byte 24,72,137,44,36,72,131,125,80,0,15,133,231,12,0,0 + .byte 128,125,11,1,15,135,221,12,0,0,72,139,77,40,116,21 + .byte 72,59,77,32,15,132,205,12,0,0,72,139,89,248,72,137 + .byte 25,72,131,193,8,72,141,92,193,248,72,59,93,48,15,135 + .byte 179,12,0,0,72,137,93,40,72,139,108,36,16,72,137,85 + .byte 32,72,137,85,40,72,141,108,194,240,72,41,221,72,57,203 + .byte 116,17,72,139,4,43,72,137,67,248,72,131,235,8,72,57 + .byte 203,117,239,72,137,206,72,139,60,36,232,154,242,255,255,72 + .byte 139,108,36,16,72,139,28,36,72,139,85,32,73,137,174,0 + .byte 242,255,255,65,199,134,32,241,255,255,255,255,255,255,131,248 + .byte 1,119,85,72,139,75,32,76,139,123,40,72,137,75,40,76 + .byte 137,251,72,41,203,116,35,72,141,4,26,193,235,3,72,59 + .byte 69,48,119,63,72,137,213,72,41,205,72,139,1,72,137,4 + .byte 41,72,131,193,8,76,57,249,117,240,141,67,1,72,139,92 + .byte 36,24,137,4,36,49,201,247,195,3,0,0,0,15,132,126 + .byte 234,255,255,233,140,240,255,255,72,137,222,72,137,239 + call lj_ffh_coroutine_wrap_err + .byte 72,139,12,36,76,137,121,40,72,137,222,72,137,239 + call lj_state_growstack + .byte 72,139,28,36,72,139,85,32,235,131 + + .globl lj_ff_coroutine_yield + .hidden lj_ff_coroutine_yield + .type lj_ff_coroutine_yield, @function + .size lj_ff_coroutine_yield, 48 +lj_ff_coroutine_yield: + .byte 72,139,108,36,16,72,247,69,80,1,0,0,0,15,132,196 + .byte 11,0,0,72,137,85,32,72,141,68,194,248,72,137,69,40 + .byte 49,192,72,137,69,80,176,1,136,69,11,233,146,240,255,255 + + .globl lj_ff_math_abs + .hidden lj_ff_math_abs + .type lj_ff_math_abs, @function + .size lj_ff_math_abs, 45 +lj_ff_math_abs: + .byte 
131,248,2,15,130,158,11,0,0,72,139,42,73,137,235,73 + .byte 193,251,47,65,131,251,242,15,131,138,11,0,0,72,209,229 + .byte 72,209,237,72,139,90,248,72,137,106,240,235,39 + + .globl lj_ff_math_sqrt + .hidden lj_ff_math_sqrt + .type lj_ff_math_sqrt, @function + .size lj_ff_math_sqrt, 30 +lj_ff_math_sqrt: + .byte 131,248,2,15,130,113,11,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,96,11,0,0,242,15,81,2 + + .globl lj_fff_resxmm0 + .hidden lj_fff_resxmm0 + .type lj_fff_resxmm0, @function + .size lj_fff_resxmm0, 9 +lj_fff_resxmm0: + .byte 72,139,90,248,242,15,17,66,240 + + .globl lj_fff_res1 + .hidden lj_fff_res1 + .type lj_fff_res1, @function + .size lj_fff_res1, 5 +lj_fff_res1: + .byte 184,2,0,0,0 + + .globl lj_fff_res + .hidden lj_fff_res + .type lj_fff_res, @function + .size lj_fff_res, 3 +lj_fff_res: + .byte 137,4,36 + + .globl lj_fff_res_ + .hidden lj_fff_res_ + .type lj_fff_res_, @function + .size lj_fff_res_, 71 +lj_fff_res_: + .byte 247,195,3,0,0,0,117,51,56,67,255,119,31,15,182,75 + .byte 253,72,247,217,72,141,84,202,240,139,3,15,182,204,15,182 + .byte 232,72,131,195,4,193,232,16,65,255,36,238,72,199,68,194 + .byte 232,255,255,255,255,72,131,192,1,235,205,72,199,193,240,255 + .byte 255,255,233,145,239,255,255 + + .globl lj_ff_math_floor + .hidden lj_ff_math_floor + .type lj_ff_math_floor, @function + .size lj_ff_math_floor, 28 +lj_ff_math_floor: + .byte 76,139,26,73,193,251,47,65,131,251,242,15,131,243,10,0 + .byte 0,242,15,16,2,232,68,15,0,0,235,140 + + .globl lj_ff_math_ceil + .hidden lj_ff_math_ceil + .type lj_ff_math_ceil, @function + .size lj_ff_math_ceil, 31 +lj_ff_math_ceil: + .byte 76,139,26,73,193,251,47,65,131,251,242,15,131,215,10,0 + .byte 0,242,15,16,2,232,131,15,0,0,233,109,255,255,255 + + .globl lj_ff_math_log + .hidden lj_ff_math_log + .type lj_ff_math_log, @function + .size lj_ff_math_log, 46 +lj_ff_math_log: + .byte 131,248,2,15,133,192,10,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,175,10,0,0,242,15,16,2,72,137 + 
.byte 213 + call log@PLT + .byte 72,137,234,233,63,255,255,255 + + .globl lj_ff_math_log10 + .hidden lj_ff_math_log10 + .type lj_ff_math_log10, @function + .size lj_ff_math_log10, 46 +lj_ff_math_log10: + .byte 131,248,2,15,130,146,10,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,129,10,0,0,242,15,16,2,72,137 + .byte 213 + call log10@PLT + .byte 72,137,234,233,17,255,255,255 + + .globl lj_ff_math_exp + .hidden lj_ff_math_exp + .type lj_ff_math_exp, @function + .size lj_ff_math_exp, 46 +lj_ff_math_exp: + .byte 131,248,2,15,130,100,10,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,83,10,0,0,242,15,16,2,72,137 + .byte 213 + call exp@PLT + .byte 72,137,234,233,227,254,255,255 + + .globl lj_ff_math_sin + .hidden lj_ff_math_sin + .type lj_ff_math_sin, @function + .size lj_ff_math_sin, 46 +lj_ff_math_sin: + .byte 131,248,2,15,130,54,10,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,37,10,0,0,242,15,16,2,72,137 + .byte 213 + call sin@PLT + .byte 72,137,234,233,181,254,255,255 + + .globl lj_ff_math_cos + .hidden lj_ff_math_cos + .type lj_ff_math_cos, @function + .size lj_ff_math_cos, 46 +lj_ff_math_cos: + .byte 131,248,2,15,130,8,10,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,247,9,0,0,242,15,16,2,72,137 + .byte 213 + call cos@PLT + .byte 72,137,234,233,135,254,255,255 + + .globl lj_ff_math_tan + .hidden lj_ff_math_tan + .type lj_ff_math_tan, @function + .size lj_ff_math_tan, 46 +lj_ff_math_tan: + .byte 131,248,2,15,130,218,9,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,201,9,0,0,242,15,16,2,72,137 + .byte 213 + call tan@PLT + .byte 72,137,234,233,89,254,255,255 + + .globl lj_ff_math_asin + .hidden lj_ff_math_asin + .type lj_ff_math_asin, @function + .size lj_ff_math_asin, 46 +lj_ff_math_asin: + .byte 131,248,2,15,130,172,9,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,155,9,0,0,242,15,16,2,72,137 + .byte 213 + call asin@PLT + .byte 72,137,234,233,43,254,255,255 + + .globl lj_ff_math_acos + .hidden 
lj_ff_math_acos + .type lj_ff_math_acos, @function + .size lj_ff_math_acos, 46 +lj_ff_math_acos: + .byte 131,248,2,15,130,126,9,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,109,9,0,0,242,15,16,2,72,137 + .byte 213 + call acos@PLT + .byte 72,137,234,233,253,253,255,255 + + .globl lj_ff_math_atan + .hidden lj_ff_math_atan + .type lj_ff_math_atan, @function + .size lj_ff_math_atan, 46 +lj_ff_math_atan: + .byte 131,248,2,15,130,80,9,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,63,9,0,0,242,15,16,2,72,137 + .byte 213 + call atan@PLT + .byte 72,137,234,233,207,253,255,255 + + .globl lj_ff_math_sinh + .hidden lj_ff_math_sinh + .type lj_ff_math_sinh, @function + .size lj_ff_math_sinh, 46 +lj_ff_math_sinh: + .byte 131,248,2,15,130,34,9,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,17,9,0,0,242,15,16,2,72,137 + .byte 213 + call sinh@PLT + .byte 72,137,234,233,161,253,255,255 + + .globl lj_ff_math_cosh + .hidden lj_ff_math_cosh + .type lj_ff_math_cosh, @function + .size lj_ff_math_cosh, 46 +lj_ff_math_cosh: + .byte 131,248,2,15,130,244,8,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,227,8,0,0,242,15,16,2,72,137 + .byte 213 + call cosh@PLT + .byte 72,137,234,233,115,253,255,255 + + .globl lj_ff_math_tanh + .hidden lj_ff_math_tanh + .type lj_ff_math_tanh, @function + .size lj_ff_math_tanh, 46 +lj_ff_math_tanh: + .byte 131,248,2,15,130,198,8,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,181,8,0,0,242,15,16,2,72,137 + .byte 213 + call tanh@PLT + .byte 72,137,234,233,69,253,255,255 + + .globl lj_ff_math_pow + .hidden lj_ff_math_pow + .type lj_ff_math_pow, @function + .size lj_ff_math_pow, 69 +lj_ff_math_pow: + .byte 131,248,3,15,130,152,8,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,135,8,0,0,76,139,90,8,73,193 + .byte 251,47,65,131,251,242,15,131,117,8,0,0,242,15,16,2 + .byte 242,15,16,74,8,72,137,213 + call pow@PLT + .byte 72,137,234,233,0,253,255,255 + + .globl lj_ff_math_atan2 + .hidden lj_ff_math_atan2 
+ .type lj_ff_math_atan2, @function + .size lj_ff_math_atan2, 69 +lj_ff_math_atan2: + .byte 131,248,3,15,130,83,8,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,66,8,0,0,76,139,90,8,73,193 + .byte 251,47,65,131,251,242,15,131,48,8,0,0,242,15,16,2 + .byte 242,15,16,74,8,72,137,213 + call atan2@PLT + .byte 72,137,234,233,187,252,255,255 + + .globl lj_ff_math_fmod + .hidden lj_ff_math_fmod + .type lj_ff_math_fmod, @function + .size lj_ff_math_fmod, 69 +lj_ff_math_fmod: + .byte 131,248,3,15,130,14,8,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,253,7,0,0,76,139,90,8,73,193 + .byte 251,47,65,131,251,242,15,131,235,7,0,0,242,15,16,2 + .byte 242,15,16,74,8,72,137,213 + call fmod@PLT + .byte 72,137,234,233,118,252,255,255 + + .globl lj_ff_math_ldexp + .hidden lj_ff_math_ldexp + .type lj_ff_math_ldexp, @function + .size lj_ff_math_ldexp, 65 +lj_ff_math_ldexp: + .byte 131,248,3,15,130,201,7,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,184,7,0,0,76,139,90,8,73,193 + .byte 251,47,65,131,251,242,15,131,166,7,0,0,221,66,8,221 + .byte 2,217,253,221,217,72,139,90,248,221,90,240,233,62,252,255 + .byte 255 + + .globl lj_ff_math_frexp + .hidden lj_ff_math_frexp + .type lj_ff_math_frexp, @function + .size lj_ff_math_frexp, 76 +lj_ff_math_frexp: + .byte 131,248,2,15,130,136,7,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,119,7,0,0,242,15,16,2,72,137 + .byte 213,72,141,60,36 + call frexp@PLT + .byte 72,137,234,139,44,36,72,139,90,248,242,15,17,66,240,242 + .byte 15,42,205,242,15,17,74,248,184,3,0,0,0,233,247,251 + .byte 255,255 + + .globl lj_ff_math_modf + .hidden lj_ff_math_modf + .type lj_ff_math_modf, @function + .size lj_ff_math_modf, 64 +lj_ff_math_modf: + .byte 131,248,2,15,130,60,7,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,43,7,0,0,242,15,16,2,72,137 + .byte 213,72,141,122,240 + call modf@PLT + .byte 72,137,234,72,139,90,248,242,15,17,66,248,184,3,0,0 + .byte 0,233,183,251,255,255 + + .globl lj_ff_math_min + .hidden 
lj_ff_math_min + .type lj_ff_math_min, @function + .size lj_ff_math_min, 68 +lj_ff_math_min: + .byte 185,2,0,0,0,76,139,26,73,193,251,47,65,131,251,242 + .byte 15,131,239,6,0,0,242,15,16,2,57,193,15,131,135,251 + .byte 255,255,76,139,92,202,248,73,193,251,47,65,131,251,242,15 + .byte 131,208,6,0,0,242,15,16,76,202,248,242,15,93,193,131 + .byte 193,1,235,214 + + .globl lj_ff_math_max + .hidden lj_ff_math_max + .type lj_ff_math_max, @function + .size lj_ff_math_max, 68 +lj_ff_math_max: + .byte 185,2,0,0,0,76,139,26,73,193,251,47,65,131,251,242 + .byte 15,131,171,6,0,0,242,15,16,2,57,193,15,131,67,251 + .byte 255,255,76,139,92,202,248,73,193,251,47,65,131,251,242,15 + .byte 131,140,6,0,0,242,15,16,76,202,248,242,15,95,193,131 + .byte 193,1,235,214 + + .globl lj_ff_string_byte + .hidden lj_ff_string_byte + .type lj_ff_string_byte, @function + .size lj_ff_string_byte, 64 +lj_ff_string_byte: + .byte 131,248,2,15,133,116,6,0,0,72,139,42,73,137,235,72 + .byte 193,229,17,72,193,237,17,73,193,251,47,65,131,251,251,15 + .byte 133,88,6,0,0,72,139,90,248,131,125,16,1,15,130,206 + .byte 246,255,255,15,182,109,24,242,15,42,197,233,225,250,255,255 + + .globl lj_ff_string_char + .hidden lj_ff_string_char + .type lj_ff_string_char, @function + .size lj_ff_string_char, 76 +lj_ff_string_char: + .byte 73,139,174,192,240,255,255,73,59,174,200,240,255,255,114,5 + .byte 232,223,6,0,0,131,248,2,15,133,31,6,0,0,76,139 + .byte 26,73,193,251,47,65,131,251,242,15,131,14,6,0,0,242 + .byte 15,44,42,129,253,255,0,0,0,15,135,254,5,0,0,137 + .byte 44,36,65,186,1,0,0,0,72,141,4,36 + + .globl lj_fff_newstr + .hidden lj_fff_newstr + .type lj_fff_newstr, @function + .size lj_fff_newstr, 28 +lj_fff_newstr: + .byte 72,139,108,36,16,72,137,85,32,68,137,210,72,137,198,72 + .byte 137,239,72,137,92,36,24 + call lj_str_new + + .globl lj_fff_resstr + .hidden lj_fff_resstr + .type lj_fff_resstr, @function + .size lj_fff_resstr, 30 +lj_fff_resstr: + .byte 72,139,85,32,72,139,90,248,73,187,0,0,0,0,0,128 + .byte 
253,255,76,9,216,72,137,66,240,233,100,250,255,255 + + .globl lj_ff_string_sub + .hidden lj_ff_string_sub + .type lj_ff_string_sub, @function + .size lj_ff_string_sub, 174 +lj_ff_string_sub: + .byte 73,139,174,192,240,255,255,73,59,174,200,240,255,255,114,5 + .byte 232,89,6,0,0,65,186,255,255,255,255,131,248,3,15,130 + .byte 147,5,0,0,118,24,76,139,90,16,73,193,251,47,65,131 + .byte 251,242,15,131,127,5,0,0,242,68,15,44,82,16,72,139 + .byte 42,73,137,235,72,193,229,17,72,193,237,17,73,193,251,47 + .byte 65,131,251,251,15,133,93,5,0,0,76,139,90,8,73,193 + .byte 251,47,65,131,251,242,15,131,75,5,0,0,242,15,44,74 + .byte 8,139,69,16,68,57,208,114,23,133,201,126,33,65,41,202 + .byte 124,44,72,141,68,13,23,65,131,194,1,233,54,255,255,255 + .byte 124,7,69,141,84,2,1,235,224,65,137,194,235,219,116,7 + .byte 1,193,131,193,1,127,214,185,1,0,0,0,235,207 + + .globl lj_fff_emptystr + .hidden lj_fff_emptystr + .type lj_fff_emptystr, @function + .size lj_fff_emptystr, 5 +lj_fff_emptystr: + .byte 69,49,210,235,216 + + .globl lj_ff_string_reverse + .hidden lj_ff_string_reverse + .type lj_ff_string_reverse, @function + .size lj_ff_string_reverse, 108 +lj_ff_string_reverse: + .byte 131,248,2,15,130,251,4,0,0,73,139,174,192,240,255,255 + .byte 73,59,174,200,240,255,255,114,5,232,157,5,0,0,72,139 + .byte 50,73,137,243,72,193,230,17,72,193,238,17,73,193,251,47 + .byte 65,131,251,251,15,133,202,4,0,0,72,139,108,36,16,73 + .byte 141,190,48,241,255,255,72,137,85,32,72,139,71,16,72,137 + .byte 111,24,72,137,7,72,137,92,36,24 + call lj_buf_putstr_reverse + .byte 72,137,199 + call lj_buf_tostr + .byte 233,195,254,255,255 + + .globl lj_ff_string_lower + .hidden lj_ff_string_lower + .type lj_ff_string_lower, @function + .size lj_ff_string_lower, 108 +lj_ff_string_lower: + .byte 131,248,2,15,130,143,4,0,0,73,139,174,192,240,255,255 + .byte 73,59,174,200,240,255,255,114,5,232,49,5,0,0,72,139 + .byte 50,73,137,243,72,193,230,17,72,193,238,17,73,193,251,47 + .byte 
65,131,251,251,15,133,94,4,0,0,72,139,108,36,16,73 + .byte 141,190,48,241,255,255,72,137,85,32,72,139,71,16,72,137 + .byte 111,24,72,137,7,72,137,92,36,24 + call lj_buf_putstr_lower + .byte 72,137,199 + call lj_buf_tostr + .byte 233,87,254,255,255 + + .globl lj_ff_string_upper + .hidden lj_ff_string_upper + .type lj_ff_string_upper, @function + .size lj_ff_string_upper, 108 +lj_ff_string_upper: + .byte 131,248,2,15,130,35,4,0,0,73,139,174,192,240,255,255 + .byte 73,59,174,200,240,255,255,114,5,232,197,4,0,0,72,139 + .byte 50,73,137,243,72,193,230,17,72,193,238,17,73,193,251,47 + .byte 65,131,251,251,15,133,242,3,0,0,72,139,108,36,16,73 + .byte 141,190,48,241,255,255,72,137,85,32,72,139,71,16,72,137 + .byte 111,24,72,137,7,72,137,92,36,24 + call lj_buf_putstr_upper + .byte 72,137,199 + call lj_buf_tostr + .byte 233,235,253,255,255 + + .globl lj_ff_bit_tobit + .hidden lj_ff_bit_tobit + .type lj_ff_bit_tobit, @function + .size lj_ff_bit_tobit, 58 +lj_ff_bit_tobit: + .byte 131,248,2,15,130,183,3,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,166,3,0,0,242,15,16,2,72,189 + .byte 0,0,0,0,0,0,56,67,102,72,15,110,205,242,15,88 + .byte 193,102,15,126,197,233,177,1,0,0 + + .globl lj_ff_bit_band + .hidden lj_ff_bit_band + .type lj_ff_bit_band, @function + .size lj_ff_bit_band, 107 +lj_ff_bit_band: + .byte 131,248,2,15,130,125,3,0,0,72,189,0,0,0,0,0 + .byte 0,56,67,102,72,15,110,205,76,139,26,73,193,251,47,65 + .byte 131,251,242,15,131,93,3,0,0,242,15,16,2,242,15,88 + .byte 193,102,15,126,197,65,137,194,72,141,68,194,240,72,57,208 + .byte 15,134,107,1,0,0,76,139,24,73,193,251,47,65,131,251 + .byte 242,15,131,99,1,0,0,242,15,16,0,242,15,88,193,102 + .byte 15,126,193,33,205,72,131,232,8,235,210 + + .globl lj_ff_bit_bor + .hidden lj_ff_bit_bor + .type lj_ff_bit_bor, @function + .size lj_ff_bit_bor, 107 +lj_ff_bit_bor: + .byte 131,248,2,15,130,18,3,0,0,72,189,0,0,0,0,0 + .byte 0,56,67,102,72,15,110,205,76,139,26,73,193,251,47,65 + .byte 
131,251,242,15,131,242,2,0,0,242,15,16,2,242,15,88 + .byte 193,102,15,126,197,65,137,194,72,141,68,194,240,72,57,208 + .byte 15,134,0,1,0,0,76,139,24,73,193,251,47,65,131,251 + .byte 242,15,131,248,0,0,0,242,15,16,0,242,15,88,193,102 + .byte 15,126,193,9,205,72,131,232,8,235,210 + + .globl lj_ff_bit_bxor + .hidden lj_ff_bit_bxor + .type lj_ff_bit_bxor, @function + .size lj_ff_bit_bxor, 107 +lj_ff_bit_bxor: + .byte 131,248,2,15,130,167,2,0,0,72,189,0,0,0,0,0 + .byte 0,56,67,102,72,15,110,205,76,139,26,73,193,251,47,65 + .byte 131,251,242,15,131,135,2,0,0,242,15,16,2,242,15,88 + .byte 193,102,15,126,197,65,137,194,72,141,68,194,240,72,57,208 + .byte 15,134,149,0,0,0,76,139,24,73,193,251,47,65,131,251 + .byte 242,15,131,141,0,0,0,242,15,16,0,242,15,88,193,102 + .byte 15,126,193,49,205,72,131,232,8,235,210 + + .globl lj_ff_bit_bswap + .hidden lj_ff_bit_bswap + .type lj_ff_bit_bswap, @function + .size lj_ff_bit_bswap, 57 +lj_ff_bit_bswap: + .byte 131,248,2,15,130,60,2,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,43,2,0,0,242,15,16,2,72,189 + .byte 0,0,0,0,0,0,56,67,102,72,15,110,205,242,15,88 + .byte 193,102,15,126,197,15,205,235,55 + + .globl lj_ff_bit_bnot + .hidden lj_ff_bit_bnot + .type lj_ff_bit_bnot, @function + .size lj_ff_bit_bnot, 55 +lj_ff_bit_bnot: + .byte 131,248,2,15,130,3,2,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,242,1,0,0,242,15,16,2,72,189 + .byte 0,0,0,0,0,0,56,67,102,72,15,110,205,242,15,88 + .byte 193,102,15,126,197,247,213 + + .globl lj_fff_resbit + .hidden lj_fff_resbit + .type lj_fff_resbit, @function + .size lj_fff_resbit, 9 +lj_fff_resbit: + .byte 242,15,42,197,233,112,246,255,255 + + .globl lj_fff_fallback_bit_op + .hidden lj_fff_fallback_bit_op + .type lj_fff_fallback_bit_op, @function + .size lj_fff_fallback_bit_op, 8 +lj_fff_fallback_bit_op: + .byte 68,137,208,233,196,1,0,0 + + .globl lj_ff_bit_lshift + .hidden lj_ff_bit_lshift + .type lj_ff_bit_lshift, @function + .size lj_ff_bit_lshift, 88 +lj_ff_bit_lshift: + 
.byte 131,248,3,15,130,187,1,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,170,1,0,0,76,139,90,8,73,193 + .byte 251,47,65,131,251,242,15,131,152,1,0,0,242,15,16,2 + .byte 242,15,16,74,8,72,189,0,0,0,0,0,0,56,67,102 + .byte 72,15,110,213,242,15,88,194,242,15,88,202,102,15,126,197 + .byte 102,15,126,201,211,229,235,151 + + .globl lj_ff_bit_rshift + .hidden lj_ff_bit_rshift + .type lj_ff_bit_rshift, @function + .size lj_ff_bit_rshift, 91 +lj_ff_bit_rshift: + .byte 131,248,3,15,130,99,1,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,82,1,0,0,76,139,90,8,73,193 + .byte 251,47,65,131,251,242,15,131,64,1,0,0,242,15,16,2 + .byte 242,15,16,74,8,72,189,0,0,0,0,0,0,56,67,102 + .byte 72,15,110,213,242,15,88,194,242,15,88,202,102,15,126,197 + .byte 102,15,126,201,211,237,233,60,255,255,255 + + .globl lj_ff_bit_arshift + .hidden lj_ff_bit_arshift + .type lj_ff_bit_arshift, @function + .size lj_ff_bit_arshift, 91 +lj_ff_bit_arshift: + .byte 131,248,3,15,130,8,1,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,247,0,0,0,76,139,90,8,73,193 + .byte 251,47,65,131,251,242,15,131,229,0,0,0,242,15,16,2 + .byte 242,15,16,74,8,72,189,0,0,0,0,0,0,56,67,102 + .byte 72,15,110,213,242,15,88,194,242,15,88,202,102,15,126,197 + .byte 102,15,126,201,211,253,233,225,254,255,255 + + .globl lj_ff_bit_rol + .hidden lj_ff_bit_rol + .type lj_ff_bit_rol, @function + .size lj_ff_bit_rol, 91 +lj_ff_bit_rol: + .byte 131,248,3,15,130,173,0,0,0,76,139,26,73,193,251,47 + .byte 65,131,251,242,15,131,156,0,0,0,76,139,90,8,73,193 + .byte 251,47,65,131,251,242,15,131,138,0,0,0,242,15,16,2 + .byte 242,15,16,74,8,72,189,0,0,0,0,0,0,56,67,102 + .byte 72,15,110,213,242,15,88,194,242,15,88,202,102,15,126,197 + .byte 102,15,126,201,211,197,233,134,254,255,255 + + .globl lj_ff_bit_ror + .hidden lj_ff_bit_ror + .type lj_ff_bit_ror, @function + .size lj_ff_bit_ror, 79 +lj_ff_bit_ror: + .byte 131,248,3,114,86,76,139,26,73,193,251,47,65,131,251,242 + .byte 
115,73,76,139,90,8,73,193,251,47,65,131,251,242,115,59 + .byte 242,15,16,2,242,15,16,74,8,72,189,0,0,0,0,0 + .byte 0,56,67,102,72,15,110,213,242,15,88,194,242,15,88,202 + .byte 102,15,126,197,102,15,126,201,211,205,233,55,254,255,255 + + .globl lj_fff_fallback_2 + .hidden lj_fff_fallback_2 + .type lj_fff_fallback_2, @function + .size lj_fff_fallback_2, 7 +lj_fff_fallback_2: + .byte 184,3,0,0,0,235,5 + + .globl lj_fff_fallback_1 + .hidden lj_fff_fallback_1 + .type lj_fff_fallback_1, @function + .size lj_fff_fallback_1, 5 +lj_fff_fallback_1: + .byte 184,2,0,0,0 + + .globl lj_fff_fallback + .hidden lj_fff_fallback + .type lj_fff_fallback, @function + .size lj_fff_fallback, 119 +lj_fff_fallback: + .byte 72,139,108,36,16,72,139,90,248,72,137,92,36,24,72,137 + .byte 85,32,72,141,68,194,248,72,141,136,160,0,0,0,72,137 + .byte 69,40,72,139,66,240,72,193,224,17,72,193,232,17,72,59 + .byte 77,48,119,110,72,137,239,255,80,40,72,139,85,32,133,192 + .byte 15,143,108,244,255,255,72,139,77,40,72,41,209,193,233,3 + .byte 133,192,141,65,1,72,139,106,240,117,28,72,193,229,17,72 + .byte 193,237,17,72,139,93,32,139,11,15,182,233,15,182,205,72 + .byte 131,195,4,65,255,36,238 + + .globl lj_vm_call_tail + .hidden lj_vm_call_tail + .type lj_vm_call_tail, @function + .size lj_vm_call_tail, 64 +lj_vm_call_tail: + .byte 72,137,209,247,195,3,0,0,0,117,17,15,182,107,253,72 + .byte 247,221,72,141,84,234,240,233,160,230,255,255,72,137,221,72 + .byte 131,229,248,72,41,234,233,145,230,255,255,190,20,0,0,0 + .byte 72,137,239 + call lj_state_growstack + .byte 72,139,85,32,49,192,235,143 + + .globl lj_fff_gcstep + .hidden lj_fff_gcstep + .type lj_fff_gcstep, @function + .size lj_fff_gcstep, 59 +lj_fff_gcstep: + .byte 93,72,137,44,36,72,139,108,36,16,72,137,92,36,24,72 + .byte 137,85,32,72,141,68,194,248,72,137,239,72,137,69,40 + call lj_gc_step + .byte 72,139,85,32,72,139,69,40,72,41,208,193,232,3,131,192 + .byte 1,72,139,44,36,85,195 + + .globl lj_vm_record + .hidden lj_vm_record + .type 
lj_vm_record, @function + .size lj_vm_record, 17 +lj_vm_record: + .byte 168,16,117,56,168,12,116,52,65,255,142,216,241,255,255,235 + .byte 43 + + .globl lj_vm_rethook + .hidden lj_vm_rethook + .type lj_vm_rethook, @function + .size lj_vm_rethook, 14 +lj_vm_rethook: + .byte 65,15,182,134,105,241,255,255,168,16,117,59,235,29 + + .globl lj_vm_inshook + .hidden lj_vm_inshook + .type lj_vm_inshook, @function + .size lj_vm_inshook, 73 +lj_vm_inshook: + .byte 65,15,182,134,105,241,255,255,168,16,117,45,168,12,116,41 + .byte 65,255,142,216,241,255,255,116,4,168,4,116,28,72,139,108 + .byte 36,16,72,137,85,32,72,137,222,72,137,239 + call lj_dispatch_ins + .byte 72,139,85,32,15,182,75,253,15,182,107,252,15,183,67,254 + .byte 65,255,164,238,208,4,0,0 + + .globl lj_cont_hook + .hidden lj_cont_hook + .type lj_cont_hook, @function + .size lj_cont_hook, 13 +lj_cont_hook: + .byte 72,131,195,4,72,139,77,216,137,12,36,235,223 + + .globl lj_vm_hotloop + .hidden lj_vm_hotloop + .type lj_vm_hotloop, @function + .size lj_vm_hotloop, 66 +lj_vm_hotloop: + .byte 72,139,106,240,72,193,229,17,72,193,237,17,72,139,109,32 + .byte 15,182,69,155,72,141,4,194,72,139,108,36,16,72,137,85 + .byte 32,72,137,69,40,72,137,222,73,141,190,72,243,255,255,73 + .byte 137,174,216,243,255,255,72,137,92,36,24 + call lj_trace_hot + .byte 235,153 + + .globl lj_vm_callhook + .hidden lj_vm_callhook + .type lj_vm_callhook, @function + .size lj_vm_callhook, 7 +lj_vm_callhook: + .byte 72,137,92,36,24,235,9 + + .globl lj_vm_hotcall + .hidden lj_vm_hotcall + .type lj_vm_hotcall, @function + .size lj_vm_hotcall, 80 +lj_vm_hotcall: + .byte 72,137,92,36,24,72,131,203,1,72,141,68,194,248,72,139 + .byte 108,36,16,72,137,85,32,72,137,69,40,72,137,222,72,137 + .byte 239 + call lj_dispatch_call + .byte 72,199,68,36,24,0,0,0,0,72,131,227,254,72,139,85 + .byte 32,72,137,193,72,139,69,40,72,41,208,72,137,205,15,182 + .byte 75,253,193,232,3,131,192,1,255,229 + + .globl lj_cont_stitch + .hidden lj_cont_stitch + .type lj_cont_stitch, 
@function + .size lj_cont_stitch, 166 +lj_cont_stitch: + .byte 76,139,93,216,73,193,227,17,73,193,235,17,68,139,20,36 + .byte 15,182,75,253,72,141,12,202,65,131,234,1,116,20,72,139 + .byte 40,72,137,41,72,131,192,8,72,131,193,8,65,131,234,1 + .byte 117,236,15,182,67,253,15,182,107,255,72,1,232,72,141,68 + .byte 194,248,72,57,200,119,82,77,133,219,15,132,121,231,255,255 + .byte 65,15,183,107,116,65,15,183,67,118,57,232,15,132,103,231 + .byte 255,255,133,192,15,133,153,223,255,255,73,137,174,52,255,255 + .byte 255,72,139,108,36,16,72,137,85,32,72,137,222,73,141,190 + .byte 72,243,255,255,73,137,174,216,243,255,255 + call lj_dispatch_stitch + .byte 72,139,85,32,233,48,231,255,255,72,199,1,255,255,255,255 + .byte 72,131,193,8,235,156 + + .globl lj_vm_exit_handler + .hidden lj_vm_exit_handler + .type lj_vm_exit_handler, @function + .size lj_vm_exit_handler, 251 +lj_vm_exit_handler: + .byte 65,85,65,84,65,83,65,82,65,81,65,80,87,86,85,72 + .byte 141,108,36,88,85,83,82,81,80,15,182,69,248,138,101,240 + .byte 76,137,125,248,76,137,117,240,65,139,142,32,241,255,255,65 + .byte 199,134,32,241,255,255,252,255,255,255,65,137,134,52,255,255 + .byte 255,65,137,142,48,255,255,255,65,137,142,40,241,255,255,72 + .byte 129,236,128,0,0,0,72,131,197,128,242,68,15,17,125,248 + .byte 242,68,15,17,117,240,242,68,15,17,109,232,242,68,15,17 + .byte 101,224,242,68,15,17,93,216,242,68,15,17,85,208,242,68 + .byte 15,17,77,200,242,68,15,17,69,192,242,15,17,125,184,242 + .byte 15,17,117,176,242,15,17,109,168,242,15,17,101,160,242,15 + .byte 17,93,152,242,15,17,85,144,242,15,17,77,136,242,15,17 + .byte 69,128,73,139,174,0,242,255,255,73,139,150,8,242,255,255 + .byte 73,137,174,216,243,255,255,72,137,85,32,72,137,230,73,141 + .byte 190,72,243,255,255,73,199,134,8,242,255,255,0,0,0,0 + call lj_trace_exit + .byte 72,139,77,80,72,131,225,252,72,137,105,16,72,139,85,32 + .byte 72,139,89,24,235,19 + + .globl lj_vm_exit_interp + .hidden lj_vm_exit_interp + .type lj_vm_exit_interp, @function + .size 
lj_vm_exit_interp, 14 +lj_vm_exit_interp: + .byte 69,139,150,32,241,255,255,69,137,150,40,241,255,255 + + .globl lj_vm_exit_interp_notrack + .hidden lj_vm_exit_interp_notrack + .type lj_vm_exit_interp_notrack, @function + .size lj_vm_exit_interp_notrack, 171 +lj_vm_exit_interp_notrack: + .byte 72,141,76,36,16,76,139,105,248,76,139,33,72,137,204,133 + .byte 192,15,136,134,0,0,0,72,139,108,36,16,137,4,36,76 + .byte 139,122,240,73,193,231,17,73,193,239,17,77,139,127,32,77 + .byte 139,127,176,72,137,85,32,73,199,134,8,242,255,255,0,0 + .byte 0,0,69,139,150,32,241,255,255,65,199,134,32,241,255,255 + .byte 255,255,255,255,139,3,15,182,204,15,182,232,72,131,195,4 + .byte 193,232,16,131,253,89,114,8,131,253,97,115,7,139,4,36 + .byte 65,255,36,238,72,139,66,248,169,3,0,0,0,117,238,15 + .byte 182,64,253,72,247,216,76,139,124,194,224,73,193,231,17,73 + .byte 193,239,17,77,139,127,32,77,139,127,176,235,208,72,247,216 + .byte 72,137,239,72,137,198 + call lj_err_throw + + .globl lj_vm_floor_sse + .hidden lj_vm_floor_sse + .type lj_vm_floor_sse, @function + .size lj_vm_floor_sse, 0 +lj_vm_floor_sse: + + .globl lj_vm_floor + .hidden lj_vm_floor + .type lj_vm_floor, @function + .size lj_vm_floor, 91 +lj_vm_floor: + .byte 72,184,255,255,255,255,255,255,255,127,102,72,15,110,208,72 + .byte 184,0,0,0,0,0,0,48,67,102,72,15,110,216,15,40 + .byte 200,102,15,84,202,102,15,46,217,118,47,102,15,85,208,242 + .byte 15,88,203,242,15,92,203,102,15,86,202,72,184,0,0,0 + .byte 0,0,0,240,63,102,72,15,110,208,242,15,194,193,1,102 + .byte 15,84,194,242,15,92,200,15,40,193,195 + + .globl lj_vm_ceil_sse + .hidden lj_vm_ceil_sse + .type lj_vm_ceil_sse, @function + .size lj_vm_ceil_sse, 0 +lj_vm_ceil_sse: + + .globl lj_vm_ceil + .hidden lj_vm_ceil + .type lj_vm_ceil, @function + .size lj_vm_ceil, 91 +lj_vm_ceil: + .byte 72,184,255,255,255,255,255,255,255,127,102,72,15,110,208,72 + .byte 184,0,0,0,0,0,0,48,67,102,72,15,110,216,15,40 + .byte 200,102,15,84,202,102,15,46,217,118,47,102,15,85,208,242 + .byte 
15,88,203,242,15,92,203,102,15,86,202,72,184,0,0,0 + .byte 0,0,0,240,191,102,72,15,110,208,242,15,194,193,6,102 + .byte 15,84,194,242,15,92,200,15,40,193,195 + + .globl lj_vm_trunc + .hidden lj_vm_trunc + .type lj_vm_trunc, @function + .size lj_vm_trunc, 0 +lj_vm_trunc: + + .globl lj_vm_trunc_sse + .hidden lj_vm_trunc_sse + .type lj_vm_trunc_sse, @function + .size lj_vm_trunc_sse, 94 +lj_vm_trunc_sse: + .byte 72,184,255,255,255,255,255,255,255,127,102,72,15,110,208,72 + .byte 184,0,0,0,0,0,0,48,67,102,72,15,110,216,15,40 + .byte 200,102,15,84,202,102,15,46,217,118,50,102,15,85,208,15 + .byte 40,193,242,15,88,203,242,15,92,203,72,184,0,0,0,0 + .byte 0,0,240,63,102,72,15,110,216,242,15,194,193,1,102,15 + .byte 84,195,242,15,92,200,102,15,86,202,15,40,193,195 + + .globl lj_vm_mod + .hidden lj_vm_mod + .type lj_vm_mod, @function + .size lj_vm_mod, 118 +lj_vm_mod: + .byte 15,40,232,242,15,94,193,72,184,255,255,255,255,255,255,255 + .byte 127,102,72,15,110,208,72,184,0,0,0,0,0,0,48,67 + .byte 102,72,15,110,216,15,40,224,102,15,84,226,102,15,46,220 + .byte 118,56,102,15,85,208,242,15,88,227,242,15,92,227,102,15 + .byte 86,226,72,184,0,0,0,0,0,0,240,63,102,72,15,110 + .byte 208,242,15,194,196,1,102,15,84,194,242,15,92,224,15,40 + .byte 197,242,15,89,204,242,15,92,193,195,242,15,89,200,15,40 + .byte 197,242,15,92,193,195 + + .globl lj_vm_powi_sse + .hidden lj_vm_powi_sse + .type lj_vm_powi_sse, @function + .size lj_vm_powi_sse, 98 +lj_vm_powi_sse: + .byte 131,248,1,126,43,169,1,0,0,0,117,8,242,15,89,192 + .byte 209,232,235,241,209,232,116,23,15,40,200,242,15,89,192,209 + .byte 232,116,8,115,246,242,15,89,200,235,240,242,15,89,193,195 + .byte 116,253,114,30,247,216,232,202,255,255,255,72,184,0,0,0 + .byte 0,0,0,240,63,102,72,15,110,200,242,15,94,200,15,40 + .byte 193,195,72,184,0,0,0,0,0,0,240,63,102,72,15,110 + .byte 192,195 + + .globl lj_vm_cpuid + .hidden lj_vm_cpuid + .type lj_vm_cpuid, @function + .size lj_vm_cpuid, 20 +lj_vm_cpuid: + .byte 
137,248,83,49,201,15,162,137,6,137,94,4,137,78,8,137 + .byte 86,12,91,195 + + .globl lj_assert_bad_for_arg_type + .hidden lj_assert_bad_for_arg_type + .type lj_assert_bad_for_arg_type, @function + .size lj_assert_bad_for_arg_type, 1 +lj_assert_bad_for_arg_type: + .byte 204 + + .globl lj_vm_ffi_callback + .hidden lj_vm_ffi_callback + .type lj_vm_ffi_callback, @function + .size lj_vm_ffi_callback, 205 +lj_vm_ffi_callback: + .byte 83,65,87,65,86,65,85,65,84,72,131,236,40,76,141,181 + .byte 96,15,0,0,72,139,157,112,1,0,0,15,183,192,137,131 + .byte 216,0,0,0,72,137,123,120,72,137,179,128,0,0,0,72 + .byte 137,147,136,0,0,0,72,137,139,144,0,0,0,242,15,17 + .byte 67,56,242,15,17,75,64,242,15,17,83,72,242,15,17,91 + .byte 80,72,141,68,36,96,76,137,131,152,0,0,0,76,137,139 + .byte 160,0,0,0,242,15,17,99,88,242,15,17,107,96,242,15 + .byte 17,115,104,242,15,17,123,112,72,137,131,184,0,0,0,72 + .byte 137,230,72,137,92,36,24,72,137,223 + call lj_ccallback_enter + .byte 65,199,134,32,241,255,255,255,255,255,255,72,139,80,32,72 + .byte 139,64,40,72,41,208,72,139,106,240,72,193,229,17,72,193 + .byte 237,17,72,193,232,3,72,131,192,1,72,139,93,32,139,11 + .byte 15,182,233,15,182,205,72,131,195,4,65,255,36,238 + + .globl lj_cont_ffi_callback + .hidden lj_cont_ffi_callback + .type lj_cont_ffi_callback, @function + .size lj_cont_ffi_callback, 49 +lj_cont_ffi_callback: + .byte 72,139,76,36,16,73,139,158,16,242,255,255,72,137,75,24 + .byte 72,137,81,32,72,137,105,40,72,137,223,72,137,198 + call lj_ccallback_leave + .byte 72,139,67,120,242,15,16,67,56,233,146,221,255,255 + + .globl lj_vm_ffi_call + .hidden lj_vm_ffi_call + .type lj_vm_ffi_call, @function + .size lj_vm_ffi_call, 160 +lj_vm_ffi_call: + .byte 85,72,137,229,83,72,137,251,139,67,8,72,41,196,15,182 + .byte 75,12,131,233,1,120,17,72,139,132,203,192,0,0,0,72 + .byte 137,4,204,131,233,1,121,239,15,182,67,15,72,139,187,144 + .byte 0,0,0,72,139,179,152,0,0,0,72,139,147,160,0,0 + .byte 0,72,139,139,168,0,0,0,76,139,131,176,0,0,0,76 + 
.byte 139,139,184,0,0,0,133,192,116,40,15,40,67,16,15,40 + .byte 75,32,15,40,83,48,15,40,91,64,131,248,4,118,19,15 + .byte 40,99,80,15,40,107,96,15,40,115,112,15,40,187,128,0 + .byte 0,0,255,19,72,137,131,144,0,0,0,15,41,67,16,72 + .byte 137,147,152,0,0,0,15,41,75,32,72,139,93,248,201,195 + + .section .note.GNU-stack,"",@progbits + .ident "DynASM 1.4.0" + + .section .debug_frame,"",@progbits +.Lframe0: + .long .LECIE0-.LSCIE0 +.LSCIE0: + .long 0xffffffff + .byte 0x1 + .string "" + .uleb128 0x1 + .sleb128 -8 + .byte 0x10 + .byte 0xc + .uleb128 0x7 + .uleb128 8 + .byte 0x80+0x10 + .uleb128 0x1 + .align 8 +.LECIE0: + +.LSFDE0: + .long .LEFDE0-.LASFDE0 +.LASFDE0: + .long .Lframe0 + .quad .Lbegin + .quad 16432 + .byte 0xe + .uleb128 96 + .byte 0x86 + .uleb128 0x2 + .byte 0x83 + .uleb128 0x3 + .byte 0x8f + .uleb128 0x4 + .byte 0x8e + .uleb128 0x5 + .byte 0x8d + .uleb128 0x6 + .byte 0x8c + .uleb128 0x7 + .align 8 +.LEFDE0: + +.LSFDE1: + .long .LEFDE1-.LASFDE1 +.LASFDE1: + .long .Lframe0 + .quad lj_vm_ffi_call + .quad 160 + .byte 0xe + .uleb128 16 + .byte 0x86 + .uleb128 0x2 + .byte 0xd + .uleb128 0x6 + .byte 0x83 + .uleb128 0x3 + .align 8 +.LEFDE1: + diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc deleted file mode 100644 index 780cc16e6d..0000000000 --- a/src/vm_arm.dasc +++ /dev/null @@ -1,4593 +0,0 @@ -|// Low-level VM code for ARM CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -| -|.arch arm -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. -|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// Fixed register assignments for the interpreter. -| -|// The following must be C callee-save. 
-|.define MASKR8, r4 // 255*8 constant for fast bytecode decoding. -|.define KBASE, r5 // Constants of current Lua function. -|.define PC, r6 // Next PC. -|.define DISPATCH, r7 // Opcode dispatch table. -|.define LREG, r8 // Register holding lua_State (also in SAVE_L). -| -|// C callee-save in EABI, but often refetched. Temporary in iOS 3.0+. -|.define BASE, r9 // Base of current Lua stack frame. -| -|// The following temporaries are not saved across C calls, except for RA/RC. -|.define RA, r10 // Callee-save. -|.define RC, r11 // Callee-save. -|.define RB, r12 -|.define OP, r12 // Overlaps RB, must not be lr. -|.define INS, lr -| -|// Calling conventions. Also used as temporaries. -|.define CARG1, r0 -|.define CARG2, r1 -|.define CARG3, r2 -|.define CARG4, r3 -|.define CARG12, r0 // For 1st soft-fp double. -|.define CARG34, r2 // For 2nd soft-fp double. -| -|.define CRET1, r0 -|.define CRET2, r1 -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|.define SAVE_R4, [sp, #28] -|.define CFRAME_SPACE, #28 -|.define SAVE_ERRF, [sp, #24] -|.define SAVE_NRES, [sp, #20] -|.define SAVE_CFRAME, [sp, #16] -|.define SAVE_L, [sp, #12] -|.define SAVE_PC, [sp, #8] -|.define SAVE_MULTRES, [sp, #4] -|.define ARG5, [sp] -| -|.define TMPDhi, [sp, #4] -|.define TMPDlo, [sp] -|.define TMPD, [sp] -|.define TMPDp, sp -| -|.if FPU -|.macro saveregs -| push {r5, r6, r7, r8, r9, r10, r11, lr} -| vpush {d8-d15} -| sub sp, sp, CFRAME_SPACE+4 -| str r4, SAVE_R4 -|.endmacro -|.macro restoreregs_ret -| ldr r4, SAVE_R4 -| add sp, sp, CFRAME_SPACE+4 -| vpop {d8-d15} -| pop {r5, r6, r7, r8, r9, r10, r11, pc} -|.endmacro -|.else -|.macro saveregs -| push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -| sub sp, sp, CFRAME_SPACE -|.endmacro -|.macro restoreregs_ret -| add sp, sp, CFRAME_SPACE -| pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -|.endmacro -|.endif -| -|// Type definitions. Some of these are only used for documentation. 
-|.type L, lua_State, LREG -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|//----------------------------------------------------------------------- -| -|// Trap for not-yet-implemented parts. -|.macro NYI; ud; .endmacro -| -|//----------------------------------------------------------------------- -| -|// Access to frame relative to BASE. -|.define FRAME_FUNC, #-8 -|.define FRAME_PC, #-4 -| -|.macro decode_RA8, dst, ins; and dst, MASKR8, ins, lsr #5; .endmacro -|.macro decode_RB8, dst, ins; and dst, MASKR8, ins, lsr #21; .endmacro -|.macro decode_RC8, dst, ins; and dst, MASKR8, ins, lsr #13; .endmacro -|.macro decode_RD, dst, ins; lsr dst, ins, #16; .endmacro -|.macro decode_OP, dst, ins; and dst, ins, #255; .endmacro -| -|// Instruction fetch. -|.macro ins_NEXT1 -| ldrb OP, [PC] -|.endmacro -|.macro ins_NEXT2 -| ldr INS, [PC], #4 -|.endmacro -|// Instruction decode+dispatch. -|.macro ins_NEXT3 -| ldr OP, [DISPATCH, OP, lsl #2] -| decode_RA8 RA, INS -| decode_RD RC, INS -| bx OP -|.endmacro -|.macro ins_NEXT -| ins_NEXT1 -| ins_NEXT2 -| ins_NEXT3 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -| .define ins_next1, ins_NEXT1 -| .define ins_next2, ins_NEXT2 -| .define ins_next3, ins_NEXT3 -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). 
-| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next1 -| .endmacro -| .macro ins_next2 -| .endmacro -| .macro ins_next3 -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Avoid register name substitution for field name. -#define field_pc pc -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| ldr PC, LFUNC:CARG3->field_pc -| ldrb OP, [PC] // STALL: load PC. early PC. -| ldr INS, [PC], #4 -| ldr OP, [DISPATCH, OP, lsl #2] // STALL: load OP. early OP. -| decode_RA8 RA, INS -| add RA, RA, BASE -| bx OP -|.endmacro -| -|.macro ins_call -| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| str PC, [BASE, FRAME_PC] -| ins_callt // STALL: locked PC. -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Macros to test operand types. -|.macro checktp, reg, tp; cmn reg, #-tp; .endmacro -|.macro checktpeq, reg, tp; cmneq reg, #-tp; .endmacro -|.macro checktpne, reg, tp; cmnne reg, #-tp; .endmacro -|.macro checkstr, reg, target; checktp reg, LJ_TSTR; bne target; .endmacro -|.macro checktab, reg, target; checktp reg, LJ_TTAB; bne target; .endmacro -|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC; bne target; .endmacro -| -|// Assumes DISPATCH is relative to GL. 
-#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|.macro hotcheck, delta -| lsr CARG1, PC, #1 -| and CARG1, CARG1, #126 -| sub CARG1, CARG1, #-GG_DISP2HOT -| ldrh CARG2, [DISPATCH, CARG1] -| subs CARG2, CARG2, #delta -| strh CARG2, [DISPATCH, CARG1] -|.endmacro -| -|.macro hotloop -| hotcheck HOTCOUNT_LOOP -| blo ->vm_hotloop -|.endmacro -| -|.macro hotcall -| hotcheck HOTCOUNT_CALL -| blo ->vm_hotcall -|.endmacro -| -|// Set current VM state. -|.macro mv_vmstate, reg, st; mvn reg, #LJ_VMST_..st; .endmacro -|.macro st_vmstate, reg; str reg, [DISPATCH, #DISPATCH_GL(vmstate)]; .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp. -|.macro barrierback, tab, mark, tmp -| ldr tmp, [DISPATCH, #DISPATCH_GL(gc.grayagain)] -| bic mark, mark, #LJ_GC_BLACK // black2gray(tab) -| str tab, [DISPATCH, #DISPATCH_GL(gc.grayagain)] -| strb mark, tab->marked -| str tmp, tab->gclist -|.endmacro -| -|.macro .IOS, a, b -|.if IOS -| a, b -|.endif -|.endmacro -| -|//----------------------------------------------------------------------- - -#if !LJ_DUALNUM -#error "Only dual-number mode supported for ARM target" -#endif - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: RB = previous base. - | tst PC, #FRAME_P - | beq ->cont_dispatch - | - | // Return from pcall or xpcall fast func. - | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame. 
- | mvn CARG2, #~LJ_TTRUE - | mov BASE, RB - | // Prepending may overwrite the pcall frame, so do it at the end. - | str CARG2, [RA, FRAME_PC] // Prepend true to results. - | sub RA, RA, #8 - | - |->vm_returnc: - | adds RC, RC, #8 // RC = (nresults+1)*8. - | mov CRET1, #LUA_YIELD - | beq ->vm_unwind_c_eh - | str RC, SAVE_MULTRES - | ands CARG1, PC, #FRAME_TYPE - | beq ->BC_RET_Z // Handle regular return to Lua. - | - |->vm_return: - | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return - | // CARG1 = PC & FRAME_TYPE - | bic RB, PC, #FRAME_TYPEP - | cmp CARG1, #FRAME_C - | sub RB, BASE, RB // RB = previous base. - | bne ->vm_returnp - | - | str RB, L->base - | ldr KBASE, SAVE_NRES - | mv_vmstate CARG4, C - | sub BASE, BASE, #8 - | subs CARG3, RC, #8 - | lsl KBASE, KBASE, #3 // KBASE = (nresults_wanted+1)*8 - | st_vmstate CARG4 - | beq >2 - |1: - | subs CARG3, CARG3, #8 - | ldrd CARG12, [RA], #8 - | strd CARG12, [BASE], #8 - | bne <1 - |2: - | cmp KBASE, RC // More/less results wanted? - | bne >6 - |3: - | str BASE, L->top // Store new top. - | - |->vm_leave_cp: - | ldr RC, SAVE_CFRAME // Restore previous C frame. - | mov CRET1, #0 // Ok return status for vm_pcall. - | str RC, L->cframe - | - |->vm_leave_unw: - | restoreregs_ret - | - |6: - | blt >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. - | ldr CARG3, L->maxstack - | mvn CARG2, #~LJ_TNIL - | cmp BASE, CARG3 - | bhs >8 - | str CARG2, [BASE, #4] - | add RC, RC, #8 - | add BASE, BASE, #8 - | b <2 - | - |7: // Less results wanted. - | sub CARG1, RC, KBASE - | cmp KBASE, #0 // LUA_MULTRET+1 case? - | subne BASE, BASE, CARG1 // Either keep top or shrink it. - | b <3 - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. 
- | str BASE, L->top // Save current top held in BASE (yes). - | lsr CARG2, KBASE, #3 - | mov CARG1, L - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->top // Need the (realloced) L->top in BASE. - | b <2 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | mov sp, CARG1 - | mov CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | ldr L, SAVE_L - | mv_vmstate CARG4, C - | ldr GL:CARG3, L->glref - | str CARG4, GL:CARG3->vmstate - | b ->vm_leave_unw - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - | bic CARG1, CARG1, #~CFRAME_RAWMASK // Use two steps: bic sp is deprecated. - | mov sp, CARG1 - |->vm_unwind_ff_eh: // Landing pad for external unwinder. - | ldr L, SAVE_L - | mov MASKR8, #255 - | mov RC, #16 // 2 results: false + error message. - | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. - | ldr BASE, L->base - | ldr DISPATCH, L->glref // Setup pointer to dispatch table. - | mvn CARG1, #~LJ_TFALSE - | sub RA, BASE, #8 // Results start at BASE-8. - | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame. - | add DISPATCH, DISPATCH, #GG_G2DISP - | mv_vmstate CARG2, INTERP - | str CARG1, [BASE, #-4] // Prepend false to error message. - | st_vmstate CARG2 - | b ->vm_returnc - | - |->vm_unwind_ext: // Complete external unwind. -#if !LJ_NO_UNWIND - | push {r0, r1, r2, lr} - | bl extern _Unwind_Complete - | ldr r0, [sp] - | bl extern _Unwind_DeleteException - | pop {r0, r1, r2, lr} - | mov r0, r1 - | bx r2 -#endif - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | // CARG1 = L - | mov CARG2, #LUA_MINSTACK - | b >2 - | - |->vm_growstack_l: // Grow stack for Lua function. 
- | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | add RC, BASE, RC - | sub RA, RA, BASE - | mov CARG1, L - | str BASE, L->base - | add PC, PC, #4 // Must point after first instruction. - | str RC, L->top - | lsr CARG2, RA, #3 - |2: - | // L->base = new base, L->top = top - | str PC, SAVE_PC - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->base - | ldr RC, L->top - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | sub NARGS8:RC, RC, BASE - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | mov L, CARG1 - | ldr DISPATCH, L:CARG1->glref // Setup pointer to dispatch table. - | mov BASE, CARG2 - | add DISPATCH, DISPATCH, #GG_G2DISP - | str L, SAVE_L - | mov PC, #FRAME_CP - | str CARG3, SAVE_NRES - | add CARG2, sp, #CFRAME_RESUME - | ldrb CARG1, L->status - | str CARG3, SAVE_ERRF - | str L, SAVE_PC // Any value outside of bytecode is ok. - | str CARG3, SAVE_CFRAME - | cmp CARG1, #0 - | str CARG2, L->cframe - | beq >3 - | - | // Resume after yield (like a return). - | str L, [DISPATCH, #DISPATCH_GL(cur_L)] - | mov RA, BASE - | ldr BASE, L->base - | ldr CARG1, L->top - | mov MASKR8, #255 - | strb CARG3, L->status - | sub RC, CARG1, BASE - | ldr PC, [BASE, FRAME_PC] - | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. - | mv_vmstate CARG2, INTERP - | add RC, RC, #8 - | ands CARG1, PC, #FRAME_TYPE - | st_vmstate CARG2 - | str RC, SAVE_MULTRES - | beq ->BC_RET_Z - | b ->vm_return - | - |->vm_pcall: // Setup protected C frame and enter VM. 
- | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | mov PC, #FRAME_CP - | str CARG4, SAVE_ERRF - | b >1 - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | mov PC, #FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | ldr RC, L:CARG1->cframe - | str CARG3, SAVE_NRES - | mov L, CARG1 - | str CARG1, SAVE_L - | ldr DISPATCH, L->glref // Setup pointer to dispatch table. - | mov BASE, CARG2 - | str CARG1, SAVE_PC // Any value outside of bytecode is ok. - | str RC, SAVE_CFRAME - | add DISPATCH, DISPATCH, #GG_G2DISP - | str sp, L->cframe // Add our C frame to cframe chain. - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | str L, [DISPATCH, #DISPATCH_GL(cur_L)] - | ldr RB, L->base // RB = old base (for vmeta_call). - | ldr CARG1, L->top - | mov MASKR8, #255 - | add PC, PC, BASE - | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. - | sub PC, PC, RB // PC = frame delta + frame type - | mv_vmstate CARG2, INTERP - | sub NARGS8:RC, CARG1, BASE - | st_vmstate CARG2 - | - |->vm_call_dispatch: - | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC - | ldrd CARG34, [BASE, FRAME_FUNC] - | checkfunc CARG4, ->vmeta_call - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | mov L, CARG1 - | ldr RA, L:CARG1->stack - | str CARG1, SAVE_L - | ldr DISPATCH, L->glref // Setup pointer to dispatch table. - | ldr RB, L->top - | str CARG1, SAVE_PC // Any value outside of bytecode is ok. - | ldr RC, L->cframe - | add DISPATCH, DISPATCH, #GG_G2DISP - | sub RA, RA, RB // Compute -savestack(L, L->top). - | mov RB, #0 - | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. - | str RB, SAVE_ERRF // No error function. 
- | str RC, SAVE_CFRAME - | str sp, L->cframe // Add our C frame to cframe chain. - | str L, [DISPATCH, #DISPATCH_GL(cur_L)] - | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) - | movs BASE, CRET1 - | mov PC, #FRAME_CP - | bne <3 // Else continue with the call. - | b ->vm_leave_cp // No base? Just remove C frame. - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8 - | ldr LFUNC:CARG3, [RB, FRAME_FUNC] - | ldr CARG1, [BASE, #-16] // Get continuation. - | mov CARG4, BASE - | mov BASE, RB // Restore caller BASE. - |.if FFI - | cmp CARG1, #1 - |.endif - | ldr PC, [CARG4, #-12] // Restore PC from [cont|PC]. - | ldr CARG3, LFUNC:CARG3->field_pc - | mvn INS, #~LJ_TNIL - | add CARG2, RA, RC - | str INS, [CARG2, #-4] // Ensure one valid arg. - |.if FFI - | bls >1 - |.endif - | ldr KBASE, [CARG3, #PC2PROTO(k)] - | // BASE = base, RA = resultptr, CARG4 = meta base - | bx CARG1 - | - |.if FFI - |1: - | beq ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: tailcall from C function. 
- | sub CARG4, CARG4, #16 - | sub RC, CARG4, BASE - | b ->vm_call_tail - |.endif - | - |->cont_cat: // RA = resultptr, CARG4 = meta base - | ldr INS, [PC, #-4] - | sub CARG2, CARG4, #16 - | ldrd CARG34, [RA] - | str BASE, L->base - | decode_RB8 RC, INS - | decode_RA8 RA, INS - | add CARG1, BASE, RC - | subs CARG1, CARG2, CARG1 - | strdne CARG34, [CARG2] - | movne CARG3, CARG1 - | bne ->BC_CAT_Z - | strd CARG34, [BASE, RA] - | b ->cont_nop - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | add CARG2, BASE, RB - | b >2 - | - |->vmeta_tgets: - | sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv) - | mvn CARG4, #~LJ_TTAB - | str TAB:RB, [CARG2] - | str CARG4, [CARG2, #4] - |2: - | mvn CARG4, #~LJ_TSTR - | str STR:RC, TMPDlo - | str CARG4, TMPDhi - | mov CARG3, TMPDp - | b >1 - | - |->vmeta_tgetb: // RC = index - | decode_RB8 RB, INS - | str RC, TMPDlo - | mvn CARG4, #~LJ_TISNUM - | add CARG2, BASE, RB - | str CARG4, TMPDhi - | mov CARG3, TMPDp - | b >1 - | - |->vmeta_tgetv: - | add CARG2, BASE, RB - | add CARG3, BASE, RC - |1: - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | .IOS ldr BASE, L->base - | cmp CRET1, #0 - | beq >3 - | ldrd CARG34, [CRET1] - | ins_next1 - | ins_next2 - | strd CARG34, [BASE, RA] - | ins_next3 - | - |3: // Call __index metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k - | rsb CARG1, BASE, #FRAME_CONT - | ldr BASE, L->top - | mov NARGS8:RC, #16 // 2 args for func(t, k). - | str PC, [BASE, #-12] // [cont|PC] - | add PC, CARG1, BASE - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | b ->vm_call_dispatch_f - | - |->vmeta_tgetr: - | .IOS mov RC, BASE - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. 
- | .IOS mov BASE, RC - | cmp CRET1, #0 - | ldrdne CARG12, [CRET1] - | mvneq CARG2, #~LJ_TNIL - | b ->BC_TGETR_Z - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | add CARG2, BASE, RB - | b >2 - | - |->vmeta_tsets: - | sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv) - | mvn CARG4, #~LJ_TTAB - | str TAB:RB, [CARG2] - | str CARG4, [CARG2, #4] - |2: - | mvn CARG4, #~LJ_TSTR - | str STR:RC, TMPDlo - | str CARG4, TMPDhi - | mov CARG3, TMPDp - | b >1 - | - |->vmeta_tsetb: // RC = index - | decode_RB8 RB, INS - | str RC, TMPDlo - | mvn CARG4, #~LJ_TISNUM - | add CARG2, BASE, RB - | str CARG4, TMPDhi - | mov CARG3, TMPDp - | b >1 - | - |->vmeta_tsetv: - | add CARG2, BASE, RB - | add CARG3, BASE, RC - |1: - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | .IOS ldr BASE, L->base - | cmp CRET1, #0 - | ldrd CARG34, [BASE, RA] - | beq >3 - | ins_next1 - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | strd CARG34, [CRET1] - | ins_next2 - | ins_next3 - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | rsb CARG1, BASE, #FRAME_CONT - | ldr BASE, L->top - | mov NARGS8:RC, #24 // 3 args for func(t, k, v). - | strd CARG34, [BASE, #16] // Copy value to third argument. - | str PC, [BASE, #-12] // [cont|PC] - | add PC, CARG1, BASE - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | b ->vm_call_dispatch_f - | - |->vmeta_tsetr: - | str BASE, L->base - | .IOS mov RC, BASE - | str PC, SAVE_PC - | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - | // Returns TValue *. 
- | .IOS mov BASE, RC - | b ->BC_TSETR_Z - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | mov CARG1, L - | sub PC, PC, #4 - | mov CARG2, RA - | str BASE, L->base - | mov CARG3, RC - | str PC, SAVE_PC - | decode_OP CARG4, INS - | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - | // Returns 0/1 or TValue * (metamethod). - |3: - | .IOS ldr BASE, L->base - | cmp CRET1, #1 - | bhi ->vmeta_binop - |4: - | ldrh RB, [PC, #2] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | subhs PC, RB, #0x20000 - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | ldr INS, [PC, #-4] - | ldrd CARG12, [RA] - | decode_RA8 CARG3, INS - | strd CARG12, [BASE, CARG3] - | b ->cont_nop - | - |->cont_condt: // RA = resultptr - | ldr CARG2, [RA, #4] - | mvn CARG1, #~LJ_TTRUE - | cmp CARG1, CARG2 // Branch if result is true. - | b <4 - | - |->cont_condf: // RA = resultptr - | ldr CARG2, [RA, #4] - | checktp CARG2, LJ_TFALSE // Branch if result is false. - | b <4 - | - |->vmeta_equal: - | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - | // Returns 0/1 or TValue * (metamethod). - | b <3 - | - |->vmeta_equal_cd: - |.if FFI - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | mov CARG2, INS - | str PC, SAVE_PC - | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) - | // Returns 0/1 or TValue * (metamethod). 
- | b <3 - |.endif - | - |->vmeta_istype: - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | lsr CARG2, RA, #3 - | mov CARG3, RC - | str PC, SAVE_PC - | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - | .IOS ldr BASE, L->base - | b ->cont_nop - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_arith_vn: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG3, BASE, RB - | add CARG4, KBASE, RC - | b >1 - | - |->vmeta_arith_nv: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG4, BASE, RB - | add CARG3, KBASE, RC - | b >1 - | - |->vmeta_unm: - | ldr INS, [PC, #-8] - | sub PC, PC, #4 - | add CARG3, BASE, RC - | add CARG4, BASE, RC - | b >1 - | - |->vmeta_arith_vv: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG3, BASE, RB - | add CARG4, BASE, RC - |1: - | decode_OP OP, INS - | add CARG2, BASE, RA - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | str OP, ARG5 - | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - | // Returns NULL (finished) or TValue * (metamethod). - | .IOS ldr BASE, L->base - | cmp CRET1, #0 - | beq ->cont_nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | sub CARG2, CRET1, BASE - | str PC, [CRET1, #-12] // [cont|PC] - | add PC, CARG2, #FRAME_CONT - | mov BASE, CRET1 - | mov NARGS8:RC, #16 // 2 args for func(o1, o2). - | b ->vm_call_dispatch - | - |->vmeta_len: - | add CARG2, BASE, RC - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_len // (lua_State *L, TValue *o) - | // Returns NULL (retry) or TValue * (metamethod base). - | .IOS ldr BASE, L->base -#if LJ_52 - | cmp CRET1, #0 - | bne ->vmeta_binop // Binop call for compatibility. - | ldr TAB:CARG1, [BASE, RC] - | b ->BC_LEN_Z -#else - | b ->vmeta_binop // Binop call for compatibility. 
-#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. - | // RB = old base, BASE = new base, RC = nargs*8 - | mov CARG1, L - | str RB, L->base // This is the callers base! - | sub CARG2, BASE, #8 - | str PC, SAVE_PC - | add CARG3, BASE, NARGS8:RC - | .IOS mov RA, BASE - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | .IOS mov BASE, RA - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | mov CARG1, L - | str BASE, L->base - | sub CARG2, RA, #8 - | str PC, SAVE_PC - | add CARG3, RA, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | .IOS ldr BASE, L->base - | ldr LFUNC:CARG3, [RA, FRAME_FUNC] // Guaranteed to be a function here. - | ldr PC, [BASE, FRAME_PC] - | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. - | b ->BC_CALLT2_Z - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | mov CARG1, L - | str BASE, L->base - | mov CARG2, RA - | str PC, SAVE_PC - | bl extern lj_meta_for // (lua_State *L, TValue *base) - | .IOS ldr BASE, L->base - |.if JIT - | ldrb OP, [PC, #-4] - |.endif - | ldr INS, [PC, #-4] - |.if JIT - | cmp OP, #BC_JFORI - |.endif - | decode_RA8 RA, INS - | decode_RD RC, INS - |.if JIT - | beq =>BC_JFORI - |.endif - | b =>BC_FORI - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. 
name: - | ldrd CARG12, [BASE] - | cmp NARGS8:RC, #8 - | blo ->fff_fallback - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. name: - | ldrd CARG12, [BASE] - | ldrd CARG34, [BASE, #8] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - |.endmacro - | - |.macro .ffunc_n, name - | .ffunc_1 name - | checktp CARG2, LJ_TISNUM - | bhs ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name - | .ffunc_2 name - | checktp CARG2, LJ_TISNUM - | cmnlo CARG4, #-LJ_TISNUM - | bhs ->fff_fallback - |.endmacro - | - |.macro .ffunc_d, name - | .ffunc name - | ldr CARG2, [BASE, #4] - | cmp NARGS8:RC, #8 - | vldr d0, [BASE] - | blo ->fff_fallback - | checktp CARG2, LJ_TISNUM - | bhs ->fff_fallback - |.endmacro - | - |.macro .ffunc_dd, name - | .ffunc name - | ldr CARG2, [BASE, #4] - | ldr CARG4, [BASE, #12] - | cmp NARGS8:RC, #16 - | vldr d0, [BASE] - | vldr d1, [BASE, #8] - | blo ->fff_fallback - | checktp CARG2, LJ_TISNUM - | cmnlo CARG4, #-LJ_TISNUM - | bhs ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. - |.macro ffgccheck - | ldr CARG1, [DISPATCH, #DISPATCH_GL(gc.total)] - | ldr CARG2, [DISPATCH, #DISPATCH_GL(gc.threshold)] - | cmp CARG1, CARG2 - | blge ->fff_gcstep - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc_1 assert - | checktp CARG2, LJ_TTRUE - | bhi ->fff_fallback - | ldr PC, [BASE, FRAME_PC] - | strd CARG12, [BASE, #-8] - | mov RB, BASE - | subs RA, NARGS8:RC, #8 - | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8. - | beq ->fff_res // Done if exactly 1 argument. 
- |1: - | ldrd CARG12, [RB, #8] - | subs RA, RA, #8 - | strd CARG12, [RB], #8 - | bne <1 - | b ->fff_res - | - |.ffunc type - | ldr CARG2, [BASE, #4] - | cmp NARGS8:RC, #8 - | blo ->fff_fallback - | checktp CARG2, LJ_TISNUM - | mvnlo CARG2, #~LJ_TISNUM - | rsb CARG4, CARG2, #(int)(offsetof(GCfuncC, upvalue)>>3)-1 - | lsl CARG4, CARG4, #3 - | ldrd CARG12, [CFUNC:CARG3, CARG4] - | b ->fff_restv - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | checktp CARG2, LJ_TTAB - | cmnne CARG2, #-LJ_TUDATA - | bne >6 - |1: // Field metatable must be at same offset for GCtab and GCudata! - | ldr TAB:RB, TAB:CARG1->metatable - |2: - | mvn CARG2, #~LJ_TNIL - | ldr STR:RC, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])] - | cmp TAB:RB, #0 - | beq ->fff_restv - | ldr CARG3, TAB:RB->hmask - | ldr CARG4, STR:RC->hash - | ldr NODE:INS, TAB:RB->node - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask - | add CARG3, CARG3, CARG3, lsl #1 - | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 - |3: // Rearranged logic, because we expect _not_ to find the key. - | ldrd CARG34, NODE:INS->key // STALL: early NODE:INS. - | ldrd CARG12, NODE:INS->val - | ldr NODE:INS, NODE:INS->next - | checktp CARG4, LJ_TSTR - | cmpeq CARG3, STR:RC - | beq >5 - | cmp NODE:INS, #0 - | bne <3 - |4: - | mov CARG1, RB // Use metatable as default result. - | mvn CARG2, #~LJ_TTAB - | b ->fff_restv - |5: - | checktp CARG2, LJ_TNIL - | bne ->fff_restv - | b <4 - | - |6: - | checktp CARG2, LJ_TISNUM - | mvnhs CARG2, CARG2 - | movlo CARG2, #~LJ_TISNUM - | add CARG4, DISPATCH, CARG2, lsl #2 - | ldr TAB:RB, [CARG4, #DISPATCH_GL(gcroot[GCROOT_BASEMT])] - | b <2 - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. 
- | checktp CARG2, LJ_TTAB - | ldreq TAB:RB, TAB:CARG1->metatable - | checktpeq CARG4, LJ_TTAB - | ldrbeq CARG4, TAB:CARG1->marked - | cmpeq TAB:RB, #0 - | bne ->fff_fallback - | tst CARG4, #LJ_GC_BLACK // isblack(table) - | str TAB:CARG3, TAB:CARG1->metatable - | beq ->fff_restv - | barrierback TAB:CARG1, CARG4, CARG3 - | b ->fff_restv - | - |.ffunc rawget - | ldrd CARG34, [BASE] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - | mov CARG2, CARG3 - | checktab CARG4, ->fff_fallback - | mov CARG1, L - | add CARG3, BASE, #8 - | .IOS mov RA, BASE - | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - | // Returns cTValue *. - | .IOS mov BASE, RA - | ldrd CARG12, [CRET1] - | b ->fff_restv - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | ldrd CARG12, [BASE] - | cmp NARGS8:RC, #8 - | bne ->fff_fallback - | checktp CARG2, LJ_TISNUM - | bls ->fff_restv - | b ->fff_fallback - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | checktp CARG2, LJ_TSTR - | // A __tostring method in the string base metatable is ignored. - | beq ->fff_restv - | // Handle numbers inline, unless a number base metatable is present. - | ldr CARG4, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])] - | str BASE, L->base - | checktp CARG2, LJ_TISNUM - | cmpls CARG4, #0 - | str PC, SAVE_PC // Redundant (but a defined value). - | bhi ->fff_fallback - | ffgccheck - | mov CARG1, L - | mov CARG2, BASE - | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) - | // Returns GCstr *. - | ldr BASE, L->base - | mvn CARG2, #~LJ_TSTR - | b ->fff_restv - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc_1 next - | mvn CARG4, #~LJ_TNIL - | checktab CARG2, ->fff_fallback - | strd CARG34, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. 
- | ldr PC, [BASE, FRAME_PC] - | mov CARG2, CARG1 - | str BASE, L->base // Add frame since C call can throw. - | mov CARG1, L - | str BASE, L->top // Dummy frame length is ok. - | add CARG3, BASE, #8 - | str PC, SAVE_PC - | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Returns 0 at end of traversal. - | .IOS ldr BASE, L->base - | cmp CRET1, #0 - | mvneq CRET2, #~LJ_TNIL - | beq ->fff_restv // End of traversal: return nil. - | ldrd CARG12, [BASE, #8] // Copy key and value to results. - | ldrd CARG34, [BASE, #16] - | mov RC, #(2+1)*8 - | strd CARG12, [BASE, #-8] - | strd CARG34, [BASE] - | b ->fff_res - | - |.ffunc_1 pairs - | checktab CARG2, ->fff_fallback -#if LJ_52 - | ldr TAB:RB, TAB:CARG1->metatable -#endif - | ldrd CFUNC:CARG34, CFUNC:CARG3->upvalue[0] - | ldr PC, [BASE, FRAME_PC] -#if LJ_52 - | cmp TAB:RB, #0 - | bne ->fff_fallback -#endif - | mvn CARG2, #~LJ_TNIL - | mov RC, #(3+1)*8 - | strd CFUNC:CARG34, [BASE, #-8] - | str CARG2, [BASE, #12] - | b ->fff_res - | - |.ffunc_2 ipairs_aux - | checktp CARG2, LJ_TTAB - | checktpeq CARG4, LJ_TISNUM - | bne ->fff_fallback - | ldr RB, TAB:CARG1->asize - | ldr RC, TAB:CARG1->array - | add CARG3, CARG3, #1 - | ldr PC, [BASE, FRAME_PC] - | cmp CARG3, RB - | add RC, RC, CARG3, lsl #3 - | strd CARG34, [BASE, #-8] - | ldrdlo CARG12, [RC] - | mov RC, #(0+1)*8 - | bhs >2 // Not in array part? - |1: - | checktp CARG2, LJ_TNIL - | movne RC, #(2+1)*8 - | strdne CARG12, [BASE] - | b ->fff_res - |2: // Check for empty hash part first. Otherwise call C function. - | ldr RB, TAB:CARG1->hmask - | mov CARG2, CARG3 - | cmp RB, #0 - | beq ->fff_res - | .IOS mov RA, BASE - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. 
- | .IOS mov BASE, RA - | cmp CRET1, #0 - | beq ->fff_res - | ldrd CARG12, [CRET1] - | b <1 - | - |.ffunc_1 ipairs - | checktab CARG2, ->fff_fallback -#if LJ_52 - | ldr TAB:RB, TAB:CARG1->metatable -#endif - | ldrd CFUNC:CARG34, CFUNC:CARG3->upvalue[0] - | ldr PC, [BASE, FRAME_PC] -#if LJ_52 - | cmp TAB:RB, #0 - | bne ->fff_fallback -#endif - | mov CARG1, #0 - | mvn CARG2, #~LJ_TISNUM - | mov RC, #(3+1)*8 - | strd CFUNC:CARG34, [BASE, #-8] - | strd CARG12, [BASE, #8] - | b ->fff_res - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)] - | cmp NARGS8:RC, #8 - | blo ->fff_fallback - | tst RA, #HOOK_ACTIVE // Remember active hook before pcall. - | mov RB, BASE - | add BASE, BASE, #8 - | moveq PC, #8+FRAME_PCALL - | movne PC, #8+FRAME_PCALLH - | sub NARGS8:RC, NARGS8:RC, #8 - | b ->vm_call_dispatch - | - |.ffunc_2 xpcall - | ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)] - | checkfunc CARG4, ->fff_fallback // Traceback must be a function. - | mov RB, BASE - | strd CARG12, [BASE, #8] // Swap function and traceback. - | strd CARG34, [BASE] - | tst RA, #HOOK_ACTIVE // Remember active hook before pcall. 
- | add BASE, BASE, #16 - | moveq PC, #16+FRAME_PCALL - | movne PC, #16+FRAME_PCALLH - | sub NARGS8:RC, NARGS8:RC, #16 - | b ->vm_call_dispatch - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | checktp CARG2, LJ_TTHREAD - | bne ->fff_fallback - |.else - |.ffunc coroutine_wrap_aux - | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr - |.endif - | ldr PC, [BASE, FRAME_PC] - | str BASE, L->base - | ldr CARG2, L:CARG1->top - | ldrb RA, L:CARG1->status - | ldr RB, L:CARG1->base - | add CARG3, CARG2, NARGS8:RC - | add CARG4, CARG2, RA - | str PC, SAVE_PC - | cmp CARG4, RB - | beq ->fff_fallback - | ldr CARG4, L:CARG1->maxstack - | ldr RB, L:CARG1->cframe - | cmp RA, #LUA_YIELD - | cmpls CARG3, CARG4 - | cmpls RB, #0 - | bhi ->fff_fallback - |1: - |.if resume - | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC. - | add BASE, BASE, #8 - | sub NARGS8:RC, NARGS8:RC, #8 - |.endif - | str CARG3, L:CARG1->top - | str BASE, L->top - |2: // Move args to coroutine. - | ldrd CARG34, [BASE, RB] - | cmp RB, NARGS8:RC - | strdne CARG34, [CARG2, RB] - | add RB, RB, #8 - | bne <2 - | - | mov CARG3, #0 - | mov L:RA, L:CARG1 - | mov CARG4, #0 - | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) - | // Returns thread status. - |4: - | ldr CARG3, L:RA->base - | mv_vmstate CARG2, INTERP - | ldr CARG4, L:RA->top - | cmp CRET1, #LUA_YIELD - | ldr BASE, L->base - | str L, [DISPATCH, #DISPATCH_GL(cur_L)] - | st_vmstate CARG2 - | bhi >8 - | subs RC, CARG4, CARG3 - | ldr CARG1, L->maxstack - | add CARG2, BASE, RC - | beq >6 // No results? - | cmp CARG2, CARG1 - | mov RB, #0 - | bhi >9 // Need to grow stack? - | - | sub CARG4, RC, #8 - | str CARG3, L:RA->top // Clear coroutine stack. - |5: // Move results from coroutine. 
- | ldrd CARG12, [CARG3, RB] - | cmp RB, CARG4 - | strd CARG12, [BASE, RB] - | add RB, RB, #8 - | bne <5 - |6: - |.if resume - | mvn CARG3, #~LJ_TTRUE - | add RC, RC, #16 - |7: - | str CARG3, [BASE, #-4] // Prepend true/false to results. - | sub RA, BASE, #8 - |.else - | mov RA, BASE - | add RC, RC, #8 - |.endif - | ands CARG1, PC, #FRAME_TYPE - | str PC, SAVE_PC - | str RC, SAVE_MULTRES - | beq ->BC_RET_Z - | b ->vm_return - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | ldrd CARG12, [CARG4, #-8]! - | mvn CARG3, #~LJ_TFALSE - | mov RC, #(2+1)*8 - | str CARG4, L:RA->top // Remove error from coroutine stack. - | strd CARG12, [BASE] // Copy error message. - | b <7 - |.else - | mov CARG1, L - | mov CARG2, L:RA - | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - | // Never returns. - |.endif - | - |9: // Handle stack expansion on return from yield. - | mov CARG1, L - | lsr CARG2, RC, #3 - | bl extern lj_state_growstack // (lua_State *L, int n) - | mov CRET1, #0 - | b <4 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | ldr CARG1, L->cframe - | add CARG2, BASE, NARGS8:RC - | str BASE, L->base - | tst CARG1, #CFRAME_RESUME - | str CARG2, L->top - | mov CRET1, #LUA_YIELD - | mov CARG3, #0 - | beq ->fff_fallback - | str CARG3, L->cframe - | strb CRET1, L->status - | b ->vm_leave_unw - | - |//-- Math library ------------------------------------------------------- - | - |.macro math_round, func - | .ffunc_1 math_ .. func - | checktp CARG2, LJ_TISNUM - | beq ->fff_restv - | bhi ->fff_fallback - | // Round FP value and normalize result. - | lsl CARG3, CARG2, #1 - | adds RB, CARG3, #0x00200000 - | bpl >2 // |x| < 1? 
- | mvn CARG4, #0x3e0 - | subs RB, CARG4, RB, asr #21 - | lsl CARG4, CARG2, #11 - | lsl CARG3, CARG1, #11 - | orr CARG4, CARG4, #0x80000000 - | rsb INS, RB, #32 - | orr CARG4, CARG4, CARG1, lsr #21 - | bls >3 // |x| >= 2^31? - | orr CARG3, CARG3, CARG4, lsl INS - | lsr CARG1, CARG4, RB - |.if "func" == "floor" - | tst CARG3, CARG2, asr #31 - | addne CARG1, CARG1, #1 - |.else - | bics CARG3, CARG3, CARG2, asr #31 - | addsne CARG1, CARG1, #1 - | ldrdvs CARG12, >9 - | bvs ->fff_restv - |.endif - | cmp CARG2, #0 - | rsblt CARG1, CARG1, #0 - |1: - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - | - |2: // |x| < 1 - | bcs ->fff_restv // |x| is not finite. - | orr CARG3, CARG3, CARG1 // ztest = abs(hi) | lo - |.if "func" == "floor" - | tst CARG3, CARG2, asr #31 // return (ztest & sign) == 0 ? 0 : -1 - | moveq CARG1, #0 - | mvnne CARG1, #0 - |.else - | bics CARG3, CARG3, CARG2, asr #31 // return (ztest & ~sign) == 0 ? 0 : 1 - | moveq CARG1, #0 - | movne CARG1, #1 - |.endif - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - | - |3: // |x| >= 2^31. Check for x == -(2^31). - | cmpeq CARG4, #0x80000000 - |.if "func" == "floor" - | cmpeq CARG3, #0 - |.endif - | bne >4 - | cmp CARG2, #0 - | movmi CARG1, #0x80000000 - | bmi <1 - |4: - | bl ->vm_..func.._sf - | b ->fff_restv - |.endmacro - | - | math_round floor - | math_round ceil - | - |.align 8 - |9: - | .long 0x00000000, 0x41e00000 // 2^31. - | - |.ffunc_1 math_abs - | checktp CARG2, LJ_TISNUM - | bhi ->fff_fallback - | bicne CARG2, CARG2, #0x80000000 - | bne ->fff_restv - | cmp CARG1, #0 - | rsbslt CARG1, CARG1, #0 - | ldrdvs CARG12, <9 - | // Fallthrough. - | - |->fff_restv: - | // CARG12 = TValue result. - | ldr PC, [BASE, FRAME_PC] - | strd CARG12, [BASE, #-8] - |->fff_res1: - | // PC = return. - | mov RC, #(1+1)*8 - |->fff_res: - | // RC = (nresults+1)*8, PC = return. 
- | ands CARG1, PC, #FRAME_TYPE - | ldreq INS, [PC, #-4] - | str RC, SAVE_MULTRES - | sub RA, BASE, #8 - | bne ->vm_return - | decode_RB8 RB, INS - |5: - | cmp RB, RC // More results expected? - | bhi >6 - | decode_RA8 CARG1, INS - | ins_next1 - | ins_next2 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | sub BASE, RA, CARG1 - | ins_next3 - | - |6: // Fill up results with nil. - | add CARG2, RA, RC - | mvn CARG1, #~LJ_TNIL - | add RC, RC, #8 - | str CARG1, [CARG2, #-4] - | b <5 - | - |.macro math_extern, func - |.if HFABI - | .ffunc_d math_ .. func - |.else - | .ffunc_n math_ .. func - |.endif - | .IOS mov RA, BASE - | bl extern func - | .IOS mov BASE, RA - |.if HFABI - | b ->fff_resd - |.else - | b ->fff_restv - |.endif - |.endmacro - | - |.macro math_extern2, func - |.if HFABI - | .ffunc_dd math_ .. func - |.else - | .ffunc_nn math_ .. func - |.endif - | .IOS mov RA, BASE - | bl extern func - | .IOS mov BASE, RA - |.if HFABI - | b ->fff_resd - |.else - | b ->fff_restv - |.endif - |.endmacro - | - |.if FPU - | .ffunc_d math_sqrt - | vsqrt.f64 d0, d0 - |->fff_resd: - | ldr PC, [BASE, FRAME_PC] - | vstr d0, [BASE, #-8] - | b ->fff_res1 - |.else - | math_extern sqrt - |.endif - | - |.ffunc math_log - |.if HFABI - | ldr CARG2, [BASE, #4] - | cmp NARGS8:RC, #8 // Need exactly 1 argument. - | vldr d0, [BASE] - | bne ->fff_fallback - |.else - | ldrd CARG12, [BASE] - | cmp NARGS8:RC, #8 // Need exactly 1 argument. 
- | bne ->fff_fallback - |.endif - | checktp CARG2, LJ_TISNUM - | bhs ->fff_fallback - | .IOS mov RA, BASE - | bl extern log - | .IOS mov BASE, RA - |.if HFABI - | b ->fff_resd - |.else - | b ->fff_restv - |.endif - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.if HFABI - | .ffunc math_ldexp - | ldr CARG4, [BASE, #4] - | ldrd CARG12, [BASE, #8] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - | vldr d0, [BASE] - | checktp CARG4, LJ_TISNUM - | bhs ->fff_fallback - | checktp CARG2, LJ_TISNUM - | bne ->fff_fallback - | .IOS mov RA, BASE - | bl extern ldexp // (double x, int exp) - | .IOS mov BASE, RA - | b ->fff_resd - |.else - |.ffunc_2 math_ldexp - | checktp CARG2, LJ_TISNUM - | bhs ->fff_fallback - | checktp CARG4, LJ_TISNUM - | bne ->fff_fallback - | .IOS mov RA, BASE - | bl extern ldexp // (double x, int exp) - | .IOS mov BASE, RA - | b ->fff_restv - |.endif - | - |.if HFABI - |.ffunc_d math_frexp - | mov CARG1, sp - | .IOS mov RA, BASE - | bl extern frexp - | .IOS mov BASE, RA - | ldr CARG3, [sp] - | mvn CARG4, #~LJ_TISNUM - | ldr PC, [BASE, FRAME_PC] - | vstr d0, [BASE, #-8] - | mov RC, #(2+1)*8 - | strd CARG34, [BASE] - | b ->fff_res - |.else - |.ffunc_n math_frexp - | mov CARG3, sp - | .IOS mov RA, BASE - | bl extern frexp - | .IOS mov BASE, RA - | ldr CARG3, [sp] - | mvn CARG4, #~LJ_TISNUM - | ldr PC, [BASE, FRAME_PC] - | strd CARG12, [BASE, #-8] - | mov RC, #(2+1)*8 - | strd CARG34, [BASE] - | b ->fff_res - |.endif - | - |.if HFABI - |.ffunc_d math_modf - | sub CARG1, BASE, #8 - | ldr PC, [BASE, FRAME_PC] - | .IOS mov RA, BASE - | bl extern modf - | .IOS mov BASE, RA - | mov RC, #(2+1)*8 - | vstr d0, [BASE] - | b ->fff_res - |.else - |.ffunc_n math_modf - | sub CARG3, BASE, #8 - | ldr PC, [BASE, FRAME_PC] 
- | .IOS mov RA, BASE - | bl extern modf - | .IOS mov BASE, RA - | mov RC, #(2+1)*8 - | strd CARG12, [BASE] - | b ->fff_res - |.endif - | - |.macro math_minmax, name, cond, fcond - |.if FPU - | .ffunc_1 name - | add RB, BASE, RC - | checktp CARG2, LJ_TISNUM - | add RA, BASE, #8 - | bne >4 - |1: // Handle integers. - | ldrd CARG34, [RA] - | cmp RA, RB - | bhs ->fff_restv - | checktp CARG4, LJ_TISNUM - | bne >3 - | cmp CARG1, CARG3 - | add RA, RA, #8 - | mov..cond CARG1, CARG3 - | b <1 - |3: // Convert intermediate result to number and continue below. - | vmov s4, CARG1 - | bhi ->fff_fallback - | vldr d1, [RA] - | vcvt.f64.s32 d0, s4 - | b >6 - | - |4: - | vldr d0, [BASE] - | bhi ->fff_fallback - |5: // Handle numbers. - | ldrd CARG34, [RA] - | vldr d1, [RA] - | cmp RA, RB - | bhs ->fff_resd - | checktp CARG4, LJ_TISNUM - | bhs >7 - |6: - | vcmp.f64 d0, d1 - | vmrs - | add RA, RA, #8 - | vmov..fcond.f64 d0, d1 - | b <5 - |7: // Convert integer to number and continue above. - | vmov s4, CARG3 - | bhi ->fff_fallback - | vcvt.f64.s32 d1, s4 - | b <6 - | - |.else - | - | .ffunc_1 name - | checktp CARG2, LJ_TISNUM - | mov RA, #8 - | bne >4 - |1: // Handle integers. - | ldrd CARG34, [BASE, RA] - | cmp RA, RC - | bhs ->fff_restv - | checktp CARG4, LJ_TISNUM - | bne >3 - | cmp CARG1, CARG3 - | add RA, RA, #8 - | mov..cond CARG1, CARG3 - | b <1 - |3: // Convert intermediate result to number and continue below. - | bhi ->fff_fallback - | bl extern __aeabi_i2d - | ldrd CARG34, [BASE, RA] - | b >6 - | - |4: - | bhi ->fff_fallback - |5: // Handle numbers. - | ldrd CARG34, [BASE, RA] - | cmp RA, RC - | bhs ->fff_restv - | checktp CARG4, LJ_TISNUM - | bhs >7 - |6: - | bl extern __aeabi_cdcmple - | add RA, RA, #8 - | mov..fcond CARG1, CARG3 - | mov..fcond CARG2, CARG4 - | b <5 - |7: // Convert integer to number and continue above. 
- | bhi ->fff_fallback - | strd CARG12, TMPD - | mov CARG1, CARG3 - | bl extern __aeabi_i2d - | ldrd CARG34, TMPD - | b <6 - |.endif - |.endmacro - | - | math_minmax math_min, gt, hi - | math_minmax math_max, lt, lo - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | ldrd CARG12, [BASE] - | ldr PC, [BASE, FRAME_PC] - | cmp NARGS8:RC, #8 - | checktpeq CARG2, LJ_TSTR // Need exactly 1 argument. - | bne ->fff_fallback - | ldr CARG3, STR:CARG1->len - | ldrb CARG1, STR:CARG1[1] // Access is always ok (NUL at end). - | mvn CARG2, #~LJ_TISNUM - | cmp CARG3, #0 - | moveq RC, #(0+1)*8 - | movne RC, #(1+1)*8 - | strd CARG12, [BASE, #-8] - | b ->fff_res - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - | ldrd CARG12, [BASE] - | ldr PC, [BASE, FRAME_PC] - | cmp NARGS8:RC, #8 // Need exactly 1 argument. - | checktpeq CARG2, LJ_TISNUM - | bicseq CARG4, CARG1, #255 - | mov CARG3, #1 - | bne ->fff_fallback - | str CARG1, TMPD - | mov CARG2, TMPDp // Points to stack. Little-endian. - |->fff_newstr: - | // CARG2 = str, CARG3 = len. - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_str_new // (lua_State *L, char *str, size_t l) - |->fff_resstr: - | // Returns GCstr *. 
- | ldr BASE, L->base - | mvn CARG2, #~LJ_TSTR - | b ->fff_restv - | - |.ffunc string_sub - | ffgccheck - | ldrd CARG12, [BASE] - | ldrd CARG34, [BASE, #16] - | cmp NARGS8:RC, #16 - | mvn RB, #0 - | beq >1 - | blo ->fff_fallback - | checktp CARG4, LJ_TISNUM - | mov RB, CARG3 - | bne ->fff_fallback - |1: - | ldrd CARG34, [BASE, #8] - | checktp CARG2, LJ_TSTR - | ldreq CARG2, STR:CARG1->len - | checktpeq CARG4, LJ_TISNUM - | bne ->fff_fallback - | // CARG1 = str, CARG2 = str->len, CARG3 = start, RB = end - | add CARG4, CARG2, #1 - | cmp CARG3, #0 // if (start < 0) start += len+1 - | addlt CARG3, CARG3, CARG4 - | cmp CARG3, #1 // if (start < 1) start = 1 - | movlt CARG3, #1 - | cmp RB, #0 // if (end < 0) end += len+1 - | addlt RB, RB, CARG4 - | bic RB, RB, RB, asr #31 // if (end < 0) end = 0 - | cmp RB, CARG2 // if (end > len) end = len - | add CARG1, STR:CARG1, #sizeof(GCstr)-1 - | movgt RB, CARG2 - | add CARG2, CARG1, CARG3 - | subs CARG3, RB, CARG3 // len = end - start - | add CARG3, CARG3, #1 // len += 1 - | bge ->fff_newstr - |->fff_emptystr: - | sub STR:CARG1, DISPATCH, #-DISPATCH_GL(strempty) - | mvn CARG2, #~LJ_TSTR - | b ->fff_restv - | - |.macro ffstring_op, name - | .ffunc string_ .. name - | ffgccheck - | ldr CARG3, [BASE, #4] - | cmp NARGS8:RC, #8 - | ldr STR:CARG2, [BASE] - | blo ->fff_fallback - | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf) - | checkstr CARG3, ->fff_fallback - | ldr CARG4, SBUF:CARG1->b - | str BASE, L->base - | str PC, SAVE_PC - | str L, SBUF:CARG1->L - | str CARG4, SBUF:CARG1->p - | bl extern lj_buf_putstr_ .. name - | bl extern lj_buf_tostr - | b ->fff_resstr - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |// FP number to bit conversion for soft-float. Clobbers r0-r3. - |->vm_tobit_fb: - | bhi ->fff_fallback - |->vm_tobit: - | lsl RB, CARG2, #1 - | adds RB, RB, #0x00200000 - | movpl CARG1, #0 // |x| < 1? 
- | bxpl lr - | mvn CARG4, #0x3e0 - | subs RB, CARG4, RB, asr #21 - | bmi >1 // |x| >= 2^32? - | lsl CARG4, CARG2, #11 - | orr CARG4, CARG4, #0x80000000 - | orr CARG4, CARG4, CARG1, lsr #21 - | cmp CARG2, #0 - | lsr CARG1, CARG4, RB - | rsblt CARG1, CARG1, #0 - | bx lr - |1: - | add RB, RB, #21 - | lsr CARG4, CARG1, RB - | rsb RB, RB, #20 - | lsl CARG1, CARG2, #12 - | cmp CARG2, #0 - | orr CARG1, CARG4, CARG1, lsl RB - | rsblt CARG1, CARG1, #0 - | bx lr - | - |.macro .ffunc_bit, name - | .ffunc_1 bit_..name - | checktp CARG2, LJ_TISNUM - | blne ->vm_tobit_fb - |.endmacro - | - |.ffunc_bit tobit - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name - | mov CARG3, CARG1 - | mov RA, #8 - |1: - | ldrd CARG12, [BASE, RA] - | cmp RA, NARGS8:RC - | add RA, RA, #8 - | bge >2 - | checktp CARG2, LJ_TISNUM - | blne ->vm_tobit_fb - | ins CARG3, CARG3, CARG1 - | b <1 - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, orr - |.ffunc_bit_op bxor, eor - | - |2: - | mvn CARG4, #~LJ_TISNUM - | ldr PC, [BASE, FRAME_PC] - | strd CARG34, [BASE, #-8] - | b ->fff_res1 - | - |.ffunc_bit bswap - | eor CARG3, CARG1, CARG1, ror #16 - | bic CARG3, CARG3, #0x00ff0000 - | ror CARG1, CARG1, #8 - | mvn CARG2, #~LJ_TISNUM - | eor CARG1, CARG1, CARG3, lsr #8 - | b ->fff_restv - | - |.ffunc_bit bnot - | mvn CARG1, CARG1 - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - | - |.macro .ffunc_bit_sh, name, ins, shmod - | .ffunc bit_..name - | ldrd CARG12, [BASE, #8] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - | checktp CARG2, LJ_TISNUM - | blne ->vm_tobit_fb - |.if shmod == 0 - | and RA, CARG1, #31 - |.else - | rsb RA, CARG1, #0 - |.endif - | ldrd CARG12, [BASE] - | checktp CARG2, LJ_TISNUM - | blne ->vm_tobit_fb - | ins CARG1, CARG1, RA - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - |.endmacro - | - |.ffunc_bit_sh lshift, lsl, 0 - |.ffunc_bit_sh rshift, lsr, 0 - |.ffunc_bit_sh arshift, asr, 0 - |.ffunc_bit_sh rol, ror, 1 - |.ffunc_bit_sh 
ror, ror, 0 - | - |//----------------------------------------------------------------------- - | - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RC = nargs*8 - | ldr CARG3, [BASE, FRAME_FUNC] - | ldr CARG2, L->maxstack - | add CARG1, BASE, NARGS8:RC - | ldr PC, [BASE, FRAME_PC] // Fallback may overwrite PC. - | str CARG1, L->top - | ldr CARG3, CFUNC:CARG3->f - | str BASE, L->base - | add CARG1, CARG1, #8*LUA_MINSTACK - | str PC, SAVE_PC // Redundant (but a defined value). - | cmp CARG1, CARG2 - | mov CARG1, L - | bhi >5 // Need to grow stack. - | blx CARG3 // (lua_State *L) - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | ldr BASE, L->base - | cmp CRET1, #0 - | lsl RC, CRET1, #3 - | sub RA, BASE, #8 - | bgt ->fff_res // Returned nresults+1? - |1: // Returned 0 or -1: retry fast path. - | ldr CARG1, L->top - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | sub NARGS8:RC, CARG1, BASE - | bne ->vm_call_tail // Returned -1? - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | ands CARG1, PC, #FRAME_TYPE - | bic CARG2, PC, #FRAME_TYPEP - | ldreq INS, [PC, #-4] - | andeq CARG2, MASKR8, INS, lsr #5 // Conditional decode_RA8. - | addeq CARG2, CARG2, #8 - | sub RB, BASE, CARG2 - | b ->vm_call_dispatch // Resolve again for tailcall. - | - |5: // Grow stack for fallback handler. - | mov CARG2, #LUA_MINSTACK - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->base - | cmp CARG1, CARG1 // Set zero-flag to force retry. - | b <1 - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | mov RA, lr - | str BASE, L->base - | add CARG2, BASE, NARGS8:RC - | str PC, SAVE_PC // Redundant (but a defined value). - | str CARG2, L->top - | mov CARG1, L - | bl extern lj_gc_step // (lua_State *L) - | ldr BASE, L->base - | mov lr, RA // Help return address predictor. 
- | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] - | bx lr - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | ldrb CARG1, [DISPATCH, #DISPATCH_GL(hookmask)] - | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. - | bne >5 - | // Decrement the hookcount for consistency, but always do the call. - | ldr CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] - | tst CARG1, #HOOK_ACTIVE - | bne >1 - | sub CARG2, CARG2, #1 - | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT - | strne CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] - | b >1 - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | ldrb CARG1, [DISPATCH, #DISPATCH_GL(hookmask)] - | tst CARG1, #HOOK_ACTIVE // Hook already active? - | beq >1 - |5: // Re-dispatch to static ins. - | decode_OP OP, INS - | add OP, DISPATCH, OP, lsl #2 - | ldr pc, [OP, #GG_DISP2STATIC] - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | ldrb CARG1, [DISPATCH, #DISPATCH_GL(hookmask)] - | ldr CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] - | tst CARG1, #HOOK_ACTIVE // Hook already active? - | bne <5 - | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT - | beq <5 - | subs CARG2, CARG2, #1 - | str CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] - | beq >1 - | tst CARG1, #LUA_MASKLINE - | beq <5 - |1: - | mov CARG1, L - | str BASE, L->base - | mov CARG2, PC - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |3: - | ldr BASE, L->base - |4: // Re-dispatch to static ins. - | ldrb OP, [PC, #-4] - | ldr INS, [PC, #-4] - | add OP, DISPATCH, OP, lsl #2 - | ldr OP, [OP, #GG_DISP2STATIC] - | decode_RA8 RA, INS - | decode_RD RC, INS - | bx OP - | - |->cont_hook: // Continue from hook yield. 
- | ldr CARG1, [CARG4, #-24] - | add PC, PC, #4 - | str CARG1, SAVE_MULTRES // Restore MULTRES for *M ins. - | b <4 - | - |->vm_hotloop: // Hot loop counter underflow. - |.if JIT - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). - | sub CARG1, DISPATCH, #-GG_DISP2J - | str PC, SAVE_PC - | ldr CARG3, LFUNC:CARG3->field_pc - | mov CARG2, PC - | str L, [DISPATCH, #DISPATCH_J(L)] - | ldrb CARG3, [CARG3, #PC2PROTO(framesize)] - | str BASE, L->base - | add CARG3, BASE, CARG3, lsl #3 - | str CARG3, L->top - | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) - | b <3 - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - | mov CARG2, PC - |.if JIT - | b >1 - |.endif - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | orr CARG2, PC, #1 - |1: - |.endif - | add CARG4, BASE, RC - | str PC, SAVE_PC - | mov CARG1, L - | str BASE, L->base - | sub RA, RA, BASE - | str CARG4, L->top - | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) - | // Returns ASMFunction. - | ldr BASE, L->base - | ldr CARG4, L->top - | mov CARG2, #0 - | add RA, BASE, RA - | sub NARGS8:RC, CARG4, BASE - | str CARG2, SAVE_PC // Invalidate for subsequent line hook. - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | ldr INS, [PC, #-4] - | bx CRET1 - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // RA = resultptr, CARG4 = meta base - | ldr RB, SAVE_MULTRES - | ldr INS, [PC, #-4] - | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace. - | subs RB, RB, #8 - | decode_RA8 RC, INS // Call base. - | beq >2 - |1: // Move results down. - | ldrd CARG12, [RA] - | add RA, RA, #8 - | subs RB, RB, #8 - | strd CARG12, [BASE, RC] - | add RC, RC, #8 - | bne <1 - |2: - | decode_RA8 RA, INS - | decode_RB8 RB, INS - | add RA, RA, RB - |3: - | cmp RA, RC - | mvn CARG2, #~LJ_TNIL - | bhi >9 // More results wanted? - | - | ldrh RA, TRACE:CARG3->traceno - | ldrh RC, TRACE:CARG3->link - | cmp RC, RA - | beq ->cont_nop // Blacklisted. 
- | cmp RC, #0 - | bne =>BC_JLOOP // Jump to stitched trace. - | - | // Stitch a new trace to the previous trace. - | str RA, [DISPATCH, #DISPATCH_J(exitno)] - | str L, [DISPATCH, #DISPATCH_J(L)] - | str BASE, L->base - | sub CARG1, DISPATCH, #-GG_DISP2J - | mov CARG2, PC - | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) - | ldr BASE, L->base - | b ->cont_nop - | - |9: // Fill up results with nil. - | strd CARG12, [BASE, RC] - | add RC, RC, #8 - | b <3 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | mov CARG1, L - | str BASE, L->base - | mov CARG2, PC - | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. - | ldr BASE, L->base - | sub PC, PC, #4 - | b ->cont_nop -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_exit_handler: - |.if JIT - | sub sp, sp, #12 - | push {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12} - | ldr CARG1, [sp, #64] // Load original value of lr. - | ldr DISPATCH, [lr] // Load DISPATCH. - | add CARG3, sp, #64 // Recompute original value of sp. - | mv_vmstate CARG4, EXIT - | str CARG3, [sp, #52] // Store sp in RID_SP - | st_vmstate CARG4 - | ldr CARG2, [CARG1, #-4]! // Get exit instruction. - | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC. - | str CARG1, [sp, #60] - |.if FPU - | vpush {d0-d15} - |.endif - | lsl CARG2, CARG2, #8 - | add CARG1, CARG1, CARG2, asr #6 - | ldr CARG2, [lr, #4] // Load exit stub group offset. - | sub CARG1, CARG1, lr - | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)] - | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. 
- | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] - | str CARG1, [DISPATCH, #DISPATCH_J(exitno)] - | mov CARG4, #0 - | str BASE, L->base - | str L, [DISPATCH, #DISPATCH_J(L)] - | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)] - | sub CARG1, DISPATCH, #-GG_DISP2J - | mov CARG2, sp - | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) - | // Returns MULTRES (unscaled) or negated error code. - | ldr CARG2, L->cframe - | ldr BASE, L->base - | bic CARG2, CARG2, #~CFRAME_RAWMASK // Use two steps: bic sp is deprecated. - | mov sp, CARG2 - | ldr PC, SAVE_PC // Get SAVE_PC. - | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). - | b >1 - |.endif - |->vm_exit_interp: - | // CARG1 = MULTRES or negated error code, BASE, PC and DISPATCH set. - |.if JIT - | ldr L, SAVE_L - |1: - | cmp CARG1, #0 - | blt >9 // Check for error from exit. - | lsl RC, CARG1, #3 - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | str RC, SAVE_MULTRES - | mov CARG3, #0 - | str BASE, L->base - | ldr CARG2, LFUNC:CARG2->field_pc - | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)] - | mv_vmstate CARG4, INTERP - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | // Modified copy of ins_next which handles function header dispatch, too. - | ldrb OP, [PC] - | mov MASKR8, #255 - | ldr INS, [PC], #4 - | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. - | st_vmstate CARG4 - | cmp OP, #BC_FUNCC+2 // Fast function? - | bhs >4 - |2: - | cmp OP, #BC_FUNCF // Function header? - | ldr OP, [DISPATCH, OP, lsl #2] - | decode_RA8 RA, INS - | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. - | subhs RC, RC, #8 - | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 - | ldrhs CARG3, [BASE, FRAME_FUNC] - | bx OP - | - |4: // Check frame below fast function. - | ldr CARG1, [BASE, FRAME_PC] - | ands CARG2, CARG1, #FRAME_TYPE - | bne <2 // Trace stitching continuation? - | // Otherwise set KBASE for Lua function below fast function. 
- | ldr CARG3, [CARG1, #-4] - | decode_RA8 CARG1, CARG3 - | sub CARG2, BASE, CARG1 - | ldr LFUNC:CARG3, [CARG2, #-16] - | ldr CARG3, LFUNC:CARG3->field_pc - | ldr KBASE, [CARG3, #PC2PROTO(k)] - | b <2 - | - |9: // Rethrow error from the right C frame. - | rsb CARG2, CARG1, #0 - | mov CARG1, L - | bl extern lj_err_throw // (lua_State *L, int errcode) - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// FP value rounding. Called from JIT code. - |// - |// double lj_vm_floor/ceil/trunc(double x); - |.macro vm_round, func, hf - |.if hf == 1 - | vmov CARG1, CARG2, d0 - |.endif - | lsl CARG3, CARG2, #1 - | adds RB, CARG3, #0x00200000 - | bpl >2 // |x| < 1? - | mvn CARG4, #0x3cc - | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0. - | bxlo lr // |x| >= 2^52: done. - | mvn CARG4, #1 - | bic CARG3, CARG1, CARG4, lsl RB // ztest = lo & ~lomask - | and CARG1, CARG1, CARG4, lsl RB // lo &= lomask - | subs RB, RB, #32 - | bicpl CARG4, CARG2, CARG4, lsl RB // |x| <= 2^20: ztest |= hi & ~himask - | orrpl CARG3, CARG3, CARG4 - | mvnpl CARG4, #1 - | andpl CARG2, CARG2, CARG4, lsl RB // |x| <= 2^20: hi &= himask - |.if "func" == "floor" - | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0) - |.else - | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0) - |.endif - |.if hf == 1 - | vmoveq d0, CARG1, CARG2 - |.endif - | bxeq lr // iszero: done. - | mvn CARG4, #1 - | cmp RB, #0 - | lslpl CARG3, CARG4, RB - | mvnmi CARG3, #0 - | add RB, RB, #32 - | subs CARG1, CARG1, CARG4, lsl RB // lo = lo-lomask - | sbc CARG2, CARG2, CARG3 // hi = hi-himask+carry - |.if hf == 1 - | vmov d0, CARG1, CARG2 - |.endif - | bx lr - | - |2: // |x| < 1: - | bxcs lr // |x| is not finite. 
- | orr CARG3, CARG3, CARG1 // ztest = (2*hi) | lo - |.if "func" == "floor" - | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0) - |.else - | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0) - |.endif - | mov CARG1, #0 // lo = 0 - | and CARG2, CARG2, #0x80000000 - | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0) - | orrne CARG2, CARG2, CARG4 - |.if hf == 1 - | vmov d0, CARG1, CARG2 - |.endif - | bx lr - |.endmacro - | - |9: - | .long 0x3ff00000 // hiword(+1.0) - | - |->vm_floor: - |.if HFABI - | vm_round floor, 1 - |.endif - |->vm_floor_sf: - | vm_round floor, 0 - | - |->vm_ceil: - |.if HFABI - | vm_round ceil, 1 - |.endif - |->vm_ceil_sf: - | vm_round ceil, 0 - | - |.macro vm_trunc, hf - |.if JIT - |.if hf == 1 - | vmov CARG1, CARG2, d0 - |.endif - | lsl CARG3, CARG2, #1 - | adds RB, CARG3, #0x00200000 - | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0. - | movpl CARG1, #0 - |.if hf == 1 - | vmovpl d0, CARG1, CARG2 - |.endif - | bxpl lr - | mvn CARG4, #0x3cc - | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0. - | bxlo lr // |x| >= 2^52: already done. - | mvn CARG4, #1 - | and CARG1, CARG1, CARG4, lsl RB // lo &= lomask - | subs RB, RB, #32 - | andpl CARG2, CARG2, CARG4, lsl RB // |x| <= 2^20: hi &= himask - |.if hf == 1 - | vmov d0, CARG1, CARG2 - |.endif - | bx lr - |.endif - |.endmacro - | - |->vm_trunc: - |.if HFABI - | vm_trunc 1 - |.endif - |->vm_trunc_sf: - | vm_trunc 0 - | - | // double lj_vm_mod(double dividend, double divisor); - |->vm_mod: - |.if FPU - | // Special calling convention. Also, RC (r11) is not preserved. 
- | vdiv.f64 d0, d6, d7 - | mov RC, lr - | vmov CARG1, CARG2, d0 - | bl ->vm_floor_sf - | vmov d0, CARG1, CARG2 - | vmul.f64 d0, d0, d7 - | mov lr, RC - | vsub.f64 d6, d6, d0 - | bx lr - |.else - | push {r0, r1, r2, r3, r4, lr} - | bl extern __aeabi_ddiv - | bl ->vm_floor_sf - | ldrd CARG34, [sp, #8] - | bl extern __aeabi_dmul - | ldrd CARG34, [sp] - | eor CARG2, CARG2, #0x80000000 - | bl extern __aeabi_dadd - | add sp, sp, #20 - | pop {pc} - |.endif - | - | // int lj_vm_modi(int dividend, int divisor); - |->vm_modi: - | ands RB, CARG1, #0x80000000 - | rsbmi CARG1, CARG1, #0 // a = |dividend| - | eor RB, RB, CARG2, asr #1 // Keep signdiff and sign(divisor). - | cmp CARG2, #0 - | rsbmi CARG2, CARG2, #0 // b = |divisor| - | subs CARG4, CARG2, #1 - | cmpne CARG1, CARG2 - | moveq CARG1, #0 // if (b == 1 || a == b) a = 0 - | tsthi CARG2, CARG4 - | andeq CARG1, CARG1, CARG4 // else if ((b & (b-1)) == 0) a &= b-1 - | bls >1 - | // Use repeated subtraction to get the remainder. - | clz CARG3, CARG1 - | clz CARG4, CARG2 - | sub CARG4, CARG4, CARG3 - | rsbs CARG3, CARG4, #31 // entry = (31-(clz(b)-clz(a)))*8 - | addne pc, pc, CARG3, lsl #3 // Duff's device. 
- | nop - { - int i; - for (i = 31; i >= 0; i--) { - | cmp CARG1, CARG2, lsl #i - | subhs CARG1, CARG1, CARG2, lsl #i - } - } - |1: - | cmp CARG1, #0 - | cmpne RB, #0 - | submi CARG1, CARG1, CARG2 // if (y != 0 && signdiff) y = y - b - | eors CARG2, CARG1, RB, lsl #1 - | rsbmi CARG1, CARG1, #0 // if (sign(divisor) != sign(y)) y = -y - | bx lr - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. - |// Saveregs already performed. Callback slot number in [sp], g in r12. - |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - | ldr CTSTATE, GL:r12->ctype_state - | add DISPATCH, r12, #GG_G2DISP - |.if FPU - | str r4, SAVE_R4 - | add r4, sp, CFRAME_SPACE+4+8*8 - | vstmdb r4!, {d8-d15} - |.endif - |.if HFABI - | add r12, CTSTATE, #offsetof(CTState, cb.fpr[8]) - |.endif - | strd CARG34, CTSTATE->cb.gpr[2] - | strd CARG12, CTSTATE->cb.gpr[0] - |.if HFABI - | vstmdb r12!, {d0-d7} - |.endif - | ldr CARG4, [sp] - | add CARG3, sp, #CFRAME_SIZE - | mov CARG1, CTSTATE - | lsr CARG4, CARG4, #3 - | str CARG3, CTSTATE->cb.stack - | mov CARG2, sp - | str CARG4, CTSTATE->cb.slot - | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok. - | bl extern lj_ccallback_enter // (CTState *cts, void *cf) - | // Returns lua_State *. - | ldr BASE, L:CRET1->base - | mv_vmstate CARG2, INTERP - | ldr RC, L:CRET1->top - | mov MASKR8, #255 - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | mov L, CRET1 - | sub RC, RC, BASE - | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. 
- | st_vmstate CARG2 - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | ldr CTSTATE, [DISPATCH, #DISPATCH_GL(ctype_state)] - | str BASE, L->base - | str CARG4, L->top - | str L, CTSTATE->L - | mov CARG1, CTSTATE - | mov CARG2, RA - | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) - | ldrd CARG12, CTSTATE->cb.gpr[0] - |.if HFABI - | vldr d0, CTSTATE->cb.fpr[0] - |.endif - | b ->vm_leave_unw - |.endif - | - |->vm_ffi_call: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. - |.if FFI - | .type CCSTATE, CCallState, r4 - | push {CCSTATE, r5, r11, lr} - | mov CCSTATE, CARG1 - | ldr CARG1, CCSTATE:CARG1->spadj - | ldrb CARG2, CCSTATE->nsp - | add CARG3, CCSTATE, #offsetof(CCallState, stack) - |.if HFABI - | add RB, CCSTATE, #offsetof(CCallState, fpr[0]) - |.endif - | mov r11, sp - | sub sp, sp, CARG1 // Readjust stack. - | subs CARG2, CARG2, #1 - |.if HFABI - | vldm RB, {d0-d7} - |.endif - | ldr RB, CCSTATE->func - | bmi >2 - |1: // Copy stack slots. - | ldr CARG4, [CARG3, CARG2, lsl #2] - | str CARG4, [sp, CARG2, lsl #2] - | subs CARG2, CARG2, #1 - | bpl <1 - |2: - | ldrd CARG12, CCSTATE->gpr[0] - | ldrd CARG34, CCSTATE->gpr[2] - | blx RB - | mov sp, r11 - |.if HFABI - | add r12, CCSTATE, #offsetof(CCallState, fpr[4]) - |.endif - | strd CRET1, CCSTATE->gpr[0] - |.if HFABI - | vstmdb r12!, {d0-d3} - |.endif - | pop {CCSTATE, r5, r11, pc} - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. 
*/ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1*8, RC = src2, JMP with RC = target - | lsl RC, RC, #3 - | ldrd CARG12, [RA, BASE]! - | ldrh RB, [PC, #2] - | ldrd CARG34, [RC, BASE]! - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | checktp CARG2, LJ_TISNUM - | bne >3 - | checktp CARG4, LJ_TISNUM - | bne >4 - | cmp CARG1, CARG3 - if (op == BC_ISLT) { - | sublt PC, RB, #0x20000 - } else if (op == BC_ISGE) { - | subge PC, RB, #0x20000 - } else if (op == BC_ISLE) { - | suble PC, RB, #0x20000 - } else { - | subgt PC, RB, #0x20000 - } - |1: - | ins_next - | - |3: // CARG12 is not an integer. - |.if FPU - | vldr d0, [RA] - | bhi ->vmeta_comp - | // d0 is a number. - | checktp CARG4, LJ_TISNUM - | vldr d1, [RC] - | blo >5 - | bhi ->vmeta_comp - | // d0 is a number, CARG3 is an integer. - | vmov s4, CARG3 - | vcvt.f64.s32 d1, s4 - | b >5 - |4: // CARG1 is an integer, CARG34 is not an integer. - | vldr d1, [RC] - | bhi ->vmeta_comp - | // CARG1 is an integer, d1 is a number. - | vmov s4, CARG1 - | vcvt.f64.s32 d0, s4 - |5: // d0 and d1 are numbers. - | vcmp.f64 d0, d1 - | vmrs - | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. - if (op == BC_ISLT) { - | sublo PC, RB, #0x20000 - } else if (op == BC_ISGE) { - | subhs PC, RB, #0x20000 - } else if (op == BC_ISLE) { - | subls PC, RB, #0x20000 - } else { - | subhi PC, RB, #0x20000 - } - | b <1 - |.else - | bhi ->vmeta_comp - | // CARG12 is a number. - | checktp CARG4, LJ_TISNUM - | movlo RA, RB // Save RB. - | blo >5 - | bhi ->vmeta_comp - | // CARG12 is a number, CARG3 is an integer. - | mov CARG1, CARG3 - | mov RC, RA - | mov RA, RB // Save RB. - | bl extern __aeabi_i2d - | mov CARG3, CARG1 - | mov CARG4, CARG2 - | ldrd CARG12, [RC] // Restore first operand. - | b >5 - |4: // CARG1 is an integer, CARG34 is not an integer. - | bhi ->vmeta_comp - | // CARG1 is an integer, CARG34 is a number. - | mov RA, RB // Save RB. 
- | bl extern __aeabi_i2d - | ldrd CARG34, [RC] // Restore second operand. - |5: // CARG12 and CARG34 are numbers. - | bl extern __aeabi_cdcmple - | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. - if (op == BC_ISLT) { - | sublo PC, RA, #0x20000 - } else if (op == BC_ISGE) { - | subhs PC, RA, #0x20000 - } else if (op == BC_ISLE) { - | subls PC, RA, #0x20000 - } else { - | subhi PC, RA, #0x20000 - } - | b <1 - |.endif - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1*8, RC = src2, JMP with RC = target - | lsl RC, RC, #3 - | ldrd CARG12, [RA, BASE]! - | ldrh RB, [PC, #2] - | ldrd CARG34, [RC, BASE]! - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | checktp CARG2, LJ_TISNUM - | cmnls CARG4, #-LJ_TISNUM - if (vk) { - | bls ->BC_ISEQN_Z - } else { - | bls ->BC_ISNEN_Z - } - | // Either or both types are not numbers. - |.if FFI - | checktp CARG2, LJ_TCDATA - | checktpne CARG4, LJ_TCDATA - | beq ->vmeta_equal_cd - |.endif - | cmp CARG2, CARG4 // Compare types. - | bne >2 // Not the same type? - | checktp CARG2, LJ_TISPRI - | bhs >1 // Same type and primitive type? - | - | // Same types and not a primitive type. Compare GCobj or pvalue. - | cmp CARG1, CARG3 - if (vk) { - | bne >3 // Different GCobjs or pvalues? - |1: // Branch if same. - | sub PC, RB, #0x20000 - |2: // Different. - | ins_next - |3: - | checktp CARG2, LJ_TISTABUD - | bhi <2 // Different objects and not table/ud? - } else { - | beq >1 // Same GCobjs or pvalues? - | checktp CARG2, LJ_TISTABUD - | bhi >2 // Different objects and not table/ud? - } - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - | ldr TAB:RA, TAB:CARG1->metatable - | cmp TAB:RA, #0 - if (vk) { - | beq <2 // No metatable? - } else { - | beq >2 // No metatable? - } - | ldrb RA, TAB:RA->nomm - | mov CARG4, #1-vk // ne = 0 or 1. 
- | mov CARG2, CARG1 - | tst RA, #1<vmeta_equal // 'no __eq' flag not set? - if (vk) { - | b <2 - } else { - |2: // Branch if different. - | sub PC, RB, #0x20000 - |1: // Same. - | ins_next - } - break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src*8, RC = str_const (~), JMP with RC = target - | mvn RC, RC - | ldrd CARG12, [BASE, RA] - | ldrh RB, [PC, #2] - | ldr STR:CARG3, [KBASE, RC, lsl #2] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | checktp CARG2, LJ_TSTR - |.if FFI - | bne >7 - | cmp CARG1, CARG3 - |.else - | cmpeq CARG1, CARG3 - |.endif - if (vk) { - | subeq PC, RB, #0x20000 - |1: - } else { - |1: - | subne PC, RB, #0x20000 - } - | ins_next - | - |.if FFI - |7: - | checktp CARG2, LJ_TCDATA - | bne <1 - | b ->vmeta_equal_cd - |.endif - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src*8, RC = num_const (~), JMP with RC = target - | lsl RC, RC, #3 - | ldrd CARG12, [RA, BASE]! - | ldrh RB, [PC, #2] - | ldrd CARG34, [RC, KBASE]! - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - if (vk) { - |->BC_ISEQN_Z: - } else { - |->BC_ISNEN_Z: - } - | checktp CARG2, LJ_TISNUM - | bne >3 - | checktp CARG4, LJ_TISNUM - | bne >4 - | cmp CARG1, CARG3 - if (vk) { - | subeq PC, RB, #0x20000 - |1: - } else { - |1: - | subne PC, RB, #0x20000 - } - |2: - | ins_next - | - |3: // CARG12 is not an integer. - |.if FFI - | bhi >7 - |.else - if (!vk) { - | subhi PC, RB, #0x20000 - } - | bhi <2 - |.endif - |.if FPU - | checktp CARG4, LJ_TISNUM - | vmov s4, CARG3 - | vldr d0, [RA] - | vldrlo d1, [RC] - | vcvths.f64.s32 d1, s4 - | b >5 - |4: // CARG1 is an integer, d1 is a number. - | vmov s4, CARG1 - | vldr d1, [RC] - | vcvt.f64.s32 d0, s4 - |5: // d0 and d1 are numbers. - | vcmp.f64 d0, d1 - | vmrs - if (vk) { - | subeq PC, RB, #0x20000 - } else { - | subne PC, RB, #0x20000 - } - | b <2 - |.else - | // CARG12 is a number. - | checktp CARG4, LJ_TISNUM - | movlo RA, RB // Save RB. 
- | blo >5 - | // CARG12 is a number, CARG3 is an integer. - | mov CARG1, CARG3 - | mov RC, RA - |4: // CARG1 is an integer, CARG34 is a number. - | mov RA, RB // Save RB. - | bl extern __aeabi_i2d - | ldrd CARG34, [RC] // Restore other operand. - |5: // CARG12 and CARG34 are numbers. - | bl extern __aeabi_cdcmpeq - if (vk) { - | subeq PC, RA, #0x20000 - } else { - | subne PC, RA, #0x20000 - } - | b <2 - |.endif - | - |.if FFI - |7: - | checktp CARG2, LJ_TCDATA - | bne <1 - | b ->vmeta_equal_cd - |.endif - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src*8, RC = primitive_type (~), JMP with RC = target - | ldrd CARG12, [BASE, RA] - | ldrh RB, [PC, #2] - | add PC, PC, #4 - | mvn RC, RC - | add RB, PC, RB, lsl #2 - |.if FFI - | checktp CARG2, LJ_TCDATA - | beq ->vmeta_equal_cd - |.endif - | cmp CARG2, RC - if (vk) { - | subeq PC, RB, #0x20000 - } else { - | subne PC, RB, #0x20000 - } - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst*8 or unused, RC = src, JMP with RC = target - | add RC, BASE, RC, lsl #3 - | ldrh RB, [PC, #2] - | ldrd CARG12, [RC] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | checktp CARG2, LJ_TTRUE - if (op == BC_ISTC || op == BC_IST) { - | subls PC, RB, #0x20000 - if (op == BC_ISTC) { - | strdls CARG12, [BASE, RA] - } - } else { - | subhi PC, RB, #0x20000 - if (op == BC_ISFC) { - | strdhi CARG12, [BASE, RA] - } - } - | ins_next - break; - - case BC_ISTYPE: - | // RA = src*8, RC = -type - | ldrd CARG12, [BASE, RA] - | ins_next1 - | cmn CARG2, RC - | ins_next2 - | bne ->vmeta_istype - | ins_next3 - break; - case BC_ISNUM: - | // RA = src*8, RC = -(TISNUM-1) - | ldrd CARG12, [BASE, RA] - | ins_next1 - | checktp CARG2, LJ_TISNUM - | ins_next2 - | bhs ->vmeta_istype - | ins_next3 - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA 
= dst*8, RC = src - | lsl RC, RC, #3 - | ins_next1 - | ldrd CARG12, [BASE, RC] - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - case BC_NOT: - | // RA = dst*8, RC = src - | add RC, BASE, RC, lsl #3 - | ins_next1 - | ldr CARG1, [RC, #4] - | add RA, BASE, RA - | ins_next2 - | checktp CARG1, LJ_TTRUE - | mvnls CARG2, #~LJ_TFALSE - | mvnhi CARG2, #~LJ_TTRUE - | str CARG2, [RA, #4] - | ins_next3 - break; - case BC_UNM: - | // RA = dst*8, RC = src - | lsl RC, RC, #3 - | ldrd CARG12, [BASE, RC] - | ins_next1 - | ins_next2 - | checktp CARG2, LJ_TISNUM - | bhi ->vmeta_unm - | eorne CARG2, CARG2, #0x80000000 - | bne >5 - | rsbseq CARG1, CARG1, #0 - | ldrdvs CARG12, >9 - |5: - | strd CARG12, [BASE, RA] - | ins_next3 - | - |.align 8 - |9: - | .long 0x00000000, 0x41e00000 // 2^31. - break; - case BC_LEN: - | // RA = dst*8, RC = src - | lsl RC, RC, #3 - | ldrd CARG12, [BASE, RC] - | checkstr CARG2, >2 - | ldr CARG1, STR:CARG1->len - |1: - | mvn CARG2, #~LJ_TISNUM - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - |2: - | checktab CARG2, ->vmeta_len -#if LJ_52 - | ldr TAB:CARG3, TAB:CARG1->metatable - | cmp TAB:CARG3, #0 - | bne >9 - |3: -#endif - |->BC_LEN_Z: - | .IOS mov RC, BASE - | bl extern lj_tab_len // (GCtab *t) - | // Returns uint32_t (but less than 2^31). 
- | .IOS mov BASE, RC - | b <1 -#if LJ_52 - |9: - | ldrb CARG4, TAB:CARG3->nomm - | tst CARG4, #1<vmeta_len -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro ins_arithcheck, cond, ncond, target - ||if (vk == 1) { - | cmn CARG4, #-LJ_TISNUM - | cmn..cond CARG2, #-LJ_TISNUM - ||} else { - | cmn CARG2, #-LJ_TISNUM - | cmn..cond CARG4, #-LJ_TISNUM - ||} - | b..ncond target - |.endmacro - |.macro ins_arithcheck_int, target - | ins_arithcheck eq, ne, target - |.endmacro - |.macro ins_arithcheck_num, target - | ins_arithcheck lo, hs, target - |.endmacro - | - |.macro ins_arithpre - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | .if FPU - | ldrd CARG12, [RB, BASE]! - | ldrd CARG34, [RC, KBASE]! - | .else - | ldrd CARG12, [BASE, RB] - | ldrd CARG34, [KBASE, RC] - | .endif - || break; - ||case 1: - | .if FPU - | ldrd CARG34, [RB, BASE]! - | ldrd CARG12, [RC, KBASE]! - | .else - | ldrd CARG34, [BASE, RB] - | ldrd CARG12, [KBASE, RC] - | .endif - || break; - ||default: - | .if FPU - | ldrd CARG12, [RB, BASE]! - | ldrd CARG34, [RC, BASE]! 
- | .else - | ldrd CARG12, [BASE, RB] - | ldrd CARG34, [BASE, RC] - | .endif - || break; - ||} - |.endmacro - | - |.macro ins_arithpre_fpu, reg1, reg2 - |.if FPU - ||if (vk == 1) { - | vldr reg2, [RB] - | vldr reg1, [RC] - ||} else { - | vldr reg1, [RB] - | vldr reg2, [RC] - ||} - |.endif - |.endmacro - | - |.macro ins_arithpost_fpu, reg - | ins_next1 - | add RA, BASE, RA - | ins_next2 - | vstr reg, [RA] - | ins_next3 - |.endmacro - | - |.macro ins_arithfallback, ins - ||switch (vk) { - ||case 0: - | ins ->vmeta_arith_vn - || break; - ||case 1: - | ins ->vmeta_arith_nv - || break; - ||default: - | ins ->vmeta_arith_vv - || break; - ||} - |.endmacro - | - |.macro ins_arithdn, intins, fpins, fpcall - | ins_arithpre - |.if "intins" ~= "vm_modi" and not FPU - | ins_next1 - |.endif - | ins_arithcheck_int >5 - |.if "intins" == "smull" - | smull CARG1, RC, CARG3, CARG1 - | cmp RC, CARG1, asr #31 - | ins_arithfallback bne - |.elif "intins" == "vm_modi" - | movs CARG2, CARG3 - | ins_arithfallback beq - | bl ->vm_modi - | mvn CARG2, #~LJ_TISNUM - |.else - | intins CARG1, CARG1, CARG3 - | ins_arithfallback bvs - |.endif - |4: - |.if "intins" == "vm_modi" or FPU - | ins_next1 - |.endif - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - |5: // FP variant. 
- | ins_arithpre_fpu d6, d7 - | ins_arithfallback ins_arithcheck_num - |.if FPU - |.if "intins" == "vm_modi" - | bl fpcall - |.else - | fpins d6, d6, d7 - |.endif - | ins_arithpost_fpu d6 - |.else - | bl fpcall - |.if "intins" ~= "vm_modi" - | ins_next1 - |.endif - | b <4 - |.endif - |.endmacro - | - |.macro ins_arithfp, fpins, fpcall - | ins_arithpre - |.if "fpins" ~= "extern" or HFABI - | ins_arithpre_fpu d0, d1 - |.endif - | ins_arithfallback ins_arithcheck_num - |.if "fpins" == "extern" - | .IOS mov RC, BASE - | bl fpcall - | .IOS mov BASE, RC - |.elif FPU - | fpins d0, d0, d1 - |.else - | bl fpcall - |.endif - |.if ("fpins" ~= "extern" or HFABI) and FPU - | ins_arithpost_fpu d0 - |.else - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - |.endif - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arithdn adds, vadd.f64, extern __aeabi_dadd - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arithdn subs, vsub.f64, extern __aeabi_dsub - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arithdn smull, vmul.f64, extern __aeabi_dmul - break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arithfp vdiv.f64, extern __aeabi_ddiv - break; - case BC_MODVN: case BC_MODNV: case BC_MODVV: - | ins_arithdn vm_modi, vm_mod, ->vm_mod - break; - case BC_POW: - | // NYI: (partial) integer arithmetic. - | ins_arithfp extern, extern pow - break; - - case BC_CAT: - | decode_RB8 RC, INS - | decode_RC8 RB, INS - | // RA = dst*8, RC = src_start*8, RB = src_end*8 (note: RB/RC swapped!) - | sub CARG3, RB, RC - | str BASE, L->base - | add CARG2, BASE, RB - |->BC_CAT_Z: - | // RA = dst*8, RC = src_start*8, CARG2 = top-1 - | mov CARG1, L - | str PC, SAVE_PC - | lsr CARG3, CARG3, #3 - | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) - | // Returns NULL (finished) or TValue * (metamethod). 
- | ldr BASE, L->base - | cmp CRET1, #0 - | bne ->vmeta_binop - | ldrd CARG34, [BASE, RC] - | ins_next1 - | ins_next2 - | strd CARG34, [BASE, RA] // Copy result to RA. - | ins_next3 - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst*8, RC = str_const (~) - | mvn RC, RC - | ins_next1 - | ldr CARG1, [KBASE, RC, lsl #2] - | mvn CARG2, #~LJ_TSTR - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - case BC_KCDATA: - |.if FFI - | // RA = dst*8, RC = cdata_const (~) - | mvn RC, RC - | ins_next1 - | ldr CARG1, [KBASE, RC, lsl #2] - | mvn CARG2, #~LJ_TCDATA - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - |.endif - break; - case BC_KSHORT: - | // RA = dst*8, (RC = int16_literal) - | mov CARG1, INS, asr #16 // Refetch sign-extended reg. - | mvn CARG2, #~LJ_TISNUM - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - case BC_KNUM: - | // RA = dst*8, RC = num_const - | lsl RC, RC, #3 - | ins_next1 - | ldrd CARG12, [KBASE, RC] - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - case BC_KPRI: - | // RA = dst*8, RC = primitive_type (~) - | add RA, BASE, RA - | mvn RC, RC - | ins_next1 - | ins_next2 - | str RC, [RA, #4] - | ins_next3 - break; - case BC_KNIL: - | // RA = base*8, RC = end - | add RA, BASE, RA - | add RC, BASE, RC, lsl #3 - | mvn CARG1, #~LJ_TNIL - | str CARG1, [RA, #4] - | add RA, RA, #8 - |1: - | str CARG1, [RA, #4] - | cmp RA, RC - | add RA, RA, #8 - | blt <1 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst*8, RC = uvnum - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | lsl RC, RC, #2 - | add RC, RC, #offsetof(GCfuncL, uvptr) - | ldr UPVAL:CARG2, [LFUNC:CARG2, RC] - | ldr CARG2, UPVAL:CARG2->v - | ldrd CARG34, [CARG2] - | ins_next1 - | ins_next2 - | strd CARG34, [BASE, RA] - | ins_next3 - break; - case BC_USETV: - | // RA = uvnum*8, RC = src - | 
ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | lsr RA, RA, #1 - | add RA, RA, #offsetof(GCfuncL, uvptr) - | lsl RC, RC, #3 - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] - | ldrd CARG34, [BASE, RC] - | ldrb RB, UPVAL:CARG2->marked - | ldrb RC, UPVAL:CARG2->closed - | ldr CARG2, UPVAL:CARG2->v - | tst RB, #LJ_GC_BLACK // isblack(uv) - | add RB, CARG4, #-LJ_TISGCV - | cmpne RC, #0 - | strd CARG34, [CARG2] - | bne >2 // Upvalue is closed and black? - |1: - | ins_next - | - |2: // Check if new value is collectable. - | cmn RB, #-(LJ_TNUMX - LJ_TISGCV) - | ldrbhi RC, GCOBJ:CARG3->gch.marked - | bls <1 // tvisgcv(v) - | sub CARG1, DISPATCH, #-GG_DISP2G - | tst RC, #LJ_GC_WHITES - | // Crossed a write barrier. Move the barrier forward. - |.if IOS - | beq <1 - | mov RC, BASE - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | mov BASE, RC - |.else - | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv) - |.endif - | b <1 - break; - case BC_USETS: - | // RA = uvnum*8, RC = str_const (~) - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | lsr RA, RA, #1 - | add RA, RA, #offsetof(GCfuncL, uvptr) - | mvn RC, RC - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] - | ldr STR:CARG3, [KBASE, RC, lsl #2] - | ldrb RB, UPVAL:CARG2->marked - | ldrb RC, UPVAL:CARG2->closed - | ldr CARG2, UPVAL:CARG2->v - | mvn CARG4, #~LJ_TSTR - | tst RB, #LJ_GC_BLACK // isblack(uv) - | ldrb RB, STR:CARG3->marked - | strd CARG34, [CARG2] - | bne >2 - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | tst RB, #LJ_GC_WHITES // iswhite(str) - | cmpne RC, #0 - | sub CARG1, DISPATCH, #-GG_DISP2G - | // Crossed a write barrier. Move the barrier forward. 
- |.if IOS - | beq <1 - | mov RC, BASE - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | mov BASE, RC - |.else - | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv) - |.endif - | b <1 - break; - case BC_USETN: - | // RA = uvnum*8, RC = num_const - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | lsr RA, RA, #1 - | add RA, RA, #offsetof(GCfuncL, uvptr) - | lsl RC, RC, #3 - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] - | ldrd CARG34, [KBASE, RC] - | ldr CARG2, UPVAL:CARG2->v - | ins_next1 - | ins_next2 - | strd CARG34, [CARG2] - | ins_next3 - break; - case BC_USETP: - | // RA = uvnum*8, RC = primitive_type (~) - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | lsr RA, RA, #1 - | add RA, RA, #offsetof(GCfuncL, uvptr) - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] - | mvn RC, RC - | ldr CARG2, UPVAL:CARG2->v - | ins_next1 - | ins_next2 - | str RC, [CARG2, #4] - | ins_next3 - break; - - case BC_UCLO: - | // RA = level*8, RC = target - | ldr CARG3, L->openupval - | add RC, PC, RC, lsl #2 - | str BASE, L->base - | cmp CARG3, #0 - | sub PC, RC, #0x20000 - | beq >1 - | mov CARG1, L - | add CARG2, BASE, RA - | bl extern lj_func_closeuv // (lua_State *L, TValue *level) - | ldr BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst*8, RC = proto_const (~) (holding function prototype) - | mvn RC, RC - | str BASE, L->base - | ldr CARG2, [KBASE, RC, lsl #2] - | str PC, SAVE_PC - | ldr CARG3, [BASE, FRAME_FUNC] - | mov CARG1, L - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | bl extern lj_func_newL_gc - | // Returns GCfuncL *. 
- | ldr BASE, L->base - | mvn CARG2, #~LJ_TFUNC - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst*8, RC = (hbits|asize) | tab_const (~) - if (op == BC_TDUP) { - | mvn RC, RC - } - | ldr CARG3, [DISPATCH, #DISPATCH_GL(gc.total)] - | ldr CARG4, [DISPATCH, #DISPATCH_GL(gc.threshold)] - | str BASE, L->base - | str PC, SAVE_PC - | cmp CARG3, CARG4 - | mov CARG1, L - | bhs >5 - |1: - if (op == BC_TNEW) { - | lsl CARG2, RC, #21 - | lsr CARG3, RC, #11 - | asr RC, CARG2, #21 - | lsr CARG2, CARG2, #21 - | cmn RC, #1 - | addeq CARG2, CARG2, #2 - | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) - | // Returns GCtab *. - } else { - | ldr CARG2, [KBASE, RC, lsl #2] - | bl extern lj_tab_dup // (lua_State *L, Table *kt) - | // Returns GCtab *. - } - | ldr BASE, L->base - | mvn CARG2, #~LJ_TTAB - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - |5: - | bl extern lj_gc_step_fixtop // (lua_State *L) - | mov CARG1, L - | b <1 - break; - - case BC_GGET: - | // RA = dst*8, RC = str_const (~) - case BC_GSET: - | // RA = dst*8, RC = str_const (~) - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | mvn RC, RC - | ldr TAB:CARG1, LFUNC:CARG2->env - | ldr STR:RC, [KBASE, RC, lsl #2] - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - break; - - case BC_TGETV: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = dst*8, RB = table*8, RC = key*8 - | ldrd TAB:CARG12, [BASE, RB] - | ldrd CARG34, [BASE, RC] - | checktab CARG2, ->vmeta_tgetv // STALL: load CARG12. - | checktp CARG4, LJ_TISNUM // Integer key? - | ldreq CARG4, TAB:CARG1->array - | ldreq CARG2, TAB:CARG1->asize - | bne >9 - | - | add CARG4, CARG4, CARG3, lsl #3 - | cmp CARG3, CARG2 // In array part? - | ldrdlo CARG34, [CARG4] - | bhs ->vmeta_tgetv - | ins_next1 // Overwrites RB! 
- | checktp CARG4, LJ_TNIL - | beq >5 - |1: - | ins_next2 - | strd CARG34, [BASE, RA] - | ins_next3 - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG2, TAB:CARG1->metatable - | cmp TAB:CARG2, #0 - | beq <1 // No metatable: done. - | ldrb CARG2, TAB:CARG2->nomm - | tst CARG2, #1<vmeta_tgetv - | - |9: - | checktp CARG4, LJ_TSTR // String key? - | moveq STR:RC, CARG3 - | beq ->BC_TGETS_Z - | b ->vmeta_tgetv - break; - case BC_TGETS: - | decode_RB8 RB, INS - | and RC, RC, #255 - | // RA = dst*8, RB = table*8, RC = str_const (~) - | ldrd CARG12, [BASE, RB] - | mvn RC, RC - | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC. - | checktab CARG2, ->vmeta_tgets1 - |->BC_TGETS_Z: - | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 - | ldr CARG3, TAB:CARG1->hmask - | ldr CARG4, STR:RC->hash - | ldr NODE:INS, TAB:CARG1->node - | mov TAB:RB, TAB:CARG1 - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask - | add CARG3, CARG3, CARG3, lsl #1 - | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 - |1: - | ldrd CARG12, NODE:INS->key // STALL: early NODE:INS. - | ldrd CARG34, NODE:INS->val - | ldr NODE:INS, NODE:INS->next - | checktp CARG2, LJ_TSTR - | cmpeq CARG1, STR:RC - | bne >4 - | checktp CARG4, LJ_TNIL - | beq >5 - |3: - | ins_next1 - | ins_next2 - | strd CARG34, [BASE, RA] - | ins_next3 - | - |4: // Follow hash chain. - | cmp NODE:INS, #0 - | bne <1 - | // End of hash chain: key not found, nil result. - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG1, TAB:RB->metatable - | mov CARG3, #0 // Optional clear of undef. value (during load stall). - | mvn CARG4, #~LJ_TNIL - | cmp TAB:CARG1, #0 - | beq <3 // No metatable: done. - | ldrb CARG2, TAB:CARG1->nomm - | tst CARG2, #1<vmeta_tgets - break; - case BC_TGETB: - | decode_RB8 RB, INS - | and RC, RC, #255 - | // RA = dst*8, RB = table*8, RC = index - | ldrd CARG12, [BASE, RB] - | checktab CARG2, ->vmeta_tgetb // STALL: load CARG12. 
- | ldr CARG3, TAB:CARG1->asize - | ldr CARG4, TAB:CARG1->array - | lsl CARG2, RC, #3 - | cmp RC, CARG3 - | ldrdlo CARG34, [CARG4, CARG2] - | bhs ->vmeta_tgetb - | ins_next1 // Overwrites RB! - | checktp CARG4, LJ_TNIL - | beq >5 - |1: - | ins_next2 - | strd CARG34, [BASE, RA] - | ins_next3 - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG2, TAB:CARG1->metatable - | cmp TAB:CARG2, #0 - | beq <1 // No metatable: done. - | ldrb CARG2, TAB:CARG2->nomm - | tst CARG2, #1<vmeta_tgetb - break; - case BC_TGETR: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = dst*8, RB = table*8, RC = key*8 - | ldr TAB:CARG1, [BASE, RB] - | ldr CARG2, [BASE, RC] - | ldr CARG4, TAB:CARG1->array - | ldr CARG3, TAB:CARG1->asize - | add CARG4, CARG4, CARG2, lsl #3 - | cmp CARG2, CARG3 // In array part? - | bhs ->vmeta_tgetr - | ldrd CARG12, [CARG4] - |->BC_TGETR_Z: - | ins_next1 - | ins_next2 - | strd CARG12, [BASE, RA] - | ins_next3 - break; - - case BC_TSETV: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = src*8, RB = table*8, RC = key*8 - | ldrd TAB:CARG12, [BASE, RB] - | ldrd CARG34, [BASE, RC] - | checktab CARG2, ->vmeta_tsetv // STALL: load CARG12. - | checktp CARG4, LJ_TISNUM // Integer key? - | ldreq CARG2, TAB:CARG1->array - | ldreq CARG4, TAB:CARG1->asize - | bne >9 - | - | add CARG2, CARG2, CARG3, lsl #3 - | cmp CARG3, CARG4 // In array part? - | ldrlo INS, [CARG2, #4] - | bhs ->vmeta_tsetv - | ins_next1 // Overwrites RB! - | checktp INS, LJ_TNIL - | ldrb INS, TAB:CARG1->marked - | ldrd CARG34, [BASE, RA] - | beq >5 - |1: - | tst INS, #LJ_GC_BLACK // isblack(table) - | strd CARG34, [CARG2] - | bne >7 - |2: - | ins_next2 - | ins_next3 - | - |5: // Check for __newindex if previous value is nil. - | ldr TAB:RA, TAB:CARG1->metatable - | cmp TAB:RA, #0 - | beq <1 // No metatable: done. - | ldrb RA, TAB:RA->nomm - | tst RA, #1<vmeta_tsetv - | - |7: // Possible table write barrier for the value. Skip valiswhite check. 
- | barrierback TAB:CARG1, INS, CARG3 - | b <2 - | - |9: - | checktp CARG4, LJ_TSTR // String key? - | moveq STR:RC, CARG3 - | beq ->BC_TSETS_Z - | b ->vmeta_tsetv - break; - case BC_TSETS: - | decode_RB8 RB, INS - | and RC, RC, #255 - | // RA = src*8, RB = table*8, RC = str_const (~) - | ldrd CARG12, [BASE, RB] - | mvn RC, RC - | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC. - | checktab CARG2, ->vmeta_tsets1 - |->BC_TSETS_Z: - | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 - | ldr CARG3, TAB:CARG1->hmask - | ldr CARG4, STR:RC->hash - | ldr NODE:INS, TAB:CARG1->node - | mov TAB:RB, TAB:CARG1 - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask - | add CARG3, CARG3, CARG3, lsl #1 - | mov CARG4, #0 - | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 - | strb CARG4, TAB:RB->nomm // Clear metamethod cache. - |1: - | ldrd CARG12, NODE:INS->key - | ldr CARG4, NODE:INS->val.it - | ldr NODE:CARG3, NODE:INS->next - | checktp CARG2, LJ_TSTR - | cmpeq CARG1, STR:RC - | bne >5 - | ldrb CARG2, TAB:RB->marked - | checktp CARG4, LJ_TNIL // Key found, but nil value? - | ldrd CARG34, [BASE, RA] - | beq >4 - |2: - | tst CARG2, #LJ_GC_BLACK // isblack(table) - | strd CARG34, NODE:INS->val - | bne >7 - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | ldr TAB:CARG1, TAB:RB->metatable - | cmp TAB:CARG1, #0 - | beq <2 // No metatable: done. - | ldrb CARG1, TAB:CARG1->nomm - | tst CARG1, #1<vmeta_tsets - | - |5: // Follow hash chain. - | movs NODE:INS, NODE:CARG3 - | bne <1 - | // End of hash chain: key not found, add a new one. - | - | // But check for __newindex first. - | ldr TAB:CARG1, TAB:RB->metatable - | mov CARG3, TMPDp - | str PC, SAVE_PC - | cmp TAB:CARG1, #0 // No metatable: continue. - | str BASE, L->base - | ldrbne CARG2, TAB:CARG1->nomm - | mov CARG1, L - | beq >6 - | tst CARG2, #1<vmeta_tsets // 'no __newindex' flag NOT set: check. 
- |6: - | mvn CARG4, #~LJ_TSTR - | str STR:RC, TMPDlo - | mov CARG2, TAB:RB - | str CARG4, TMPDhi - | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) - | // Returns TValue *. - | ldr BASE, L->base - | ldrd CARG34, [BASE, RA] - | strd CARG34, [CRET1] - | b <3 // No 2nd write barrier needed. - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, CARG2, CARG3 - | b <3 - break; - case BC_TSETB: - | decode_RB8 RB, INS - | and RC, RC, #255 - | // RA = src*8, RB = table*8, RC = index - | ldrd CARG12, [BASE, RB] - | checktab CARG2, ->vmeta_tsetb // STALL: load CARG12. - | ldr CARG3, TAB:CARG1->asize - | ldr RB, TAB:CARG1->array - | lsl CARG2, RC, #3 - | cmp RC, CARG3 - | ldrdlo CARG34, [CARG2, RB]! - | bhs ->vmeta_tsetb - | ins_next1 // Overwrites RB! - | checktp CARG4, LJ_TNIL - | ldrb INS, TAB:CARG1->marked - | ldrd CARG34, [BASE, RA] - | beq >5 - |1: - | tst INS, #LJ_GC_BLACK // isblack(table) - | strd CARG34, [CARG2] - | bne >7 - |2: - | ins_next2 - | ins_next3 - | - |5: // Check for __newindex if previous value is nil. - | ldr TAB:RA, TAB:CARG1->metatable - | cmp TAB:RA, #0 - | beq <1 // No metatable: done. - | ldrb RA, TAB:RA->nomm - | tst RA, #1<vmeta_tsetb - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG1, INS, CARG3 - | b <2 - break; - case BC_TSETR: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = src*8, RB = table*8, RC = key*8 - | ldr TAB:CARG2, [BASE, RB] - | ldr CARG3, [BASE, RC] - | ldrb INS, TAB:CARG2->marked - | ldr CARG1, TAB:CARG2->array - | ldr CARG4, TAB:CARG2->asize - | tst INS, #LJ_GC_BLACK // isblack(table) - | add CARG1, CARG1, CARG3, lsl #3 - | bne >7 - |2: - | cmp CARG3, CARG4 // In array part? - | bhs ->vmeta_tsetr - |->BC_TSETR_Z: - | ldrd CARG34, [BASE, RA] - | ins_next1 - | ins_next2 - | strd CARG34, [CARG1] - | ins_next3 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. 
- | barrierback TAB:CARG2, INS, RB - | b <2 - break; - - case BC_TSETM: - | // RA = base*8 (table at base-1), RC = num_const (start index) - | add RA, BASE, RA - |1: - | ldr RB, SAVE_MULTRES - | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table. - | ldr CARG1, [KBASE, RC, lsl #3] // Integer constant is in lo-word. - | subs RB, RB, #8 - | ldr CARG4, TAB:CARG2->asize - | beq >4 // Nothing to copy? - | add CARG3, CARG1, RB, lsr #3 - | cmp CARG3, CARG4 - | ldr CARG4, TAB:CARG2->array - | add RB, RA, RB - | bhi >5 - | add INS, CARG4, CARG1, lsl #3 - | ldrb CARG1, TAB:CARG2->marked - |3: // Copy result slots to table. - | ldrd CARG34, [RA], #8 - | strd CARG34, [INS], #8 - | cmp RA, RB - | blo <3 - | tst CARG1, #LJ_GC_BLACK // isblack(table) - | bne >7 - |4: - | ins_next - | - |5: // Need to resize array part. - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - | // Must not reallocate the stack. - | .IOS ldr BASE, L->base - | b <1 - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, CARG1, CARG3 - | b <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base*8, (RB = nresults+1,) RC = extra_nargs - | ldr CARG1, SAVE_MULTRES - | decode_RC8 NARGS8:RC, INS - | add NARGS8:RC, NARGS8:RC, CARG1 - | b ->BC_CALL_Z - break; - case BC_CALL: - | decode_RC8 NARGS8:RC, INS - | // RA = base*8, (RB = nresults+1,) RC = (nargs+1)*8 - |->BC_CALL_Z: - | mov RB, BASE // Save old BASE for vmeta_call. - | ldrd CARG34, [BASE, RA]! 
- | sub NARGS8:RC, NARGS8:RC, #8 - | add BASE, BASE, #8 - | checkfunc CARG4, ->vmeta_call - | ins_call - break; - - case BC_CALLMT: - | // RA = base*8, (RB = 0,) RC = extra_nargs - | ldr CARG1, SAVE_MULTRES - | add NARGS8:RC, CARG1, RC, lsl #3 - | b ->BC_CALLT1_Z - break; - case BC_CALLT: - | lsl NARGS8:RC, RC, #3 - | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 - |->BC_CALLT1_Z: - | ldrd LFUNC:CARG34, [RA, BASE]! - | sub NARGS8:RC, NARGS8:RC, #8 - | add RA, RA, #8 - | checkfunc CARG4, ->vmeta_callt - | ldr PC, [BASE, FRAME_PC] - |->BC_CALLT2_Z: - | mov RB, #0 - | ldrb CARG4, LFUNC:CARG3->ffid - | tst PC, #FRAME_TYPE - | bne >7 - |1: - | str LFUNC:CARG3, [BASE, FRAME_FUNC] // Copy function down, but keep PC. - | cmp NARGS8:RC, #0 - | beq >3 - |2: - | ldrd CARG12, [RA, RB] - | add INS, RB, #8 - | cmp INS, NARGS8:RC - | strd CARG12, [BASE, RB] - | mov RB, INS - | bne <2 - |3: - | cmp CARG4, #1 // (> FF_C) Calling a fast function? - | bhi >5 - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | ldr INS, [PC, #-4] - | decode_RA8 RA, INS - | sub CARG1, BASE, RA - | ldr LFUNC:CARG1, [CARG1, #-16] - | ldr CARG1, LFUNC:CARG1->field_pc - | ldr KBASE, [CARG1, #PC2PROTO(k)] - | b <4 - | - |7: // Tailcall from a vararg function. - | eor PC, PC, #FRAME_VARG - | tst PC, #FRAME_TYPEP // Vararg frame below? - | movne CARG4, #0 // Clear ffid if no Lua function below. - | bne <1 - | sub BASE, BASE, PC - | ldr PC, [BASE, FRAME_PC] - | tst PC, #FRAME_TYPE - | movne CARG4, #0 // Clear ffid if no Lua function below. - | b <1 - break; - - case BC_ITERC: - | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) - | add RA, BASE, RA - | mov RB, BASE // Save old BASE for vmeta_call. - | ldrd CARG34, [RA, #-16] - | ldrd CARG12, [RA, #-8] - | add BASE, RA, #8 - | strd CARG34, [RA, #8] // Copy state. - | strd CARG12, [RA, #16] // Copy control var. - | // STALL: locked CARG34. - | ldrd LFUNC:CARG34, [RA, #-24] - | mov NARGS8:RC, #16 // Iterators get 2 arguments. 
- | // STALL: load CARG34. - | strd LFUNC:CARG34, [RA] // Copy callable. - | checkfunc CARG4, ->vmeta_call - | ins_call - break; - - case BC_ITERN: - | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | add RA, BASE, RA - | ldr TAB:RB, [RA, #-16] - | ldr CARG1, [RA, #-8] // Get index from control var. - | ldr INS, TAB:RB->asize - | ldr CARG2, TAB:RB->array - | add PC, PC, #4 - |1: // Traverse array part. - | subs RC, CARG1, INS - | add CARG3, CARG2, CARG1, lsl #3 - | bhs >5 // Index points after array part? - | ldrd CARG34, [CARG3] - | checktp CARG4, LJ_TNIL - | addeq CARG1, CARG1, #1 // Skip holes in array part. - | beq <1 - | ldrh RC, [PC, #-2] - | mvn CARG2, #~LJ_TISNUM - | strd CARG34, [RA, #8] - | add RC, PC, RC, lsl #2 - | add RB, CARG1, #1 - | strd CARG12, [RA] - | sub PC, RC, #0x20000 - | str RB, [RA, #-8] // Update control var. - |3: - | ins_next - | - |5: // Traverse hash part. - | ldr CARG4, TAB:RB->hmask - | ldr NODE:RB, TAB:RB->node - |6: - | add CARG1, RC, RC, lsl #1 - | cmp RC, CARG4 // End of iteration? Branch to ITERL+1. - | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8 - | bhi <3 - | ldrd CARG12, NODE:CARG3->val - | checktp CARG2, LJ_TNIL - | add RC, RC, #1 - | beq <6 // Skip holes in hash part. - | ldrh RB, [PC, #-2] - | add RC, RC, INS - | ldrd CARG34, NODE:CARG3->key - | str RC, [RA, #-8] // Update control var. 
- | strd CARG12, [RA, #8] - | add RC, PC, RB, lsl #2 - | sub PC, RC, #0x20000 - | strd CARG34, [RA] - | b <3 - break; - - case BC_ISNEXT: - | // RA = base*8, RC = target (points to ITERN) - | add RA, BASE, RA - | add RC, PC, RC, lsl #2 - | ldrd CFUNC:CARG12, [RA, #-24] - | ldr CARG3, [RA, #-12] - | ldr CARG4, [RA, #-4] - | checktp CARG2, LJ_TFUNC - | ldrbeq CARG1, CFUNC:CARG1->ffid - | checktpeq CARG3, LJ_TTAB - | checktpeq CARG4, LJ_TNIL - | cmpeq CARG1, #FF_next_N - | subeq PC, RC, #0x20000 - | bne >5 - | ins_next1 - | ins_next2 - | mov CARG1, #0 - | mvn CARG2, #0x00018000 - | strd CARG1, [RA, #-8] // Initialize control var. - |1: - | ins_next3 - |5: // Despecialize bytecode if any of the checks fail. - | mov CARG1, #BC_JMP - | mov OP, #BC_ITERC - | strb CARG1, [PC, #-4] - | sub PC, RC, #0x20000 - | strb OP, [PC] // Subsumes ins_next1. - | ins_next2 - | b <1 - break; - - case BC_VARG: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 - | ldr CARG1, [BASE, FRAME_PC] - | add RC, BASE, RC - | add RA, BASE, RA - | add RC, RC, #FRAME_VARG - | add CARG4, RA, RB - | sub CARG3, BASE, #8 // CARG3 = vtop - | sub RC, RC, CARG1 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | cmp RB, #0 - | sub CARG1, CARG3, RC - | beq >5 // Copy all varargs? - | sub CARG4, CARG4, #16 - |1: // Copy vararg slots to destination slots. - | cmp RC, CARG3 - | ldrdlo CARG12, [RC], #8 - | mvnhs CARG2, #~LJ_TNIL - | cmp RA, CARG4 - | strd CARG12, [RA], #8 - | blo <1 - |2: - | ins_next - | - |5: // Copy all varargs. - | ldr CARG4, L->maxstack - | cmp CARG1, #0 - | movle RB, #8 // MULTRES = (0+1)*8 - | addgt RB, CARG1, #8 - | add CARG2, RA, CARG1 - | str RB, SAVE_MULTRES - | ble <2 - | cmp CARG2, CARG4 - | bhi >7 - |6: - | ldrd CARG12, [RC], #8 - | strd CARG12, [RA], #8 - | cmp RC, CARG3 - | blo <6 - | b <2 - | - |7: // Grow stack for varargs. 
- | lsr CARG2, CARG1, #3 - | str RA, L->top - | mov CARG1, L - | str BASE, L->base - | sub RC, RC, BASE // Need delta, because BASE may change. - | str PC, SAVE_PC - | sub RA, RA, BASE - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->base - | add RA, BASE, RA - | add RC, BASE, RC - | sub CARG3, BASE, #8 - | b <6 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results*8, RC = extra results - | ldr CARG1, SAVE_MULTRES - | ldr PC, [BASE, FRAME_PC] - | add RA, BASE, RA - | add RC, CARG1, RC, lsl #3 - | b ->BC_RETM_Z - break; - - case BC_RET: - | // RA = results*8, RC = nresults+1 - | ldr PC, [BASE, FRAME_PC] - | lsl RC, RC, #3 - | add RA, BASE, RA - |->BC_RETM_Z: - | str RC, SAVE_MULTRES - |1: - | ands CARG1, PC, #FRAME_TYPE - | eor CARG2, PC, #FRAME_VARG - | bne ->BC_RETV2_Z - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return - | ldr INS, [PC, #-4] - | subs CARG4, RC, #8 - | sub CARG3, BASE, #8 - | beq >3 - |2: - | ldrd CARG12, [RA], #8 - | add BASE, BASE, #8 - | subs CARG4, CARG4, #8 - | strd CARG12, [BASE, #-16] - | bne <2 - |3: - | decode_RA8 RA, INS - | sub CARG4, CARG3, RA - | decode_RB8 RB, INS - | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC] - |5: - | cmp RB, RC // More results expected? - | bhi >6 - | mov BASE, CARG4 - | ldr CARG2, LFUNC:CARG1->field_pc - | ins_next1 - | ins_next2 - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | ins_next3 - | - |6: // Fill up results with nil. - | mvn CARG2, #~LJ_TNIL - | add BASE, BASE, #8 - | add RC, RC, #8 - | str CARG2, [BASE, #-12] - | b <5 - | - |->BC_RETV1_Z: // Non-standard return case. - | add RA, BASE, RA - |->BC_RETV2_Z: - | tst CARG2, #FRAME_TYPEP - | bne ->vm_return - | // Return from vararg function: relocate BASE down. 
- | sub BASE, BASE, CARG2 - | ldr PC, [BASE, FRAME_PC] - | b <1 - break; - - case BC_RET0: case BC_RET1: - | // RA = results*8, RC = nresults+1 - | ldr PC, [BASE, FRAME_PC] - | lsl RC, RC, #3 - | str RC, SAVE_MULTRES - | ands CARG1, PC, #FRAME_TYPE - | eor CARG2, PC, #FRAME_VARG - | ldreq INS, [PC, #-4] - | bne ->BC_RETV1_Z - if (op == BC_RET1) { - | ldrd CARG12, [BASE, RA] - } - | sub CARG4, BASE, #8 - | decode_RA8 RA, INS - if (op == BC_RET1) { - | strd CARG12, [CARG4] - } - | sub BASE, CARG4, RA - | decode_RB8 RB, INS - | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] - |5: - | cmp RB, RC - | bhi >6 - | ldr CARG2, LFUNC:CARG1->field_pc - | ins_next1 - | ins_next2 - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | ins_next3 - | - |6: // Fill up results with nil. - | sub CARG2, CARG4, #4 - | mvn CARG3, #~LJ_TNIL - | str CARG3, [CARG2, RC] - | add RC, RC, #8 - | b <5 - break; - - /* -- Loops and branches ------------------------------------------------ */ - - |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4] - |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12] - |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20] - |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28] - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. - break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base*8, RC = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - | ldrd CARG12, [RA, BASE]! 
- if (op != BC_JFORL) { - | add RC, PC, RC, lsl #2 - } - if (!vk) { - | ldrd CARG34, FOR_STOP - | checktp CARG2, LJ_TISNUM - | ldr RB, FOR_TSTEP - | bne >5 - | checktp CARG4, LJ_TISNUM - | ldr CARG4, FOR_STEP - | checktpeq RB, LJ_TISNUM - | bne ->vmeta_for - | cmp CARG4, #0 - | blt >4 - | cmp CARG1, CARG3 - } else { - | ldrd CARG34, FOR_STEP - | checktp CARG2, LJ_TISNUM - | bne >5 - | adds CARG1, CARG1, CARG3 - | ldr CARG4, FOR_STOP - if (op == BC_IFORL) { - | addvs RC, PC, #0x20000 // Overflow: prevent branch. - } else { - | bvs >2 // Overflow: do not enter mcode. - } - | cmp CARG3, #0 - | blt >4 - | cmp CARG1, CARG4 - } - |1: - if (op == BC_FORI) { - | subgt PC, RC, #0x20000 - } else if (op == BC_JFORI) { - | sub PC, RC, #0x20000 - | ldrhle RC, [PC, #-2] - } else if (op == BC_IFORL) { - | suble PC, RC, #0x20000 - } - if (vk) { - | strd CARG12, FOR_IDX - } - |2: - | ins_next1 - | ins_next2 - | strd CARG12, FOR_EXT - if (op == BC_JFORI || op == BC_JFORL) { - | ble =>BC_JLOOP - } - |3: - | ins_next3 - | - |4: // Invert check for negative step. - if (!vk) { - | cmp CARG3, CARG1 - } else { - | cmp CARG4, CARG1 - } - | b <1 - | - |5: // FP loop. 
- if (!vk) { - | cmnlo CARG4, #-LJ_TISNUM - | cmnlo RB, #-LJ_TISNUM - | bhs ->vmeta_for - |.if FPU - | vldr d0, FOR_IDX - | vldr d1, FOR_STOP - | cmp RB, #0 - | vstr d0, FOR_EXT - |.else - | cmp RB, #0 - | strd CARG12, FOR_EXT - | blt >8 - |.endif - } else { - |.if FPU - | vldr d0, FOR_IDX - | vldr d2, FOR_STEP - | vldr d1, FOR_STOP - | cmp CARG4, #0 - | vadd.f64 d0, d0, d2 - |.else - | cmp CARG4, #0 - | blt >8 - | bl extern __aeabi_dadd - | strd CARG12, FOR_IDX - | ldrd CARG34, FOR_STOP - | strd CARG12, FOR_EXT - |.endif - } - |6: - |.if FPU - | vcmpge.f64 d0, d1 - | vcmplt.f64 d1, d0 - | vmrs - |.else - | bl extern __aeabi_cdcmple - |.endif - if (vk) { - |.if FPU - | vstr d0, FOR_IDX - | vstr d0, FOR_EXT - |.endif - } - if (op == BC_FORI) { - | subhi PC, RC, #0x20000 - } else if (op == BC_JFORI) { - | sub PC, RC, #0x20000 - | ldrhls RC, [PC, #-2] - | bls =>BC_JLOOP - } else if (op == BC_IFORL) { - | subls PC, RC, #0x20000 - } else { - | bls =>BC_JLOOP - } - | ins_next1 - | ins_next2 - | b <3 - | - |.if not FPU - |8: // Invert check for negative step. - if (vk) { - | bl extern __aeabi_dadd - | strd CARG12, FOR_IDX - | strd CARG12, FOR_EXT - } - | mov CARG3, CARG1 - | mov CARG4, CARG2 - | ldrd CARG12, FOR_STOP - | b <6 - |.endif - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base*8, RC = target - | ldrd CARG12, [RA, BASE]! - if (op == BC_JITERL) { - | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil. - | strdne CARG12, [RA, #-8] - | bne =>BC_JLOOP - } else { - | add RC, PC, RC, lsl #2 - | // STALL: load CARG12. - | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil. - | subne PC, RC, #0x20000 // Otherwise save control var + branch. 
- | strdne CARG12, [RA, #-8] - } - | ins_next - break; - - case BC_LOOP: - | // RA = base*8, RC = target (loop extent) - | // Note: RA/RC is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. - break; - - case BC_ILOOP: - | // RA = base*8, RC = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | // RA = base (ignored), RC = traceno - | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)] - | mov CARG2, #0 // Traces on ARM don't store the trace number, so use 0. - | ldr TRACE:RC, [CARG1, RC, lsl #2] - | st_vmstate CARG2 - | ldr RA, TRACE:RC->mcode - | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] - | str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)] - | bx RA - |.endif - break; - - case BC_JMP: - | // RA = base*8 (only used by trace recorder), RC = target - | add RC, PC, RC, lsl #2 - | sub PC, RC, #0x20000 - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 - | ldr CARG1, L->maxstack - | ldrb CARG2, [PC, #-4+PC2PROTO(numparams)] - | ldr KBASE, [PC, #-4+PC2PROTO(k)] - | cmp RA, CARG1 - | bhi ->vm_growstack_l - if (op != BC_JFUNCF) { - | ins_next1 - | ins_next2 - } - |2: - | cmp NARGS8:RC, CARG2, lsl #3 // Check for missing parameters. - | mvn CARG4, #~LJ_TNIL - | blo >3 - if (op == BC_JFUNCF) { - | decode_RD RC, INS - | b =>BC_JLOOP - } else { - | ins_next3 - } - | - |3: // Clear missing parameters. 
- | strd CARG34, [BASE, NARGS8:RC] - | add NARGS8:RC, NARGS8:RC, #8 - | b <2 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 - | ldr CARG1, L->maxstack - | add CARG4, BASE, RC - | add RA, RA, RC - | str LFUNC:CARG3, [CARG4] // Store copy of LFUNC. - | add CARG2, RC, #8+FRAME_VARG - | ldr KBASE, [PC, #-4+PC2PROTO(k)] - | cmp RA, CARG1 - | str CARG2, [CARG4, #4] // Store delta + FRAME_VARG. - | bhs ->vm_growstack_l - | ldrb RB, [PC, #-4+PC2PROTO(numparams)] - | mov RA, BASE - | mov RC, CARG4 - | cmp RB, #0 - | add BASE, CARG4, #8 - | beq >3 - | mvn CARG3, #~LJ_TNIL - |1: - | cmp RA, RC // Less args than parameters? - | ldrdlo CARG12, [RA], #8 - | movhs CARG2, CARG3 - | strlo CARG3, [RA, #-4] // Clear old fixarg slot (help the GC). - |2: - | subs RB, RB, #1 - | strd CARG12, [CARG4, #8]! - | bne <1 - |3: - | ins_next - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | ldr CARG4, CFUNC:CARG3->f - } else { - | ldr CARG4, [DISPATCH, #DISPATCH_GL(wrapf)] - } - | add CARG2, RA, NARGS8:RC - | ldr CARG1, L->maxstack - | add RC, BASE, NARGS8:RC - | str BASE, L->base - | cmp CARG2, CARG1 - | str RC, L->top - if (op == BC_FUNCCW) { - | ldr CARG2, CFUNC:CARG3->f - } - | mv_vmstate CARG3, C - | mov CARG1, L - | bhi ->vm_growstack_c // Need to grow stack. - | st_vmstate CARG3 - | blx CARG4 // (lua_State *L [, lua_CFunction f]) - | // Returns nresults. 
- | ldr BASE, L->base - | mv_vmstate CARG3, INTERP - | ldr CRET2, L->top - | str L, [DISPATCH, #DISPATCH_GL(cur_L)] - | lsl RC, CRET1, #3 - | st_vmstate CARG3 - | ldr PC, [BASE, FRAME_PC] - | sub RA, CRET2, RC // RA = L->top - nresults*8 - | b ->vm_returnc - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. */ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i; - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.long .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.long 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 0xe\n" /* Return address is in lr. */ - "\t.byte 0xc\n\t.uleb128 0xd\n\t.uleb128 0\n" /* def_cfa sp */ - "\t.align 2\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.long .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.long .Lframe0\n" - "\t.long .Lbegin\n" - "\t.long %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */ - fcofs, CFRAME_SIZE); - for (i = 11; i >= (LJ_ARCH_HASFPU ? 
5 : 4); i--) /* offset r4-r11 */ - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i)); -#if LJ_ARCH_HASFPU - for (i = 15; i >= 8; i--) /* offset d8-d15 */ - fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 %d, %d\n", - 64+2*i, 10+2*(15-i)); - fprintf(ctx->fp, "\t.byte 0x84\n\t.uleb128 %d\n", 25); /* offset r4 */ -#endif - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE0:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.long .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.long .Lframe0\n" - "\t.long lj_vm_ffi_call\n" - "\t.long %d\n" - "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ - "\t.byte 0x8e\n\t.uleb128 1\n" /* offset lr */ - "\t.byte 0x8b\n\t.uleb128 2\n" /* offset r11 */ - "\t.byte 0x85\n\t.uleb128 3\n" /* offset r5 */ - "\t.byte 0x84\n\t.uleb128 4\n" /* offset r4 */ - "\t.byte 0xd\n\t.uleb128 0xb\n" /* def_cfa_register r11 */ - "\t.align 2\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif - break; - default: - break; - } -} - diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc deleted file mode 100644 index bb2496ab18..0000000000 --- a/src/vm_arm64.dasc +++ /dev/null @@ -1,3964 +0,0 @@ -|// Low-level VM code for ARM64 CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -| -|.arch arm64 -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. -|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// ARM64 registers and the AAPCS64 ABI 1.0 at a glance: -|// -|// x0-x17 temp, x19-x28 callee-saved, x29 fp, x30 lr -|// x18 is reserved on most platforms. Don't use it, save it or restore it. -|// x31 doesn't exist. Register number 31 either means xzr/wzr (zero) or sp, -|// depending on the instruction. 
-|// v0-v7 temp, v8-v15 callee-saved (only d8-d15 preserved), v16-v31 temp -|// -|// x0-x7/v0-v7 hold parameters and results. -| -|// Fixed register assignments for the interpreter. -| -|// The following must be C callee-save. -|.define BASE, x19 // Base of current Lua stack frame. -|.define KBASE, x20 // Constants of current Lua function. -|.define PC, x21 // Next PC. -|.define GLREG, x22 // Global state. -|.define LREG, x23 // Register holding lua_State (also in SAVE_L). -|.define TISNUM, x24 // Constant LJ_TISNUM << 47. -|.define TISNUMhi, x25 // Constant LJ_TISNUM << 15. -|.define TISNIL, x26 // Constant -1LL. -|.define fp, x29 // Yes, we have to maintain a frame pointer. -| -|.define ST_INTERP, w26 // Constant -1. -| -|// The following temporaries are not saved across C calls, except for RA/RC. -|.define RA, x27 -|.define RC, x28 -|.define RB, x17 -|.define RAw, w27 -|.define RCw, w28 -|.define RBw, w17 -|.define INS, x16 -|.define INSw, w16 -|.define ITYPE, x15 -|.define TMP0, x8 -|.define TMP1, x9 -|.define TMP2, x10 -|.define TMP3, x11 -|.define TMP0w, w8 -|.define TMP1w, w9 -|.define TMP2w, w10 -|.define TMP3w, w11 -| -|// Calling conventions. Also used as temporaries. -|.define CARG1, x0 -|.define CARG2, x1 -|.define CARG3, x2 -|.define CARG4, x3 -|.define CARG5, x4 -|.define CARG1w, w0 -|.define CARG2w, w1 -|.define CARG3w, w2 -|.define CARG4w, w3 -|.define CARG5w, w4 -| -|.define FARG1, d0 -|.define FARG2, d1 -| -|.define CRET1, x0 -|.define CRET1w, w0 -| -|// Stack layout while in interpreter. Must match with lj_frame.h. 
-| -|.define CFRAME_SPACE, 208 -|//----- 16 byte aligned, <-- sp entering interpreter -|// Unused [sp, #204] // 32 bit values -|.define SAVE_NRES, [sp, #200] -|.define SAVE_ERRF, [sp, #196] -|.define SAVE_MULTRES, [sp, #192] -|.define TMPD, [sp, #184] // 64 bit values -|.define SAVE_L, [sp, #176] -|.define SAVE_PC, [sp, #168] -|.define SAVE_CFRAME, [sp, #160] -|.define SAVE_FPR_, 96 // 96+8*8: 64 bit FPR saves -|.define SAVE_GPR_, 16 // 16+10*8: 64 bit GPR saves -|.define SAVE_LR, [sp, #8] -|.define SAVE_FP, [sp] -|//----- 16 byte aligned, <-- sp while in interpreter. -| -|.define TMPDofs, #184 -| -|.macro save_, gpr1, gpr2, fpr1, fpr2 -| stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] -| stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] -|.endmacro -|.macro rest_, gpr1, gpr2, fpr1, fpr2 -| ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] -| ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] -|.endmacro -| -|.macro saveregs -| stp fp, lr, [sp, #-CFRAME_SPACE]! -| add fp, sp, #0 -| stp x19, x20, [sp, # SAVE_GPR_] -| save_ 21, 22, 8, 9 -| save_ 23, 24, 10, 11 -| save_ 25, 26, 12, 13 -| save_ 27, 28, 14, 15 -|.endmacro -|.macro restoreregs -| ldp x19, x20, [sp, # SAVE_GPR_] -| rest_ 21, 22, 8, 9 -| rest_ 23, 24, 10, 11 -| rest_ 25, 26, 12, 13 -| rest_ 27, 28, 14, 15 -| ldp fp, lr, [sp], # CFRAME_SPACE -|.endmacro -| -|// Type definitions. Some of these are only used for documentation. -|.type L, lua_State, LREG -|.type GL, global_State, GLREG -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|//----------------------------------------------------------------------- -| -|// Trap for not-yet-implemented parts. -|.macro NYI; brk; .endmacro -| -|//----------------------------------------------------------------------- -| -|// Access to frame relative to BASE. 
-|.define FRAME_FUNC, #-16 -|.define FRAME_PC, #-8 -| -|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro -|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro -|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro -|.macro decode_RD, dst, ins; ubfx dst, ins, #16, #16; .endmacro -|.macro decode_RC8RD, dst, src; ubfiz dst, src, #3, #8; .endmacro -| -|// Instruction decode+dispatch. -|.macro ins_NEXT -| ldr INSw, [PC], #4 -| add TMP1, GL, INS, uxtb #3 -| decode_RA RA, INS -| ldr TMP0, [TMP1, #GG_G2DISP] -| decode_RD RC, INS -| br TMP0 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| ldr PC, LFUNC:CARG3->pc -| ldr INSw, [PC], #4 -| add TMP1, GL, INS, uxtb #3 -| decode_RA RA, INS -| ldr TMP0, [TMP1, #GG_G2DISP] -| add RA, BASE, RA, lsl #3 -| br TMP0 -|.endmacro -| -|.macro ins_call -| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| str PC, [BASE, FRAME_PC] -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Macros to check the TValue type and extract the GCobj. Branch on failure. 
-|.macro checktp, reg, tp, target -| asr ITYPE, reg, #47 -| cmn ITYPE, #-tp -| and reg, reg, #LJ_GCVMASK -| bne target -|.endmacro -|.macro checktp, dst, reg, tp, target -| asr ITYPE, reg, #47 -| cmn ITYPE, #-tp -| and dst, reg, #LJ_GCVMASK -| bne target -|.endmacro -|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro -|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro -|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro -|.macro checkint, reg, target -| cmp TISNUMhi, reg, lsr #32 -| bne target -|.endmacro -|.macro checknum, reg, target -| cmp TISNUMhi, reg, lsr #32 -| bls target -|.endmacro -|.macro checknumber, reg, target -| cmp TISNUMhi, reg, lsr #32 -| blo target -|.endmacro -| -|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro -|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro -| -#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|.macro hotcheck, delta -| lsr CARG1, PC, #1 -| and CARG1, CARG1, #126 -| add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT -| ldrh CARG2w, [GL, CARG1] -| subs CARG2, CARG2, #delta -| strh CARG2w, [GL, CARG1] -|.endmacro -| -|.macro hotloop -| hotcheck HOTCOUNT_LOOP -| blo ->vm_hotloop -|.endmacro -| -|.macro hotcall -| hotcheck HOTCOUNT_CALL -| blo ->vm_hotcall -|.endmacro -| -|// Set current VM state. -|.macro mv_vmstate, reg, st; movn reg, #LJ_VMST_..st; .endmacro -|.macro st_vmstate, reg; str reg, GL->vmstate; .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp. 
-|.macro barrierback, tab, mark, tmp -| ldr tmp, GL->gc.grayagain -| and mark, mark, #~LJ_GC_BLACK // black2gray(tab) -| str tab, GL->gc.grayagain -| strb mark, tab->marked -| str tmp, tab->gclist -|.endmacro -| -|//----------------------------------------------------------------------- - -#if !LJ_DUALNUM -#error "Only dual-number mode supported for ARM64 target" -#endif - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: RB = previous base. - | tbz PC, #2, ->cont_dispatch // (PC & FRAME_P) == 0? - | - | // Return from pcall or xpcall fast func. - | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame. - | mov_true TMP0 - | mov BASE, RB - | // Prepending may overwrite the pcall frame, so do it at the end. - | str TMP0, [RA, #-8]! // Prepend true to results. - | - |->vm_returnc: - | adds RC, RC, #8 // RC = (nresults+1)*8. - | mov CRET1, #LUA_YIELD - | beq ->vm_unwind_c_eh - | str RCw, SAVE_MULTRES - | ands CARG1, PC, #FRAME_TYPE - | beq ->BC_RET_Z // Handle regular return to Lua. - | - |->vm_return: - | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return - | // CARG1 = PC & FRAME_TYPE - | and RB, PC, #~FRAME_TYPEP - | cmp CARG1, #FRAME_C - | sub RB, BASE, RB // RB = previous base. - | bne ->vm_returnp - | - | str RB, L->base - | ldrsw CARG2, SAVE_NRES // CARG2 = nresults+1. 
- | mv_vmstate TMP0w, C - | sub BASE, BASE, #16 - | subs TMP2, RC, #8 - | st_vmstate TMP0w - | beq >2 - |1: - | subs TMP2, TMP2, #8 - | ldr TMP0, [RA], #8 - | str TMP0, [BASE], #8 - | bne <1 - |2: - | cmp RC, CARG2, lsl #3 // More/less results wanted? - | bne >6 - |3: - | str BASE, L->top // Store new top. - | - |->vm_leave_cp: - | ldr RC, SAVE_CFRAME // Restore previous C frame. - | mov CRET1, #0 // Ok return status for vm_pcall. - | str RC, L->cframe - | - |->vm_leave_unw: - | restoreregs - | ret - | - |6: - | bgt >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. - | ldr CARG3, L->maxstack - | cmp BASE, CARG3 - | bhs >8 - | str TISNIL, [BASE], #8 - | add RC, RC, #8 - | b <2 - | - |7: // Less results wanted. - | cbz CARG2, <3 // LUA_MULTRET+1 case? - | sub CARG1, RC, CARG2, lsl #3 - | sub BASE, BASE, CARG1 // Shrink top. - | b <3 - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | str BASE, L->top // Save current top held in BASE (yes). - | mov CARG1, L - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->top // Need the (realloced) L->top in BASE. - | ldrsw CARG2, SAVE_NRES - | b <2 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | mov sp, CARG1 - | mov CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | ldr L, SAVE_L - | mv_vmstate TMP0w, C - | ldr GL, L->glref - | st_vmstate TMP0w - | b ->vm_leave_unw - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - | and sp, CARG1, #CFRAME_RAWMASK - |->vm_unwind_ff_eh: // Landing pad for external unwinder. 
- | ldr L, SAVE_L - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 - | mov RC, #16 // 2 results: false + error message. - | ldr BASE, L->base - | ldr GL, L->glref // Setup pointer to global state. - | mov_false TMP0 - | sub RA, BASE, #8 // Results start at BASE-8. - | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame. - | str TMP0, [BASE, #-8] // Prepend false to error message. - | st_vmstate ST_INTERP - | b ->vm_returnc - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | // CARG1 = L - | mov CARG2, #LUA_MINSTACK - | b >2 - | - |->vm_growstack_l: // Grow stack for Lua function. - | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | add RC, BASE, RC - | sub RA, RA, BASE - | mov CARG1, L - | stp BASE, RC, L->base - | add PC, PC, #4 // Must point after first instruction. - | lsr CARG2, RA, #3 - |2: - | // L->base = new base, L->top = top - | str PC, SAVE_PC - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldp BASE, RC, L->base - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | sub NARGS8:RC, RC, BASE - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | mov L, CARG1 - | ldr GL, L->glref // Setup pointer to global state. 
- | mov BASE, CARG2 - | str L, SAVE_L - | mov PC, #FRAME_CP - | str wzr, SAVE_NRES - | add TMP0, sp, #CFRAME_RESUME - | ldrb TMP1w, L->status - | str wzr, SAVE_ERRF - | str L, SAVE_PC // Any value outside of bytecode is ok. - | str xzr, SAVE_CFRAME - | str TMP0, L->cframe - | cbz TMP1w, >3 - | - | // Resume after yield (like a return). - | str L, GL->cur_L - | mov RA, BASE - | ldp BASE, CARG1, L->base - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | ldr PC, [BASE, FRAME_PC] - | strb wzr, L->status - | movn TISNIL, #0 - | sub RC, CARG1, BASE - | ands CARG1, PC, #FRAME_TYPE - | add RC, RC, #8 - | st_vmstate ST_INTERP - | str RCw, SAVE_MULTRES - | beq ->BC_RET_Z - | b ->vm_return - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | mov PC, #FRAME_CP - | str CARG4w, SAVE_ERRF - | b >1 - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | mov PC, #FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | ldr RC, L:CARG1->cframe - | str CARG3w, SAVE_NRES - | mov L, CARG1 - | str CARG1, SAVE_L - | ldr GL, L->glref // Setup pointer to global state. - | mov BASE, CARG2 - | str CARG1, SAVE_PC // Any value outside of bytecode is ok. - | str RC, SAVE_CFRAME - | str fp, L->cframe // Add our C frame to cframe chain. - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | str L, GL->cur_L - | ldp RB, CARG1, L->base // RB = old base (for vmeta_call). 
- | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | add PC, PC, BASE - | movn TISNIL, #0 - | sub PC, PC, RB // PC = frame delta + frame type - | sub NARGS8:RC, CARG1, BASE - | st_vmstate ST_INTERP - | - |->vm_call_dispatch: - | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC - | ldr CARG3, [BASE, FRAME_FUNC] - | checkfunc CARG3, ->vmeta_call - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | mov L, CARG1 - | ldr RA, L:CARG1->stack - | str CARG1, SAVE_L - | ldr GL, L->glref // Setup pointer to global state. - | ldr RB, L->top - | str CARG1, SAVE_PC // Any value outside of bytecode is ok. - | ldr RC, L->cframe - | sub RA, RA, RB // Compute -savestack(L, L->top). - | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame. - | str wzr, SAVE_ERRF // No error function. - | str RC, SAVE_CFRAME - | str fp, L->cframe // Add our C frame to cframe chain. - | str L, GL->cur_L - | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud) - | mov BASE, CRET1 - | mov PC, #FRAME_CP - | cbnz BASE, <3 // Else continue with the call. - | b ->vm_leave_cp // No base? Just remove C frame. - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8 - | ldr LFUNC:CARG3, [RB, FRAME_FUNC] - | ldr CARG1, [BASE, #-32] // Get continuation. - | mov CARG4, BASE - | mov BASE, RB // Restore caller BASE. 
- | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - |.if FFI - | cmp CARG1, #1 - |.endif - | ldr PC, [CARG4, #-24] // Restore PC from [cont|PC]. - | ldr CARG3, LFUNC:CARG3->pc - | add TMP0, RA, RC - | str TISNIL, [TMP0, #-8] // Ensure one valid arg. - |.if FFI - | bls >1 - |.endif - | ldr KBASE, [CARG3, #PC2PROTO(k)] - | // BASE = base, RA = resultptr, CARG4 = meta base - | br CARG1 - | - |.if FFI - |1: - | beq ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: tailcall from C function. - | sub CARG4, CARG4, #32 - | sub RC, CARG4, BASE - | b ->vm_call_tail - |.endif - | - |->cont_cat: // RA = resultptr, CARG4 = meta base - | ldr INSw, [PC, #-4] - | sub CARG2, CARG4, #32 - | ldr TMP0, [RA] - | str BASE, L->base - | decode_RB RB, INS - | decode_RA RA, INS - | add TMP1, BASE, RB, lsl #3 - | subs TMP1, CARG2, TMP1 - | beq >1 - | str TMP0, [CARG2] - | lsr CARG3, TMP1, #3 - | b ->BC_CAT_Z - | - |1: - | str TMP0, [BASE, RA, lsl #3] - | b ->cont_nop - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | movn CARG4, #~LJ_TSTR - | add CARG2, BASE, RB, lsl #3 - | add CARG4, STR:RC, CARG4, lsl #47 - | b >2 - | - |->vmeta_tgets: - | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 - | str CARG2, GL->tmptv - | add CARG2, GL, #offsetof(global_State, tmptv) - |2: - | add CARG3, sp, TMPDofs - | str CARG4, TMPD - | b >1 - | - |->vmeta_tgetb: // RB = table, RC = index - | add RC, RC, TISNUM - | add CARG2, BASE, RB, lsl #3 - | add CARG3, sp, TMPDofs - | str RC, TMPD - | b >1 - | - |->vmeta_tgetv: // RB = table, RC = key - | add CARG2, BASE, RB, lsl #3 - | add CARG3, BASE, RC, lsl #3 - |1: - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | cbz CRET1, >3 - | ldr TMP0, [CRET1] - | str TMP0, [BASE, RA, lsl #3] - | ins_next - | - |3: // Call __index metamethod. 
- | // BASE = base, L->top = new base, stack = cont/func/t/k - | sub TMP1, BASE, #FRAME_CONT - | ldr BASE, L->top - | mov NARGS8:RC, #16 // 2 args for func(t, k). - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | str PC, [BASE, #-24] // [cont|PC] - | sub PC, BASE, TMP1 - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | b ->vm_call_dispatch_f - | - |->vmeta_tgetr: - | sxtw CARG2, TMP1w - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. - | mov TMP0, TISNIL - | cbz CRET1, ->BC_TGETR_Z - | ldr TMP0, [CRET1] - | b ->BC_TGETR_Z - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | movn CARG4, #~LJ_TSTR - | add CARG2, BASE, RB, lsl #3 - | add CARG4, STR:RC, CARG4, lsl #47 - | b >2 - | - |->vmeta_tsets: - | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 - | str CARG2, GL->tmptv - | add CARG2, GL, #offsetof(global_State, tmptv) - |2: - | add CARG3, sp, TMPDofs - | str CARG4, TMPD - | b >1 - | - |->vmeta_tsetb: // RB = table, RC = index - | add RC, RC, TISNUM - | add CARG2, BASE, RB, lsl #3 - | add CARG3, sp, TMPDofs - | str RC, TMPD - | b >1 - | - |->vmeta_tsetv: - | add CARG2, BASE, RB, lsl #3 - | add CARG3, BASE, RC, lsl #3 - |1: - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | ldr TMP0, [BASE, RA, lsl #3] - | cbz CRET1, >3 - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | str TMP0, [CRET1] - | ins_next - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | sub TMP1, BASE, #FRAME_CONT - | ldr BASE, L->top - | mov NARGS8:RC, #24 // 3 args for func(t, k, v). - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | str TMP0, [BASE, #16] // Copy value to third argument. 
- | str PC, [BASE, #-24] // [cont|PC] - | sub PC, BASE, TMP1 - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | b ->vm_call_dispatch_f - | - |->vmeta_tsetr: - | sxtw CARG3, TMP1w - | str BASE, L->base - | str PC, SAVE_PC - | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - | // Returns TValue *. - | b ->BC_TSETR_Z - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | add CARG2, BASE, RA, lsl #3 - | sub PC, PC, #4 - | add CARG3, BASE, RC, lsl #3 - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | uxtb CARG4w, INSw - | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - | // Returns 0/1 or TValue * (metamethod). - |3: - | cmp CRET1, #1 - | bhi ->vmeta_binop - |4: - | ldrh RBw, [PC, #2] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | sub RB, RB, #0x20000 - | csel PC, PC, RB, lo - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | ldr INSw, [PC, #-4] - | ldr TMP0, [RA] - | decode_RA TMP1, INS - | str TMP0, [BASE, TMP1, lsl #3] - | b ->cont_nop - | - |->cont_condt: // RA = resultptr - | ldr TMP0, [RA] - | mov_true TMP1 - | cmp TMP1, TMP0 // Branch if result is true. - | b <4 - | - |->cont_condf: // RA = resultptr - | ldr TMP0, [RA] - | mov_false TMP1 - | cmp TMP0, TMP1 // Branch if result is false. - | b <4 - | - |->vmeta_equal: - | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. - | and TAB:CARG3, CARG3, #LJ_GCVMASK - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - | // Returns 0/1 or TValue * (metamethod). - | b <3 - | - |->vmeta_equal_cd: - |.if FFI - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | mov CARG2, INS - | str PC, SAVE_PC - | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) - | // Returns 0/1 or TValue * (metamethod). 
- | b <3 - |.endif - | - |->vmeta_istype: - | sub PC, PC, #4 - | str BASE, L->base - | mov CARG1, L - | mov CARG2, RA - | mov CARG3, RC - | str PC, SAVE_PC - | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - | b ->cont_nop - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_arith_vn: - | add CARG3, BASE, RB, lsl #3 - | add CARG4, KBASE, RC, lsl #3 - | b >1 - | - |->vmeta_arith_nv: - | add CARG4, BASE, RB, lsl #3 - | add CARG3, KBASE, RC, lsl #3 - | b >1 - | - |->vmeta_unm: - | add CARG3, BASE, RC, lsl #3 - | mov CARG4, CARG3 - | b >1 - | - |->vmeta_arith_vv: - | add CARG3, BASE, RB, lsl #3 - | add CARG4, BASE, RC, lsl #3 - |1: - | uxtb CARG5w, INSw - | add CARG2, BASE, RA, lsl #3 - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - | // Returns NULL (finished) or TValue * (metamethod). - | cbz CRET1, ->cont_nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | sub TMP1, CRET1, BASE - | str PC, [CRET1, #-24] // [cont|PC] - | add PC, TMP1, #FRAME_CONT - | mov BASE, CRET1 - | mov NARGS8:RC, #16 // 2 args for func(o1, o2). - | b ->vm_call_dispatch - | - |->vmeta_len: - | add CARG2, BASE, RC, lsl #3 -#if LJ_52 - | mov TAB:RC, TAB:CARG1 // Save table (ignored for other types). -#endif - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_len // (lua_State *L, TValue *o) - | // Returns NULL (retry) or TValue * (metamethod base). -#if LJ_52 - | cbnz CRET1, ->vmeta_binop // Binop call for compatibility. - | mov TAB:CARG1, TAB:RC - | b ->BC_LEN_Z -#else - | b ->vmeta_binop // Binop call for compatibility. -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. 
- | // RB = old base, BASE = new base, RC = nargs*8 - | mov CARG1, L - | str RB, L->base // This is the callers base! - | sub CARG2, BASE, #16 - | str PC, SAVE_PC - | add CARG3, BASE, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. - | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | mov CARG1, L - | str BASE, L->base - | sub CARG2, RA, #16 - | str PC, SAVE_PC - | add CARG3, RA, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | ldr TMP1, [RA, FRAME_FUNC] // Guaranteed to be a function here. - | ldr PC, [BASE, FRAME_PC] - | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. - | and LFUNC:CARG3, TMP1, #LJ_GCVMASK - | b ->BC_CALLT2_Z - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | mov CARG1, L - | str BASE, L->base - | mov CARG2, RA - | str PC, SAVE_PC - | bl extern lj_meta_for // (lua_State *L, TValue *base) - | ldr INSw, [PC, #-4] - |.if JIT - | uxtb TMP0w, INSw - |.endif - | decode_RA RA, INS - | decode_RD RC, INS - |.if JIT - | cmp TMP0, #BC_JFORI - | beq =>BC_JFORI - |.endif - | b =>BC_FORI - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | ldr CARG1, [BASE] - | cmp NARGS8:RC, #8 - | blo ->fff_fallback - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. 
name: - | ldp CARG1, CARG2, [BASE] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - |.endmacro - | - |.macro .ffunc_n, name - | .ffunc name - | ldr CARG1, [BASE] - | cmp NARGS8:RC, #8 - | ldr FARG1, [BASE] - | blo ->fff_fallback - | checknum CARG1, ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name - | .ffunc name - | ldp CARG1, CARG2, [BASE] - | cmp NARGS8:RC, #16 - | ldp FARG1, FARG2, [BASE] - | blo ->fff_fallback - | checknum CARG1, ->fff_fallback - | checknum CARG2, ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. - |.macro ffgccheck - | ldp CARG1, CARG2, GL->gc.total // Assumes threshold follows total. - | cmp CARG1, CARG2 - | blt >1 - | bl ->fff_gcstep - |1: - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc_1 assert - | ldr PC, [BASE, FRAME_PC] - | mov_false TMP1 - | cmp CARG1, TMP1 - | bhs ->fff_fallback - | str CARG1, [BASE, #-16] - | sub RB, BASE, #8 - | subs RA, NARGS8:RC, #8 - | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8. - | cbz RA, ->fff_res // Done if exactly 1 argument. - |1: - | ldr CARG1, [RB, #16] - | sub RA, RA, #8 - | str CARG1, [RB], #8 - | cbnz RA, <1 - | b ->fff_res - | - |.ffunc_1 type - | mov TMP0, #~LJ_TISNUM - | asr ITYPE, CARG1, #47 - | cmn ITYPE, #~LJ_TISNUM - | csinv TMP1, TMP0, ITYPE, lo - | add TMP1, TMP1, #offsetof(GCfuncC, upvalue)/8 - | ldr CARG1, [CFUNC:CARG3, TMP1, lsl #3] - | b ->fff_restv - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | asr ITYPE, CARG1, #47 - | cmn ITYPE, #-LJ_TTAB - | ccmn ITYPE, #-LJ_TUDATA, #4, ne - | and TAB:CARG1, CARG1, #LJ_GCVMASK - | bne >6 - |1: // Field metatable must be at same offset for GCtab and GCudata! 
- | ldr TAB:RB, TAB:CARG1->metatable - |2: - | mov CARG1, TISNIL - | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] - | cbz TAB:RB, ->fff_restv - | ldr TMP1w, TAB:RB->hmask - | ldr TMP2w, STR:RC->hash - | ldr NODE:CARG3, TAB:RB->node - | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask - | add TMP1, TMP1, TMP1, lsl #1 - | movn CARG4, #~LJ_TSTR - | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 - | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. - |3: // Rearranged logic, because we expect _not_ to find the key. - | ldp CARG1, TMP0, NODE:CARG3->val - | ldr NODE:CARG3, NODE:CARG3->next - | cmp TMP0, CARG4 - | beq >5 - | cbnz NODE:CARG3, <3 - |4: - | mov CARG1, RB // Use metatable as default result. - | movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48 - | b ->fff_restv - |5: - | cmp TMP0, TISNIL - | bne ->fff_restv - | b <4 - | - |6: - | movn TMP0, #~LJ_TISNUM - | cmp ITYPE, TMP0 - | csel ITYPE, ITYPE, TMP0, hs - | sub TMP1, GL, ITYPE, lsl #3 - | ldr TAB:RB, [TMP1, #offsetof(global_State, gcroot[GCROOT_BASEMT])-8] - | b <2 - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. - | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback - | ldr TAB:TMP0, TAB:TMP1->metatable - | asr ITYPE, CARG2, #47 - | ldrb TMP2w, TAB:TMP1->marked - | cmn ITYPE, #-LJ_TTAB - | and TAB:CARG2, CARG2, #LJ_GCVMASK - | ccmp TAB:TMP0, #0, #0, eq - | bne ->fff_fallback - | str TAB:CARG2, TAB:TMP1->metatable - | tbz TMP2w, #2, ->fff_restv // isblack(table) - | barrierback TAB:TMP1, TMP2w, TMP0 - | b ->fff_restv - | - |.ffunc rawget - | ldr CARG2, [BASE] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - | checktab CARG2, ->fff_fallback - | mov CARG1, L - | add CARG3, BASE, #8 - | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - | // Returns cTValue *. 
- | ldr CARG1, [CRET1] - | b ->fff_restv - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | ldr CARG1, [BASE] - | cmp NARGS8:RC, #8 - | bne ->fff_fallback - | checknumber CARG1, ->fff_fallback - | b ->fff_restv - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | asr ITYPE, CARG1, #47 - | cmn ITYPE, #-LJ_TSTR - | // A __tostring method in the string base metatable is ignored. - | beq ->fff_restv - | // Handle numbers inline, unless a number base metatable is present. - | ldr TMP1, GL->gcroot[GCROOT_BASEMT_NUM] - | str BASE, L->base - | cmn ITYPE, #-LJ_TISNUM - | ccmp TMP1, #0, #0, ls - | str PC, SAVE_PC // Redundant (but a defined value). - | bne ->fff_fallback - | ffgccheck - | mov CARG1, L - | mov CARG2, BASE - | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) - | // Returns GCstr *. - | movn TMP1, #~LJ_TSTR - | ldr BASE, L->base - | add CARG1, CARG1, TMP1, lsl #47 - | b ->fff_restv - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc_1 next - | checktp CARG2, CARG1, LJ_TTAB, ->fff_fallback - | str TISNIL, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. - | ldr PC, [BASE, FRAME_PC] - | stp BASE, BASE, L->base // Add frame since C call can throw. - | mov CARG1, L - | add CARG3, BASE, #8 - | str PC, SAVE_PC - | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Returns 0 at end of traversal. - | str TISNIL, [BASE, #-16] - | cbz CRET1, ->fff_res1 // End of traversal: return nil. - | ldp CARG1, CARG2, [BASE, #8] // Copy key and value to results. 
- | mov RC, #(2+1)*8 - | stp CARG1, CARG2, [BASE, #-16] - | b ->fff_res - | - |.ffunc_1 pairs - | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback -#if LJ_52 - | ldr TAB:CARG2, TAB:TMP1->metatable -#endif - | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0] - | ldr PC, [BASE, FRAME_PC] -#if LJ_52 - | cbnz TAB:CARG2, ->fff_fallback -#endif - | mov RC, #(3+1)*8 - | stp CARG1, TISNIL, [BASE, #-8] - | str CFUNC:CARG4, [BASE, #-16] - | b ->fff_res - | - |.ffunc_2 ipairs_aux - | checktab CARG1, ->fff_fallback - | checkint CARG2, ->fff_fallback - | ldr TMP1w, TAB:CARG1->asize - | ldr CARG3, TAB:CARG1->array - | ldr TMP0w, TAB:CARG1->hmask - | add CARG2w, CARG2w, #1 - | cmp CARG2w, TMP1w - | ldr PC, [BASE, FRAME_PC] - | add TMP2, CARG2, TISNUM - | mov RC, #(0+1)*8 - | str TMP2, [BASE, #-16] - | bhs >2 // Not in array part? - | ldr TMP0, [CARG3, CARG2, lsl #3] - |1: - | mov TMP1, #(2+1)*8 - | cmp TMP0, TISNIL - | str TMP0, [BASE, #-8] - | csel RC, RC, TMP1, eq - | b ->fff_res - |2: // Check for empty hash part first. Otherwise call C function. - | cbz TMP0w, ->fff_res - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. 
- | cbz CRET1, ->fff_res - | ldr TMP0, [CRET1] - | b <1 - | - |.ffunc_1 ipairs - | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback -#if LJ_52 - | ldr TAB:CARG2, TAB:TMP1->metatable -#endif - | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0] - | ldr PC, [BASE, FRAME_PC] -#if LJ_52 - | cbnz TAB:CARG2, ->fff_fallback -#endif - | mov RC, #(3+1)*8 - | stp CARG1, TISNUM, [BASE, #-8] - | str CFUNC:CARG4, [BASE, #-16] - | b ->fff_res - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | ldrb TMP0w, GL->hookmask - | subs NARGS8:RC, NARGS8:RC, #8 - | blo ->fff_fallback - | mov RB, BASE - | add BASE, BASE, #16 - | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 - | add PC, TMP0, #16+FRAME_PCALL - | beq ->vm_call_dispatch - |1: - | add TMP2, BASE, NARGS8:RC - |2: - | ldr TMP0, [TMP2, #-16] - | str TMP0, [TMP2, #-8]! - | cmp TMP2, BASE - | bne <2 - | b ->vm_call_dispatch - | - |.ffunc xpcall - | ldp CARG1, CARG2, [BASE] - | ldrb TMP0w, GL->hookmask - | subs NARGS8:RC, NARGS8:RC, #16 - | blo ->fff_fallback - | mov RB, BASE - | add BASE, BASE, #24 - | asr ITYPE, CARG2, #47 - | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 - | cmn ITYPE, #-LJ_TFUNC - | add PC, TMP0, #24+FRAME_PCALL - | bne ->fff_fallback // Traceback must be a function. - | stp CARG2, CARG1, [RB] // Swap function and traceback. 
- | cbz NARGS8:RC, ->vm_call_dispatch - | b <1 - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | checktp CARG1, LJ_TTHREAD, ->fff_fallback - |.else - |.ffunc coroutine_wrap_aux - | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr - | and L:CARG1, CARG1, #LJ_GCVMASK - |.endif - | ldr PC, [BASE, FRAME_PC] - | str BASE, L->base - | ldp RB, CARG2, L:CARG1->base - | ldrb TMP1w, L:CARG1->status - | add TMP0, CARG2, TMP1 - | str PC, SAVE_PC - | cmp TMP0, RB - | beq ->fff_fallback - | cmp TMP1, #LUA_YIELD - | add TMP0, CARG2, #8 - | csel CARG2, CARG2, TMP0, hs - | ldr CARG4, L:CARG1->maxstack - | add CARG3, CARG2, NARGS8:RC - | ldr RB, L:CARG1->cframe - | ccmp CARG3, CARG4, #2, ls - | ccmp RB, #0, #2, ls - | bhi ->fff_fallback - |.if resume - | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC. - | add BASE, BASE, #8 - | sub NARGS8:RC, NARGS8:RC, #8 - |.endif - | str CARG3, L:CARG1->top - | str BASE, L->top - | cbz NARGS8:RC, >3 - |2: // Move args to coroutine. - | ldr TMP0, [BASE, RB] - | cmp RB, NARGS8:RC - | str TMP0, [CARG2, RB] - | add RB, RB, #8 - | bne <2 - |3: - | mov CARG3, #0 - | mov L:RA, L:CARG1 - | mov CARG4, #0 - | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) - | // Returns thread status. - |4: - | ldp CARG3, CARG4, L:RA->base - | cmp CRET1, #LUA_YIELD - | ldr BASE, L->base - | str L, GL->cur_L - | st_vmstate ST_INTERP - | bhi >8 - | sub RC, CARG4, CARG3 - | ldr CARG1, L->maxstack - | add CARG2, BASE, RC - | cbz RC, >6 // No results? - | cmp CARG2, CARG1 - | mov RB, #0 - | bhi >9 // Need to grow stack? - | - | sub CARG4, RC, #8 - | str CARG3, L:RA->top // Clear coroutine stack. - |5: // Move results from coroutine. 
- | ldr TMP0, [CARG3, RB] - | cmp RB, CARG4 - | str TMP0, [BASE, RB] - | add RB, RB, #8 - | bne <5 - |6: - |.if resume - | mov_true TMP1 - | add RC, RC, #16 - |7: - | str TMP1, [BASE, #-8] // Prepend true/false to results. - | sub RA, BASE, #8 - |.else - | mov RA, BASE - | add RC, RC, #8 - |.endif - | ands CARG1, PC, #FRAME_TYPE - | str PC, SAVE_PC - | str RCw, SAVE_MULTRES - | beq ->BC_RET_Z - | b ->vm_return - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | ldr TMP0, [CARG4, #-8]! - | mov_false TMP1 - | mov RC, #(2+1)*8 - | str CARG4, L:RA->top // Remove error from coroutine stack. - | str TMP0, [BASE] // Copy error message. - | b <7 - |.else - | mov CARG1, L - | mov CARG2, L:RA - | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - | // Never returns. - |.endif - | - |9: // Handle stack expansion on return from yield. - | mov CARG1, L - | lsr CARG2, RC, #3 - | bl extern lj_state_growstack // (lua_State *L, int n) - | mov CRET1, #0 - | b <4 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | ldr TMP0, L->cframe - | add TMP1, BASE, NARGS8:RC - | mov CRET1, #LUA_YIELD - | stp BASE, TMP1, L->base - | tbz TMP0, #0, ->fff_fallback - | str xzr, L->cframe - | strb CRET1w, L->status - | b ->vm_leave_unw - | - |//-- Math library ------------------------------------------------------- - | - |.macro math_round, func, round - | .ffunc math_ .. func - | ldr CARG1, [BASE] - | cmp NARGS8:RC, #8 - | ldr d0, [BASE] - | blo ->fff_fallback - | cmp TISNUMhi, CARG1, lsr #32 - | beq ->fff_restv - | blo ->fff_fallback - | round d0, d0 - | b ->fff_resn - |.endmacro - | - | math_round floor, frintm - | math_round ceil, frintp - | - |.ffunc_1 math_abs - | checknumber CARG1, ->fff_fallback - | and CARG1, CARG1, #U64x(7fffffff,ffffffff) - | bne ->fff_restv - | eor CARG2w, CARG1w, CARG1w, asr #31 - | movz CARG3, #0x41e0, lsl #48 // 2^31. 
- | subs CARG1w, CARG2w, CARG1w, asr #31 - | add CARG1, CARG1, TISNUM - | csel CARG1, CARG1, CARG3, pl - | // Fallthrough. - | - |->fff_restv: - | // CARG1 = TValue result. - | ldr PC, [BASE, FRAME_PC] - | str CARG1, [BASE, #-16] - |->fff_res1: - | // PC = return. - | mov RC, #(1+1)*8 - |->fff_res: - | // RC = (nresults+1)*8, PC = return. - | ands CARG1, PC, #FRAME_TYPE - | str RCw, SAVE_MULTRES - | sub RA, BASE, #16 - | bne ->vm_return - | ldr INSw, [PC, #-4] - | decode_RB RB, INS - |5: - | cmp RC, RB, lsl #3 // More results expected? - | blo >6 - | decode_RA TMP1, INS - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | sub BASE, RA, TMP1, lsl #3 - | ins_next - | - |6: // Fill up results with nil. - | add TMP1, RA, RC - | add RC, RC, #8 - | str TISNIL, [TMP1, #-8] - | b <5 - | - |.macro math_extern, func - | .ffunc_n math_ .. func - | bl extern func - | b ->fff_resn - |.endmacro - | - |.macro math_extern2, func - | .ffunc_nn math_ .. func - | bl extern func - | b ->fff_resn - |.endmacro - | - |.ffunc_n math_sqrt - | fsqrt d0, d0 - |->fff_resn: - | ldr PC, [BASE, FRAME_PC] - | str d0, [BASE, #-16] - | b ->fff_res1 - | - |.ffunc math_log - | ldr CARG1, [BASE] - | cmp NARGS8:RC, #8 - | ldr FARG1, [BASE] - | bne ->fff_fallback // Need exactly 1 argument. 
- | checknum CARG1, ->fff_fallback - | bl extern log - | b ->fff_resn - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.ffunc_2 math_ldexp - | ldr FARG1, [BASE] - | checknum CARG1, ->fff_fallback - | checkint CARG2, ->fff_fallback - | sxtw CARG1, CARG2w - | bl extern ldexp // (double x, int exp) - | b ->fff_resn - | - |.ffunc_n math_frexp - | add CARG1, sp, TMPDofs - | bl extern frexp - | ldr CARG2w, TMPD - | ldr PC, [BASE, FRAME_PC] - | str d0, [BASE, #-16] - | mov RC, #(2+1)*8 - | add CARG2, CARG2, TISNUM - | str CARG2, [BASE, #-8] - | b ->fff_res - | - |.ffunc_n math_modf - | sub CARG1, BASE, #16 - | ldr PC, [BASE, FRAME_PC] - | bl extern modf - | mov RC, #(2+1)*8 - | str d0, [BASE, #-8] - | b ->fff_res - | - |.macro math_minmax, name, cond, fcond - | .ffunc_1 name - | add RB, BASE, RC - | add RA, BASE, #8 - | checkint CARG1, >4 - |1: // Handle integers. - | ldr CARG2, [RA] - | cmp RA, RB - | bhs ->fff_restv - | checkint CARG2, >3 - | cmp CARG1w, CARG2w - | add RA, RA, #8 - | csel CARG1, CARG2, CARG1, cond - | b <1 - |3: // Convert intermediate result to number and continue below. - | scvtf d0, CARG1w - | blo ->fff_fallback - | ldr d1, [RA] - | b >6 - | - |4: - | ldr d0, [BASE] - | blo ->fff_fallback - |5: // Handle numbers. - | ldr CARG2, [RA] - | ldr d1, [RA] - | cmp RA, RB - | bhs ->fff_resn - | checknum CARG2, >7 - |6: - | fcmp d0, d1 - | add RA, RA, #8 - | fcsel d0, d1, d0, fcond - | b <5 - |7: // Convert integer to number and continue above. 
- | scvtf d1, CARG2w - | blo ->fff_fallback - | b <6 - |.endmacro - | - | math_minmax math_min, gt, hi - | math_minmax math_max, lt, lo - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | ldp PC, CARG1, [BASE, FRAME_PC] - | cmp NARGS8:RC, #8 - | asr ITYPE, CARG1, #47 - | ccmn ITYPE, #-LJ_TSTR, #0, eq - | and STR:CARG1, CARG1, #LJ_GCVMASK - | bne ->fff_fallback - | ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end). - | ldr CARG3w, STR:CARG1->len - | add TMP0, TMP0, TISNUM - | str TMP0, [BASE, #-16] - | mov RC, #(0+1)*8 - | cbz CARG3, ->fff_res - | b ->fff_res1 - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - | ldp PC, CARG1, [BASE, FRAME_PC] - | cmp CARG1w, #255 - | ccmp NARGS8:RC, #8, #0, ls // Need exactly 1 argument. - | bne ->fff_fallback - | checkint CARG1, ->fff_fallback - | mov CARG3, #1 - | mov CARG2, BASE // Points to stack. Little-endian. - |->fff_newstr: - | // CARG2 = str, CARG3 = len. - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_str_new // (lua_State *L, char *str, size_t l) - |->fff_resstr: - | // Returns GCstr *. 
- | ldr BASE, L->base - | movn TMP1, #~LJ_TSTR - | add CARG1, CARG1, TMP1, lsl #47 - | b ->fff_restv - | - |.ffunc string_sub - | ffgccheck - | ldr CARG1, [BASE] - | ldr CARG3, [BASE, #16] - | cmp NARGS8:RC, #16 - | movn RB, #0 - | beq >1 - | blo ->fff_fallback - | checkint CARG3, ->fff_fallback - | sxtw RB, CARG3w - |1: - | ldr CARG2, [BASE, #8] - | checkstr CARG1, ->fff_fallback - | ldr TMP1w, STR:CARG1->len - | checkint CARG2, ->fff_fallback - | sxtw CARG2, CARG2w - | // CARG1 = str, TMP1 = str->len, CARG2 = start, RB = end - | add TMP2, RB, TMP1 - | cmp RB, #0 - | add TMP0, CARG2, TMP1 - | csinc RB, RB, TMP2, ge // if (end < 0) end += len+1 - | cmp CARG2, #0 - | csinc CARG2, CARG2, TMP0, ge // if (start < 0) start += len+1 - | cmp RB, #0 - | csel RB, RB, xzr, ge // if (end < 0) end = 0 - | cmp CARG2, #1 - | csinc CARG2, CARG2, xzr, ge // if (start < 1) start = 1 - | cmp RB, TMP1 - | csel RB, RB, TMP1, le // if (end > len) end = len - | add CARG1, STR:CARG1, #sizeof(GCstr)-1 - | subs CARG3, RB, CARG2 // len = end - start - | add CARG2, CARG1, CARG2 - | add CARG3, CARG3, #1 // len += 1 - | bge ->fff_newstr - | add STR:CARG1, GL, #offsetof(global_State, strempty) - | movn TMP1, #~LJ_TSTR - | add CARG1, CARG1, TMP1, lsl #47 - | b ->fff_restv - | - |.macro ffstring_op, name - | .ffunc string_ .. name - | ffgccheck - | ldr CARG2, [BASE] - | cmp NARGS8:RC, #8 - | asr ITYPE, CARG2, #47 - | ccmn ITYPE, #-LJ_TSTR, #0, hs - | and STR:CARG2, CARG2, #LJ_GCVMASK - | bne ->fff_fallback - | ldr TMP0, GL->tmpbuf.b - | add SBUF:CARG1, GL, #offsetof(global_State, tmpbuf) - | str BASE, L->base - | str PC, SAVE_PC - | str L, GL->tmpbuf.L - | str TMP0, GL->tmpbuf.p - | bl extern lj_buf_putstr_ .. name - | bl extern lj_buf_tostr - | b ->fff_resstr - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |// FP number to bit conversion for soft-float. 
Clobbers CARG1-CARG3 - |->vm_tobit_fb: - | bls ->fff_fallback - | add CARG2, CARG1, CARG1 - | mov CARG3, #1076 - | sub CARG3, CARG3, CARG2, lsr #53 - | cmp CARG3, #53 - | bhi >1 - | and CARG2, CARG2, #U64x(001fffff,ffffffff) - | orr CARG2, CARG2, #U64x(00200000,00000000) - | cmp CARG1, #0 - | lsr CARG2, CARG2, CARG3 - | cneg CARG1w, CARG2w, mi - | br lr - |1: - | mov CARG1w, #0 - | br lr - | - |.macro .ffunc_bit, name - | .ffunc_1 bit_..name - | adr lr, >1 - | checkint CARG1, ->vm_tobit_fb - |1: - |.endmacro - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name - | mov RA, #8 - | mov TMP0w, CARG1w - | adr lr, >2 - |1: - | ldr CARG1, [BASE, RA] - | cmp RA, NARGS8:RC - | add RA, RA, #8 - | bge >9 - | checkint CARG1, ->vm_tobit_fb - |2: - | ins TMP0w, TMP0w, CARG1w - | b <1 - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, orr - |.ffunc_bit_op bxor, eor - | - |.ffunc_bit tobit - | mov TMP0w, CARG1w - |9: // Label reused by .ffunc_bit_op users. - | add CARG1, TMP0, TISNUM - | b ->fff_restv - | - |.ffunc_bit bswap - | rev TMP0w, CARG1w - | add CARG1, TMP0, TISNUM - | b ->fff_restv - | - |.ffunc_bit bnot - | mvn TMP0w, CARG1w - | add CARG1, TMP0, TISNUM - | b ->fff_restv - | - |.macro .ffunc_bit_sh, name, ins, shmod - | .ffunc bit_..name - | ldp TMP0, CARG1, [BASE] - | cmp NARGS8:RC, #16 - | blo ->fff_fallback - | adr lr, >1 - | checkint CARG1, ->vm_tobit_fb - |1: - |.if shmod == 0 - | mov TMP1, CARG1 - |.else - | neg TMP1, CARG1 - |.endif - | mov CARG1, TMP0 - | adr lr, >2 - | checkint CARG1, ->vm_tobit_fb - |2: - | ins TMP0w, CARG1w, TMP1w - | add CARG1, TMP0, TISNUM - | b ->fff_restv - |.endmacro - | - |.ffunc_bit_sh lshift, lsl, 0 - |.ffunc_bit_sh rshift, lsr, 0 - |.ffunc_bit_sh arshift, asr, 0 - |.ffunc_bit_sh rol, ror, 1 - |.ffunc_bit_sh ror, ror, 0 - | - |//----------------------------------------------------------------------- - | - |->fff_fallback: // Call fast function fallback handler. 
- | // BASE = new base, RC = nargs*8 - | ldp CFUNC:CARG3, PC, [BASE, FRAME_FUNC] // Fallback may overwrite PC. - | ldr TMP2, L->maxstack - | add TMP1, BASE, NARGS8:RC - | stp BASE, TMP1, L->base - | and CFUNC:CARG3, CARG3, #LJ_GCVMASK - | add TMP1, TMP1, #8*LUA_MINSTACK - | ldr CARG3, CFUNC:CARG3->f - | str PC, SAVE_PC // Redundant (but a defined value). - | cmp TMP1, TMP2 - | mov CARG1, L - | bhi >5 // Need to grow stack. - | blr CARG3 // (lua_State *L) - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | ldr BASE, L->base - | cmp CRET1w, #0 - | lsl RC, CRET1, #3 - | sub RA, BASE, #16 - | bgt ->fff_res // Returned nresults+1? - |1: // Returned 0 or -1: retry fast path. - | ldr CARG1, L->top - | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] - | sub NARGS8:RC, CARG1, BASE - | bne ->vm_call_tail // Returned -1? - | and CFUNC:CARG3, CARG3, #LJ_GCVMASK - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | ands TMP0, PC, #FRAME_TYPE - | and TMP1, PC, #~FRAME_TYPEP - | bne >3 - | ldrb RAw, [PC, #-3] - | lsl RA, RA, #3 - | add TMP1, RA, #16 - |3: - | sub RB, BASE, TMP1 - | b ->vm_call_dispatch // Resolve again for tailcall. - | - |5: // Grow stack for fallback handler. - | mov CARG2, #LUA_MINSTACK - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldr BASE, L->base - | cmp CARG1, CARG1 // Set zero-flag to force retry. - | b <1 - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | add CARG2, BASE, NARGS8:RC // Calculate L->top. - | mov RA, lr - | stp BASE, CARG2, L->base - | str PC, SAVE_PC // Redundant (but a defined value). - | mov CARG1, L - | bl extern lj_gc_step // (lua_State *L) - | ldp BASE, CARG2, L->base - | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] - | mov lr, RA // Help return address predictor. - | sub NARGS8:RC, CARG2, BASE // Calculate nargs*8. 
- | and CFUNC:CARG3, CARG3, #LJ_GCVMASK - | ret - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | ldrb CARG1w, GL->hookmask - | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. - | bne >5 - | // Decrement the hookcount for consistency, but always do the call. - | ldr CARG2w, GL->hookcount - | tst CARG1, #HOOK_ACTIVE - | bne >1 - | sub CARG2w, CARG2w, #1 - | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT - | beq >1 - | str CARG2w, GL->hookcount - | b >1 - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | ldrb TMP2w, GL->hookmask - | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active? - |5: // Re-dispatch to static ins. - | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] - | br TMP0 - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | ldrb TMP2w, GL->hookmask - | ldr TMP3w, GL->hookcount - | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active? - | tst TMP2w, #LUA_MASKLINE|LUA_MASKCOUNT - | beq <5 - | sub TMP3w, TMP3w, #1 - | str TMP3w, GL->hookcount - | cbz TMP3w, >1 - | tbz TMP2w, #LUA_HOOKLINE, <5 - |1: - | mov CARG1, L - | str BASE, L->base - | mov CARG2, PC - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |3: - | ldr BASE, L->base - |4: // Re-dispatch to static ins. - | ldr INSw, [PC, #-4] - | add TMP1, GL, INS, uxtb #3 - | decode_RA RA, INS - | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] - | decode_RD RC, INS - | br TMP0 - | - |->cont_hook: // Continue from hook yield. - | ldr CARG1, [CARG4, #-40] - | add PC, PC, #4 - | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins. - | b <4 - | - |->vm_hotloop: // Hot loop counter underflow. 
- |.if JIT - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). - | add CARG1, GL, #GG_G2DISP+GG_DISP2J - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | str PC, SAVE_PC - | ldr CARG3, LFUNC:CARG3->pc - | mov CARG2, PC - | str L, [GL, #GL_J(L)] - | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)] - | str BASE, L->base - | add CARG3, BASE, CARG3, lsl #3 - | str CARG3, L->top - | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) - | b <3 - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - | mov CARG2, PC - |.if JIT - | b >1 - |.endif - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | orr CARG2, PC, #1 - |1: - |.endif - | add TMP1, BASE, NARGS8:RC - | str PC, SAVE_PC - | mov CARG1, L - | sub RA, RA, BASE - | stp BASE, TMP1, L->base - | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) - | // Returns ASMFunction. - | ldp BASE, TMP1, L->base - | str xzr, SAVE_PC // Invalidate for subsequent line hook. - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | add RA, BASE, RA - | sub NARGS8:RC, TMP1, BASE - | ldr INSw, [PC, #-4] - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | br CRET1 - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // RA = resultptr, CARG4 = meta base - | ldr RB, SAVE_MULTRES - | ldr INSw, [PC, #-4] - | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. - | subs RB, RB, #8 - | decode_RA RC, INS // Call base. - | and CARG3, CARG3, #LJ_GCVMASK - | beq >2 - |1: // Move results down. - | ldr CARG1, [RA] - | add RA, RA, #8 - | subs RB, RB, #8 - | str CARG1, [BASE, RC, lsl #3] - | add RC, RC, #1 - | bne <1 - |2: - | decode_RA RA, INS - | decode_RB RB, INS - | add RA, RA, RB - |3: - | cmp RA, RC - | bhi >9 // More results wanted? - | - | ldrh RAw, TRACE:CARG3->traceno - | ldrh RCw, TRACE:CARG3->link - | cmp RCw, RAw - | beq ->cont_nop // Blacklisted. - | cmp RCw, #0 - | bne =>BC_JLOOP // Jump to stitched trace. - | - | // Stitch a new trace to the previous trace. 
- | mov CARG1, #GL_J(exitno) - | str RA, [GL, CARG1] - | mov CARG1, #GL_J(L) - | str L, [GL, CARG1] - | str BASE, L->base - | add CARG1, GL, #GG_G2J - | mov CARG2, PC - | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) - | ldr BASE, L->base - | b ->cont_nop - | - |9: // Fill up results with nil. - | str TISNIL, [BASE, RC, lsl #3] - | add RC, RC, #1 - | b <3 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | mov CARG1, L - | str BASE, L->base - | mov CARG2, PC - | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. - | ldr BASE, L->base - | sub PC, PC, #4 - | b ->cont_nop -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro savex_, a, b - | stp d..a, d..b, [sp, #a*8] - | stp x..a, x..b, [sp, #32*8+a*8] - |.endmacro - | - |->vm_exit_handler: - |.if JIT - | sub sp, sp, #(64*8) - | savex_, 0, 1 - | savex_, 2, 3 - | savex_, 4, 5 - | savex_, 6, 7 - | savex_, 8, 9 - | savex_, 10, 11 - | savex_, 12, 13 - | savex_, 14, 15 - | savex_, 16, 17 - | savex_, 18, 19 - | savex_, 20, 21 - | savex_, 22, 23 - | savex_, 24, 25 - | savex_, 26, 27 - | savex_, 28, 29 - | stp d30, d31, [sp, #30*8] - | ldr CARG1, [sp, #64*8] // Load original value of lr. - | add CARG3, sp, #64*8 // Recompute original value of sp. - | mv_vmstate CARG4, EXIT - | stp xzr, CARG3, [sp, #62*8] // Store 0/sp in RID_LR/RID_SP. - | sub CARG1, CARG1, lr - | ldr L, GL->cur_L - | lsr CARG1, CARG1, #2 - | ldr BASE, GL->jit_base - | sub CARG1, CARG1, #2 - | ldr CARG2w, [lr] // Load trace number. 
- | st_vmstate CARG4 - | str BASE, L->base - | ubfx CARG2w, CARG2w, #5, #16 - | str CARG1w, [GL, #GL_J(exitno)] - | str CARG2w, [GL, #GL_J(parent)] - | str L, [GL, #GL_J(L)] - | str xzr, GL->jit_base - | add CARG1, GL, #GG_G2J - | mov CARG2, sp - | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) - | // Returns MULTRES (unscaled) or negated error code. - | ldr CARG2, L->cframe - | ldr BASE, L->base - | and sp, CARG2, #CFRAME_RAWMASK - | ldr PC, SAVE_PC // Get SAVE_PC. - | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). - | b >1 - |.endif - | - |->vm_exit_interp: - | // CARG1 = MULTRES or negated error code, BASE, PC and GL set. - |.if JIT - | ldr L, SAVE_L - |1: - | cmp CARG1w, #0 - | blt >9 // Check for error from exit. - | lsl RC, CARG1, #3 - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | str RC, SAVE_MULTRES - | str BASE, L->base - | ldr CARG2, LFUNC:CARG2->pc - | str xzr, GL->jit_base - | mv_vmstate CARG4, INTERP - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | // Modified copy of ins_next which handles function header dispatch, too. - | ldrb RBw, [PC] - | ldr INSw, [PC], #4 - | st_vmstate CARG4 - | cmp RBw, #BC_FUNCC+2 // Fast function? - | add TMP1, GL, INS, uxtb #3 - | bhs >4 - |2: - | cmp RBw, #BC_FUNCF // Function header? - | add TMP0, GL, RB, uxtb #3 - | ldr RB, [TMP0, #GG_G2DISP] - | decode_RA RA, INS - | lsr TMP0, INS, #16 - | csel RC, TMP0, RC, lo - | blo >5 - | ldr CARG3, [BASE, FRAME_FUNC] - | sub RC, RC, #8 - | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8 - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - |5: - | br RB - | - |4: // Check frame below fast function. - | ldr CARG1, [BASE, FRAME_PC] - | ands CARG2, CARG1, #FRAME_TYPE - | bne <2 // Trace stitching continuation? - | // Otherwise set KBASE for Lua function below fast function. 
- | ldr CARG3, [CARG1, #-4] - | decode_RA CARG1, CARG3 - | sub CARG2, BASE, CARG1, lsl #3 - | ldr LFUNC:CARG3, [CARG2, #-32] - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | ldr CARG3, LFUNC:CARG3->pc - | ldr KBASE, [CARG3, #PC2PROTO(k)] - | b <2 - | - |9: // Rethrow error from the right C frame. - | neg CARG2, CARG1 - | mov CARG1, L - | bl extern lj_err_throw // (lua_State *L, int errcode) - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - | // int lj_vm_modi(int dividend, int divisor); - |->vm_modi: - | eor CARG4w, CARG1w, CARG2w - | cmp CARG4w, #0 - | eor CARG3w, CARG1w, CARG1w, asr #31 - | eor CARG4w, CARG2w, CARG2w, asr #31 - | sub CARG3w, CARG3w, CARG1w, asr #31 - | sub CARG4w, CARG4w, CARG2w, asr #31 - | udiv CARG1w, CARG3w, CARG4w - | msub CARG1w, CARG1w, CARG4w, CARG3w - | ccmp CARG1w, #0, #4, mi - | sub CARG3w, CARG1w, CARG4w - | csel CARG1w, CARG1w, CARG3w, eq - | eor CARG3w, CARG1w, CARG2w - | cmp CARG3w, #0 - | cneg CARG1w, CARG1w, mi - | ret - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. - |// Saveregs already performed. Callback slot number in [sp], g in r12. 
- |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - | saveregs - | ldr CTSTATE, GL:x10->ctype_state - | mov GL, x10 - | add x10, sp, # CFRAME_SPACE - | str w9, CTSTATE->cb.slot - | stp x0, x1, CTSTATE->cb.gpr[0] - | stp d0, d1, CTSTATE->cb.fpr[0] - | stp x2, x3, CTSTATE->cb.gpr[2] - | stp d2, d3, CTSTATE->cb.fpr[2] - | stp x4, x5, CTSTATE->cb.gpr[4] - | stp d4, d5, CTSTATE->cb.fpr[4] - | stp x6, x7, CTSTATE->cb.gpr[6] - | stp d6, d7, CTSTATE->cb.fpr[6] - | str x10, CTSTATE->cb.stack - | mov CARG1, CTSTATE - | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok. - | mov CARG2, sp - | bl extern lj_ccallback_enter // (CTState *cts, void *cf) - | // Returns lua_State *. - | ldp BASE, RC, L:CRET1->base - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 - | mov L, CRET1 - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | sub RC, RC, BASE - | st_vmstate ST_INTERP - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | ldr CTSTATE, GL->ctype_state - | stp BASE, CARG4, L->base - | str L, CTSTATE->L - | mov CARG1, CTSTATE - | mov CARG2, RA - | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) - | ldp x0, x1, CTSTATE->cb.gpr[0] - | ldp d0, d1, CTSTATE->cb.fpr[0] - | b ->vm_leave_unw - |.endif - | - |->vm_ffi_call: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. - |.if FFI - | .type CCSTATE, CCallState, x19 - | stp fp, lr, [sp, #-32]! 
- | add fp, sp, #0 - | str CCSTATE, [sp, #16] - | mov CCSTATE, x0 - | ldr TMP0w, CCSTATE:x0->spadj - | ldrb TMP1w, CCSTATE->nsp - | add TMP2, CCSTATE, #offsetof(CCallState, stack) - | subs TMP1, TMP1, #1 - | ldr TMP3, CCSTATE->func - | sub sp, fp, TMP0 - | bmi >2 - |1: // Copy stack slots - | ldr TMP0, [TMP2, TMP1, lsl #3] - | str TMP0, [sp, TMP1, lsl #3] - | subs TMP1, TMP1, #1 - | bpl <1 - |2: - | ldp x0, x1, CCSTATE->gpr[0] - | ldp d0, d1, CCSTATE->fpr[0] - | ldp x2, x3, CCSTATE->gpr[2] - | ldp d2, d3, CCSTATE->fpr[2] - | ldp x4, x5, CCSTATE->gpr[4] - | ldp d4, d5, CCSTATE->fpr[4] - | ldp x6, x7, CCSTATE->gpr[6] - | ldp d6, d7, CCSTATE->fpr[6] - | ldr x8, CCSTATE->retp - | blr TMP3 - | mov sp, fp - | stp x0, x1, CCSTATE->gpr[0] - | stp d0, d1, CCSTATE->fpr[0] - | stp d2, d3, CCSTATE->fpr[2] - | ldr CCSTATE, [sp, #16] - | ldp fp, lr, [sp], #32 - | ret - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. */ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1, RC = src2, JMP with RC = target - | ldr CARG1, [BASE, RA, lsl #3] - | ldrh RBw, [PC, #2] - | ldr CARG2, [BASE, RC, lsl #3] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | sub RB, RB, #0x20000 - | checkint CARG1, >3 - | checkint CARG2, >4 - | cmp CARG1w, CARG2w - if (op == BC_ISLT) { - | csel PC, RB, PC, lt - } else if (op == BC_ISGE) { - | csel PC, RB, PC, ge - } else if (op == BC_ISLE) { - | csel PC, RB, PC, le - } else { - | csel PC, RB, PC, gt - } - |1: - | ins_next - | - |3: // RA not int. 
- | ldr FARG1, [BASE, RA, lsl #3] - | blo ->vmeta_comp - | ldr FARG2, [BASE, RC, lsl #3] - | cmp TISNUMhi, CARG2, lsr #32 - | bhi >5 - | bne ->vmeta_comp - | // RA number, RC int. - | scvtf FARG2, CARG2w - | b >5 - | - |4: // RA int, RC not int - | ldr FARG2, [BASE, RC, lsl #3] - | blo ->vmeta_comp - | // RA int, RC number. - | scvtf FARG1, CARG1w - | - |5: // RA number, RC number - | fcmp FARG1, FARG2 - | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. - if (op == BC_ISLT) { - | csel PC, RB, PC, lo - } else if (op == BC_ISGE) { - | csel PC, RB, PC, hs - } else if (op == BC_ISLE) { - | csel PC, RB, PC, ls - } else { - | csel PC, RB, PC, hi - } - | b <1 - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1, RC = src2, JMP with RC = target - | ldr CARG1, [BASE, RA, lsl #3] - | add RC, BASE, RC, lsl #3 - | ldrh RBw, [PC, #2] - | ldr CARG3, [RC] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | sub RB, RB, #0x20000 - | asr ITYPE, CARG3, #47 - | cmn ITYPE, #-LJ_TISNUM - if (vk) { - | bls ->BC_ISEQN_Z - } else { - | bls ->BC_ISNEN_Z - } - | // RC is not a number. - | asr TMP0, CARG1, #47 - |.if FFI - | // Check if RC or RA is a cdata. - | cmn ITYPE, #-LJ_TCDATA - | ccmn TMP0, #-LJ_TCDATA, #4, ne - | beq ->vmeta_equal_cd - |.endif - | cmp CARG1, CARG3 - | bne >2 - | // Tag and value are equal. - if (vk) { - |->BC_ISEQV_Z: - | mov PC, RB // Perform branch. - } - |1: - | ins_next - | - |2: // Check if the tags are the same and it's a table or userdata. - | cmp ITYPE, TMP0 - | ccmn ITYPE, #-LJ_TISTABUD, #2, eq - if (vk) { - | bhi <1 - } else { - | bhi ->BC_ISEQV_Z // Reuse code from opposite instruction. - } - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - | and TAB:CARG2, CARG1, #LJ_GCVMASK - | ldr TAB:TMP2, TAB:CARG2->metatable - if (vk) { - | cbz TAB:TMP2, <1 // No metatable? 
- | ldrb TMP1w, TAB:TMP2->nomm - | mov CARG4, #0 // ne = 0 - | tbnz TMP1w, #MM_eq, <1 // 'no __eq' flag set: done. - } else { - | cbz TAB:TMP2, ->BC_ISEQV_Z // No metatable? - | ldrb TMP1w, TAB:TMP2->nomm - | mov CARG4, #1 // ne = 1. - | tbnz TMP1w, #MM_eq, ->BC_ISEQV_Z // 'no __eq' flag set: done. - } - | b ->vmeta_equal - break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src, RC = str_const (~), JMP with RC = target - | ldr CARG1, [BASE, RA, lsl #3] - | mvn RC, RC - | ldrh RBw, [PC, #2] - | ldr CARG2, [KBASE, RC, lsl #3] - | add PC, PC, #4 - | movn TMP0, #~LJ_TSTR - |.if FFI - | asr ITYPE, CARG1, #47 - |.endif - | add RB, PC, RB, lsl #2 - | add CARG2, CARG2, TMP0, lsl #47 - | sub RB, RB, #0x20000 - |.if FFI - | cmn ITYPE, #-LJ_TCDATA - | beq ->vmeta_equal_cd - |.endif - | cmp CARG1, CARG2 - if (vk) { - | csel PC, RB, PC, eq - } else { - | csel PC, RB, PC, ne - } - | ins_next - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src, RC = num_const (~), JMP with RC = target - | ldr CARG1, [BASE, RA, lsl #3] - | add RC, KBASE, RC, lsl #3 - | ldrh RBw, [PC, #2] - | ldr CARG3, [RC] - | add PC, PC, #4 - | add RB, PC, RB, lsl #2 - | sub RB, RB, #0x20000 - if (vk) { - |->BC_ISEQN_Z: - } else { - |->BC_ISNEN_Z: - } - | checkint CARG1, >4 - | checkint CARG3, >6 - | cmp CARG1w, CARG3w - |1: - if (vk) { - | csel PC, RB, PC, eq - |2: - } else { - |2: - | csel PC, RB, PC, ne - } - |3: - | ins_next - | - |4: // RA not int. - |.if FFI - | blo >7 - |.else - | blo <2 - |.endif - | ldr FARG1, [BASE, RA, lsl #3] - | ldr FARG2, [RC] - | cmp TISNUMhi, CARG3, lsr #32 - | bne >5 - | // RA number, RC int. - | scvtf FARG2, CARG3w - |5: - | // RA number, RC number. 
- | fcmp FARG1, FARG2 - | b <1 - | - |6: // RA int, RC number - | ldr FARG2, [RC] - | scvtf FARG1, CARG1w - | fcmp FARG1, FARG2 - | b <1 - | - |.if FFI - |7: - | asr ITYPE, CARG1, #47 - | cmn ITYPE, #-LJ_TCDATA - | bne <2 - | b ->vmeta_equal_cd - |.endif - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src, RC = primitive_type (~), JMP with RC = target - | ldr TMP0, [BASE, RA, lsl #3] - | ldrh RBw, [PC, #2] - | add PC, PC, #4 - | add RC, RC, #1 - | add RB, PC, RB, lsl #2 - |.if FFI - | asr ITYPE, TMP0, #47 - | cmn ITYPE, #-LJ_TCDATA - | beq ->vmeta_equal_cd - | cmn RC, ITYPE - |.else - | cmn RC, TMP0, asr #47 - |.endif - | sub RB, RB, #0x20000 - if (vk) { - | csel PC, RB, PC, eq - } else { - | csel PC, RB, PC, ne - } - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst or unused, RC = src, JMP with RC = target - | ldrh RBw, [PC, #2] - | ldr TMP0, [BASE, RC, lsl #3] - | add PC, PC, #4 - | mov_false TMP1 - | add RB, PC, RB, lsl #2 - | cmp TMP0, TMP1 - | sub RB, RB, #0x20000 - if (op == BC_ISTC || op == BC_IST) { - if (op == BC_ISTC) { - | csel RA, RA, RC, lo - } - | csel PC, RB, PC, lo - } else { - if (op == BC_ISFC) { - | csel RA, RA, RC, hs - } - | csel PC, RB, PC, hs - } - if (op == BC_ISTC || op == BC_ISFC) { - | str TMP0, [BASE, RA, lsl #3] - } - | ins_next - break; - - case BC_ISTYPE: - | // RA = src, RC = -type - | ldr TMP0, [BASE, RA, lsl #3] - | cmn RC, TMP0, asr #47 - | bne ->vmeta_istype - | ins_next - break; - case BC_ISNUM: - | // RA = src, RC = -(TISNUM-1) - | ldr TMP0, [BASE, RA] - | checknum TMP0, ->vmeta_istype - | ins_next - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA = dst, RC = src - | ldr TMP0, [BASE, RC, lsl #3] - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_NOT: - | // RA = dst, RC = src - | ldr TMP0, 
[BASE, RC, lsl #3] - | mov_false TMP1 - | mov_true TMP2 - | cmp TMP0, TMP1 - | csel TMP0, TMP1, TMP2, lo - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_UNM: - | // RA = dst, RC = src - | ldr TMP0, [BASE, RC, lsl #3] - | asr ITYPE, TMP0, #47 - | cmn ITYPE, #-LJ_TISNUM - | bhi ->vmeta_unm - | eor TMP0, TMP0, #U64x(80000000,00000000) - | bne >5 - | negs TMP0w, TMP0w - | movz CARG3, #0x41e0, lsl #48 // 2^31. - | add TMP0, TMP0, TISNUM - | csel TMP0, TMP0, CARG3, vc - |5: - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_LEN: - | // RA = dst, RC = src - | ldr CARG1, [BASE, RC, lsl #3] - | asr ITYPE, CARG1, #47 - | cmn ITYPE, #-LJ_TSTR - | and CARG1, CARG1, #LJ_GCVMASK - | bne >2 - | ldr CARG1w, STR:CARG1->len - |1: - | add CARG1, CARG1, TISNUM - | str CARG1, [BASE, RA, lsl #3] - | ins_next - | - |2: - | cmn ITYPE, #-LJ_TTAB - | bne ->vmeta_len -#if LJ_52 - | ldr TAB:CARG2, TAB:CARG1->metatable - | cbnz TAB:CARG2, >9 - |3: -#endif - |->BC_LEN_Z: - | bl extern lj_tab_len // (GCtab *t) - | // Returns uint32_t (but less than 2^31). - | b <1 - | -#if LJ_52 - |9: - | ldrb TMP1w, TAB:CARG2->nomm - | tbnz TMP1w, #MM_len, <3 // 'no __len' flag set: done. 
- | b ->vmeta_len -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro ins_arithcheck_int, target - | checkint CARG1, target - | checkint CARG2, target - |.endmacro - | - |.macro ins_arithcheck_num, target - | checknum CARG1, target - | checknum CARG2, target - |.endmacro - | - |.macro ins_arithcheck_nzdiv, target - | cbz CARG2w, target - |.endmacro - | - |.macro ins_arithhead - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||if (vk == 1) { - | and RC, RC, #255 - | decode_RB RB, INS - ||} else { - | decode_RB RB, INS - | and RC, RC, #255 - ||} - |.endmacro - | - |.macro ins_arithload, reg1, reg2 - | // RA = dst, RB = src1, RC = src2 | num_const - ||switch (vk) { - ||case 0: - | ldr reg1, [BASE, RB, lsl #3] - | ldr reg2, [KBASE, RC, lsl #3] - || break; - ||case 1: - | ldr reg1, [KBASE, RC, lsl #3] - | ldr reg2, [BASE, RB, lsl #3] - || break; - ||default: - | ldr reg1, [BASE, RB, lsl #3] - | ldr reg2, [BASE, RC, lsl #3] - || break; - ||} - |.endmacro - | - |.macro ins_arithfallback, ins - ||switch (vk) { - ||case 0: - | ins ->vmeta_arith_vn - || break; - ||case 1: - | ins ->vmeta_arith_nv - || break; - ||default: - | ins ->vmeta_arith_vv - || break; - ||} - |.endmacro - | - |.macro ins_arithmod, res, reg1, reg2 - | fdiv d2, reg1, reg2 - | frintm d2, d2 - | fmsub res, d2, reg2, reg1 - |.endmacro - | - |.macro ins_arithdn, intins, fpins - | ins_arithhead - | ins_arithload CARG1, CARG2 - | ins_arithcheck_int >5 - |.if "intins" == "smull" - | smull CARG1, CARG1w, CARG2w - | cmp CARG1, CARG1, sxtw - | mov CARG1w, CARG1w - | ins_arithfallback bne - |.elif "intins" == "ins_arithmodi" - | ins_arithfallback ins_arithcheck_nzdiv - | bl ->vm_modi - |.else - | intins CARG1w, CARG1w, CARG2w - | ins_arithfallback bvs - |.endif - | add CARG1, CARG1, TISNUM - | str CARG1, [BASE, RA, lsl #3] - |4: - | ins_next - | - |5: // FP variant. 
- | ins_arithload FARG1, FARG2 - | ins_arithfallback ins_arithcheck_num - | fpins FARG1, FARG1, FARG2 - | str FARG1, [BASE, RA, lsl #3] - | b <4 - |.endmacro - | - |.macro ins_arithfp, fpins - | ins_arithhead - | ins_arithload CARG1, CARG2 - | ins_arithload FARG1, FARG2 - | ins_arithfallback ins_arithcheck_num - |.if "fpins" == "fpow" - | bl extern pow - |.else - | fpins FARG1, FARG1, FARG2 - |.endif - | str FARG1, [BASE, RA, lsl #3] - | ins_next - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arithdn adds, fadd - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arithdn subs, fsub - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arithdn smull, fmul - break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arithfp fdiv - break; - case BC_MODVN: case BC_MODNV: case BC_MODVV: - | ins_arithdn ins_arithmodi, ins_arithmod - break; - case BC_POW: - | // NYI: (partial) integer arithmetic. - | ins_arithfp fpow - break; - - case BC_CAT: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = src_start, RC = src_end - | str BASE, L->base - | sub CARG3, RC, RB - | add CARG2, BASE, RC, lsl #3 - |->BC_CAT_Z: - | // RA = dst, CARG2 = top-1, CARG3 = left - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) - | // Returns NULL (finished) or TValue * (metamethod). - | ldrb RBw, [PC, #-1] - | ldr BASE, L->base - | cbnz CRET1, ->vmeta_binop - | ldr TMP0, [BASE, RB, lsl #3] - | str TMP0, [BASE, RA, lsl #3] // Copy result to RA. 
- | ins_next - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst, RC = str_const (~) - | mvn RC, RC - | ldr TMP0, [KBASE, RC, lsl #3] - | movn TMP1, #~LJ_TSTR - | add TMP0, TMP0, TMP1, lsl #47 - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_KCDATA: - |.if FFI - | // RA = dst, RC = cdata_const (~) - | mvn RC, RC - | ldr TMP0, [KBASE, RC, lsl #3] - | movn TMP1, #~LJ_TCDATA - | add TMP0, TMP0, TMP1, lsl #47 - | str TMP0, [BASE, RA, lsl #3] - | ins_next - |.endif - break; - case BC_KSHORT: - | // RA = dst, RC = int16_literal - | sxth RCw, RCw - | add TMP0, RC, TISNUM - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_KNUM: - | // RA = dst, RC = num_const - | ldr TMP0, [KBASE, RC, lsl #3] - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_KPRI: - | // RA = dst, RC = primitive_type (~) - | mvn TMP0, RC, lsl #47 - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_KNIL: - | // RA = base, RC = end - | add RA, BASE, RA, lsl #3 - | add RC, BASE, RC, lsl #3 - | str TISNIL, [RA], #8 - |1: - | cmp RA, RC - | str TISNIL, [RA], #8 - | blt <1 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst, RC = uvnum - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | add RC, RC, #offsetof(GCfuncL, uvptr)/8 - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | ldr UPVAL:CARG2, [LFUNC:CARG2, RC, lsl #3] - | ldr CARG2, UPVAL:CARG2->v - | ldr TMP0, [CARG2] - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - case BC_USETV: - | // RA = uvnum, RC = src - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | add RA, RA, #offsetof(GCfuncL, uvptr)/8 - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3] - | ldr CARG3, [BASE, RC, lsl #3] - | ldr CARG2, UPVAL:CARG1->v - | ldrb TMP2w, UPVAL:CARG1->marked - | ldrb TMP0w, UPVAL:CARG1->closed - | asr ITYPE, CARG3, #47 - | str CARG3, 
[CARG2] - | add ITYPE, ITYPE, #-LJ_TISGCV - | tst TMP2w, #LJ_GC_BLACK // isblack(uv) - | ccmp TMP0w, #0, #4, ne // && uv->closed - | ccmn ITYPE, #-(LJ_TNUMX - LJ_TISGCV), #0, ne // && tvisgcv(v) - | bhi >2 - |1: - | ins_next - | - |2: // Check if new value is white. - | and GCOBJ:CARG3, CARG3, #LJ_GCVMASK - | ldrb TMP1w, GCOBJ:CARG3->gch.marked - | tst TMP1w, #LJ_GC_WHITES // iswhite(str) - | beq <1 - | // Crossed a write barrier. Move the barrier forward. - | mov CARG1, GL - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | b <1 - break; - case BC_USETS: - | // RA = uvnum, RC = str_const (~) - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | add RA, RA, #offsetof(GCfuncL, uvptr)/8 - | mvn RC, RC - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3] - | ldr STR:CARG3, [KBASE, RC, lsl #3] - | movn TMP0, #~LJ_TSTR - | ldr CARG2, UPVAL:CARG1->v - | ldrb TMP2w, UPVAL:CARG1->marked - | add TMP0, STR:CARG3, TMP0, lsl #47 - | ldrb TMP1w, STR:CARG3->marked - | str TMP0, [CARG2] - | tbnz TMP2w, #2, >2 // isblack(uv) - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | ldrb TMP0w, UPVAL:CARG1->closed - | tst TMP1w, #LJ_GC_WHITES // iswhite(str) - | ccmp TMP0w, #0, #0, ne - | beq <1 - | // Crossed a write barrier. Move the barrier forward. 
- | mov CARG1, GL - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | b <1 - break; - case BC_USETN: - | // RA = uvnum, RC = num_const - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | add RA, RA, #offsetof(GCfuncL, uvptr)/8 - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3] - | ldr TMP0, [KBASE, RC, lsl #3] - | ldr CARG2, UPVAL:CARG2->v - | str TMP0, [CARG2] - | ins_next - break; - case BC_USETP: - | // RA = uvnum, RC = primitive_type (~) - | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | add RA, RA, #offsetof(GCfuncL, uvptr)/8 - | and LFUNC:CARG2, CARG2, #LJ_GCVMASK - | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3] - | mvn TMP0, RC, lsl #47 - | ldr CARG2, UPVAL:CARG2->v - | str TMP0, [CARG2] - | ins_next - break; - - case BC_UCLO: - | // RA = level, RC = target - | ldr CARG3, L->openupval - | add RC, PC, RC, lsl #2 - | str BASE, L->base - | sub PC, RC, #0x20000 - | cbz CARG3, >1 - | mov CARG1, L - | add CARG2, BASE, RA, lsl #3 - | bl extern lj_func_closeuv // (lua_State *L, TValue *level) - | ldr BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst, RC = proto_const (~) (holding function prototype) - | mvn RC, RC - | str BASE, L->base - | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] - | str PC, SAVE_PC - | ldr CARG2, [KBASE, RC, lsl #3] - | mov CARG1, L - | and LFUNC:CARG3, CARG3, #LJ_GCVMASK - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | bl extern lj_func_newL_gc - | // Returns GCfuncL *. - | ldr BASE, L->base - | movn TMP0, #~LJ_TFUNC - | add CRET1, CRET1, TMP0, lsl #47 - | str CRET1, [BASE, RA, lsl #3] - | ins_next - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst, RC = (hbits|asize) | tab_const (~) - | ldp CARG3, CARG4, GL->gc.total // Assumes threshold follows total. 
- | str BASE, L->base - | str PC, SAVE_PC - | mov CARG1, L - | cmp CARG3, CARG4 - | bhs >5 - |1: - if (op == BC_TNEW) { - | and CARG2, RC, #0x7ff - | lsr CARG3, RC, #11 - | cmp CARG2, #0x7ff - | mov TMP0, #0x801 - | csel CARG2, CARG2, TMP0, ne - | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) - | // Returns GCtab *. - } else { - | mvn RC, RC - | ldr CARG2, [KBASE, RC, lsl #3] - | bl extern lj_tab_dup // (lua_State *L, Table *kt) - | // Returns GCtab *. - } - | ldr BASE, L->base - | movk CRET1, #(LJ_TTAB>>1)&0xffff, lsl #48 - | str CRET1, [BASE, RA, lsl #3] - | ins_next - | - |5: - | bl extern lj_gc_step_fixtop // (lua_State *L) - | mov CARG1, L - | b <1 - break; - - case BC_GGET: - | // RA = dst, RC = str_const (~) - case BC_GSET: - | // RA = dst, RC = str_const (~) - | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] - | mvn RC, RC - | and LFUNC:CARG1, CARG1, #LJ_GCVMASK - | ldr TAB:CARG2, LFUNC:CARG1->env - | ldr STR:RC, [KBASE, RC, lsl #3] - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - break; - - case BC_TGETV: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = table, RC = key - | ldr CARG2, [BASE, RB, lsl #3] - | ldr TMP1, [BASE, RC, lsl #3] - | checktab CARG2, ->vmeta_tgetv - | checkint TMP1, >9 // Integer key? - | ldr CARG3, TAB:CARG2->array - | ldr CARG1w, TAB:CARG2->asize - | add CARG3, CARG3, TMP1, uxtw #3 - | cmp TMP1w, CARG1w // In array part? - | bhs ->vmeta_tgetv - | ldr TMP0, [CARG3] - | cmp TMP0, TISNIL - | beq >5 - |1: - | str TMP0, [BASE, RA, lsl #3] - | ins_next - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <1 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done. - | b ->vmeta_tgetv - | - |9: - | asr ITYPE, TMP1, #47 - | cmn ITYPE, #-LJ_TSTR // String key? 
- | bne ->vmeta_tgetv - | and STR:RC, TMP1, #LJ_GCVMASK - | b ->BC_TGETS_Z - break; - case BC_TGETS: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = table, RC = str_const (~) - | ldr CARG2, [BASE, RB, lsl #3] - | mvn RC, RC - | ldr STR:RC, [KBASE, RC, lsl #3] - | checktab CARG2, ->vmeta_tgets1 - |->BC_TGETS_Z: - | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst - | ldr TMP1w, TAB:CARG2->hmask - | ldr TMP2w, STR:RC->hash - | ldr NODE:CARG3, TAB:CARG2->node - | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask - | add TMP1, TMP1, TMP1, lsl #1 - | movn CARG4, #~LJ_TSTR - | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 - | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. - |1: - | ldp TMP0, CARG1, NODE:CARG3->val - | ldr NODE:CARG3, NODE:CARG3->next - | cmp CARG1, CARG4 - | bne >4 - | cmp TMP0, TISNIL - | beq >5 - |3: - | str TMP0, [BASE, RA, lsl #3] - | ins_next - | - |4: // Follow hash chain. - | cbnz NODE:CARG3, <1 - | // End of hash chain: key not found, nil result. - | mov TMP0, TISNIL - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <3 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_index, <3 // 'no __index' flag set: done. - | b ->vmeta_tgets - break; - case BC_TGETB: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = table, RC = index - | ldr CARG2, [BASE, RB, lsl #3] - | checktab CARG2, ->vmeta_tgetb - | ldr CARG3, TAB:CARG2->array - | ldr CARG1w, TAB:CARG2->asize - | add CARG3, CARG3, RC, lsl #3 - | cmp RCw, CARG1w // In array part? - | bhs ->vmeta_tgetb - | ldr TMP0, [CARG3] - | cmp TMP0, TISNIL - | beq >5 - |1: - | str TMP0, [BASE, RA, lsl #3] - | ins_next - | - |5: // Check for __index if table value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <1 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done. 
- | b ->vmeta_tgetb - break; - case BC_TGETR: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = table, RC = key - | ldr CARG1, [BASE, RB, lsl #3] - | ldr TMP1, [BASE, RC, lsl #3] - | and TAB:CARG1, CARG1, #LJ_GCVMASK - | ldr CARG3, TAB:CARG1->array - | ldr TMP2w, TAB:CARG1->asize - | add CARG3, CARG3, TMP1w, uxtw #3 - | cmp TMP1w, TMP2w // In array part? - | bhs ->vmeta_tgetr - | ldr TMP0, [CARG3] - |->BC_TGETR_Z: - | str TMP0, [BASE, RA, lsl #3] - | ins_next - break; - - case BC_TSETV: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = src, RB = table, RC = key - | ldr CARG2, [BASE, RB, lsl #3] - | ldr TMP1, [BASE, RC, lsl #3] - | checktab CARG2, ->vmeta_tsetv - | checkint TMP1, >9 // Integer key? - | ldr CARG3, TAB:CARG2->array - | ldr CARG1w, TAB:CARG2->asize - | add CARG3, CARG3, TMP1, uxtw #3 - | cmp TMP1w, CARG1w // In array part? - | bhs ->vmeta_tsetv - | ldr TMP1, [CARG3] - | ldr TMP0, [BASE, RA, lsl #3] - | ldrb TMP2w, TAB:CARG2->marked - | cmp TMP1, TISNIL // Previous value is nil? - | beq >5 - |1: - | str TMP0, [CARG3] - | tbnz TMP2w, #2, >7 // isblack(table) - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <1 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done. - | b ->vmeta_tsetv - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP2w, TMP1 - | b <2 - | - |9: - | asr ITYPE, TMP1, #47 - | cmn ITYPE, #-LJ_TSTR // String key? 
- | bne ->vmeta_tsetv - | and STR:RC, TMP1, #LJ_GCVMASK - | b ->BC_TSETS_Z - break; - case BC_TSETS: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = dst, RB = table, RC = str_const (~) - | ldr CARG2, [BASE, RB, lsl #3] - | mvn RC, RC - | ldr STR:RC, [KBASE, RC, lsl #3] - | checktab CARG2, ->vmeta_tsets1 - |->BC_TSETS_Z: - | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src - | ldr TMP1w, TAB:CARG2->hmask - | ldr TMP2w, STR:RC->hash - | ldr NODE:CARG3, TAB:CARG2->node - | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask - | add TMP1, TMP1, TMP1, lsl #1 - | movn CARG4, #~LJ_TSTR - | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 - | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. - | strb wzr, TAB:CARG2->nomm // Clear metamethod cache. - |1: - | ldp TMP1, CARG1, NODE:CARG3->val - | ldr NODE:TMP3, NODE:CARG3->next - | ldrb TMP2w, TAB:CARG2->marked - | cmp CARG1, CARG4 - | bne >5 - | ldr TMP0, [BASE, RA, lsl #3] - | cmp TMP1, TISNIL // Previous value is nil? - | beq >4 - |2: - | str TMP0, NODE:CARG3->val - | tbnz TMP2w, #2, >7 // isblack(table) - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <2 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_newindex, <2 // 'no __newindex' flag set: done. - | b ->vmeta_tsets - | - |5: // Follow hash chain. - | mov NODE:CARG3, NODE:TMP3 - | cbnz NODE:TMP3, <1 - | // End of hash chain: key not found, add a new one. - | - | // But check for __newindex first. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, >6 // No metatable: continue. - | ldrb TMP1w, TAB:CARG1->nomm - | // 'no __newindex' flag NOT set: check. 
- | tbz TMP1w, #MM_newindex, ->vmeta_tsets - |6: - | movn TMP1, #~LJ_TSTR - | str PC, SAVE_PC - | add TMP0, STR:RC, TMP1, lsl #47 - | str BASE, L->base - | mov CARG1, L - | str TMP0, TMPD - | add CARG3, sp, TMPDofs - | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) - | // Returns TValue *. - | ldr BASE, L->base - | ldr TMP0, [BASE, RA, lsl #3] - | str TMP0, [CRET1] - | b <3 // No 2nd write barrier needed. - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP2w, TMP1 - | b <3 - break; - case BC_TSETB: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = src, RB = table, RC = index - | ldr CARG2, [BASE, RB, lsl #3] - | checktab CARG2, ->vmeta_tsetb - | ldr CARG3, TAB:CARG2->array - | ldr CARG1w, TAB:CARG2->asize - | add CARG3, CARG3, RC, lsl #3 - | cmp RCw, CARG1w // In array part? - | bhs ->vmeta_tsetb - | ldr TMP1, [CARG3] - | ldr TMP0, [BASE, RA, lsl #3] - | ldrb TMP2w, TAB:CARG2->marked - | cmp TMP1, TISNIL // Previous value is nil? - | beq >5 - |1: - | str TMP0, [CARG3] - | tbnz TMP2w, #2, >7 // isblack(table) - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | ldr TAB:CARG1, TAB:CARG2->metatable - | cbz TAB:CARG1, <1 // No metatable: done. - | ldrb TMP1w, TAB:CARG1->nomm - | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done. - | b ->vmeta_tsetb - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP2w, TMP1 - | b <2 - break; - case BC_TSETR: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = src, RB = table, RC = key - | ldr CARG2, [BASE, RB, lsl #3] - | ldr TMP1, [BASE, RC, lsl #3] - | and TAB:CARG2, CARG2, #LJ_GCVMASK - | ldr CARG1, TAB:CARG2->array - | ldrb TMP2w, TAB:CARG2->marked - | ldr CARG4w, TAB:CARG2->asize - | add CARG1, CARG1, TMP1, uxtw #3 - | tbnz TMP2w, #2, >7 // isblack(table) - |2: - | cmp TMP1w, CARG4w // In array part? 
- | bhs ->vmeta_tsetr - |->BC_TSETR_Z: - | ldr TMP0, [BASE, RA, lsl #3] - | str TMP0, [CARG1] - | ins_next - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP2w, TMP0 - | b <2 - break; - - case BC_TSETM: - | // RA = base (table at base-1), RC = num_const (start index) - | add RA, BASE, RA, lsl #3 - |1: - | ldr RBw, SAVE_MULTRES - | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table. - | ldr TMP1, [KBASE, RC, lsl #3] // Integer constant is in lo-word. - | sub RB, RB, #8 - | cbz RB, >4 // Nothing to copy? - | and TAB:CARG2, CARG2, #LJ_GCVMASK - | ldr CARG1w, TAB:CARG2->asize - | add CARG3w, TMP1w, RBw, lsr #3 - | ldr CARG4, TAB:CARG2->array - | cmp CARG3, CARG1 - | add RB, RA, RB - | bhi >5 - | add TMP1, CARG4, TMP1w, uxtw #3 - | ldrb TMP2w, TAB:CARG2->marked - |3: // Copy result slots to table. - | ldr TMP0, [RA], #8 - | str TMP0, [TMP1], #8 - | cmp RA, RB - | blo <3 - | tbnz TMP2w, #2, >7 // isblack(table) - |4: - | ins_next - | - |5: // Need to resize array part. - | str BASE, L->base - | mov CARG1, L - | str PC, SAVE_PC - | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - | // Must not reallocate the stack. - | b <1 - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP2w, TMP1 - | b <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base, (RB = nresults+1,) RC = extra_nargs - | ldr TMP0w, SAVE_MULTRES - | decode_RC8RD NARGS8:RC, RC - | add NARGS8:RC, NARGS8:RC, TMP0 - | b ->BC_CALL_Z - break; - case BC_CALL: - | decode_RC8RD NARGS8:RC, RC - | // RA = base, (RB = nresults+1,) RC = (nargs+1)*8 - |->BC_CALL_Z: - | mov RB, BASE // Save old BASE for vmeta_call. 
- | add BASE, BASE, RA, lsl #3 - | ldr CARG3, [BASE] - | sub NARGS8:RC, NARGS8:RC, #8 - | add BASE, BASE, #16 - | checkfunc CARG3, ->vmeta_call - | ins_call - break; - - case BC_CALLMT: - | // RA = base, (RB = 0,) RC = extra_nargs - | ldr TMP0w, SAVE_MULTRES - | add NARGS8:RC, TMP0, RC, lsl #3 - | b ->BC_CALLT1_Z - break; - case BC_CALLT: - | lsl NARGS8:RC, RC, #3 - | // RA = base, (RB = 0,) RC = (nargs+1)*8 - |->BC_CALLT1_Z: - | add RA, BASE, RA, lsl #3 - | ldr TMP1, [RA] - | sub NARGS8:RC, NARGS8:RC, #8 - | add RA, RA, #16 - | checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt - | ldr PC, [BASE, FRAME_PC] - |->BC_CALLT2_Z: - | mov RB, #0 - | ldrb TMP2w, LFUNC:CARG3->ffid - | tst PC, #FRAME_TYPE - | bne >7 - |1: - | str TMP1, [BASE, FRAME_FUNC] // Copy function down, but keep PC. - | cbz NARGS8:RC, >3 - |2: - | ldr TMP0, [RA, RB] - | add TMP1, RB, #8 - | cmp TMP1, NARGS8:RC - | str TMP0, [BASE, RB] - | mov RB, TMP1 - | bne <2 - |3: - | cmp TMP2, #1 // (> FF_C) Calling a fast function? - | bhi >5 - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | ldrb RAw, [PC, #-3] - | sub CARG1, BASE, RA, lsl #3 - | ldr LFUNC:CARG1, [CARG1, #-32] - | and LFUNC:CARG1, CARG1, #LJ_GCVMASK - | ldr CARG1, LFUNC:CARG1->pc - | ldr KBASE, [CARG1, #PC2PROTO(k)] - | b <4 - | - |7: // Tailcall from a vararg function. - | eor PC, PC, #FRAME_VARG - | tst PC, #FRAME_TYPEP // Vararg frame below? - | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below. - | bne <1 - | sub BASE, BASE, PC - | ldr PC, [BASE, FRAME_PC] - | tst PC, #FRAME_TYPE - | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below. - | b <1 - break; - - case BC_ITERC: - | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) - | add RA, BASE, RA, lsl #3 - | ldr CARG3, [RA, #-24] - | mov RB, BASE // Save old BASE for vmeta_call. - | ldp CARG1, CARG2, [RA, #-16] - | add BASE, RA, #16 - | mov NARGS8:RC, #16 // Iterators get 2 arguments. - | str CARG3, [RA] // Copy callable. 
- | stp CARG1, CARG2, [RA, #16] // Copy state and control var. - | checkfunc CARG3, ->vmeta_call - | ins_call - break; - - case BC_ITERN: - | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | add RA, BASE, RA, lsl #3 - | ldr TAB:RB, [RA, #-16] - | ldrh TMP3w, [PC, #2] - | ldr CARG1w, [RA, #-8] // Get index from control var. - | add PC, PC, #4 - | add TMP3, PC, TMP3, lsl #2 - | and TAB:RB, RB, #LJ_GCVMASK - | sub TMP3, TMP3, #0x20000 - | ldr TMP1w, TAB:RB->asize - | ldr CARG2, TAB:RB->array - |1: // Traverse array part. - | subs RC, CARG1, TMP1 - | add CARG3, CARG2, CARG1, lsl #3 - | bhs >5 // Index points after array part? - | ldr TMP0, [CARG3] - | cmp TMP0, TISNIL - | cinc CARG1, CARG1, eq // Skip holes in array part. - | beq <1 - | add CARG1, CARG1, TISNUM - | stp CARG1, TMP0, [RA] - | add CARG1, CARG1, #1 - |3: - | str CARG1w, [RA, #-8] // Update control var. - | mov PC, TMP3 - |4: - | ins_next - | - |5: // Traverse hash part. - | ldr TMP2w, TAB:RB->hmask - | ldr NODE:RB, TAB:RB->node - |6: - | add CARG1, RC, RC, lsl #1 - | cmp RC, TMP2 // End of iteration? Branch to ITERN+1. - | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8 - | bhi <4 - | ldp TMP0, CARG1, NODE:CARG3->val - | cmp TMP0, TISNIL - | add RC, RC, #1 - | beq <6 // Skip holes in hash part. - | stp CARG1, TMP0, [RA] - | add CARG1, RC, TMP1 - | b <3 - break; - - case BC_ISNEXT: - | // RA = base, RC = target (points to ITERN) - | add RA, BASE, RA, lsl #3 - | ldr CFUNC:CARG1, [RA, #-24] - | add RC, PC, RC, lsl #2 - | ldp TAB:CARG3, CARG4, [RA, #-16] - | sub RC, RC, #0x20000 - | checkfunc CFUNC:CARG1, >5 - | asr TMP0, TAB:CARG3, #47 - | ldrb TMP1w, CFUNC:CARG1->ffid - | cmn TMP0, #-LJ_TTAB - | ccmp CARG4, TISNIL, #0, eq - | ccmp TMP1w, #FF_next_N, #0, eq - | bne >5 - | mov TMP0w, #0xfffe7fff - | lsl TMP0, TMP0, #32 - | str TMP0, [RA, #-8] // Initialize control var. 
- |1: - | mov PC, RC - | ins_next - | - |5: // Despecialize bytecode if any of the checks fail. - | mov TMP0, #BC_JMP - | mov TMP1, #BC_ITERC - | strb TMP0w, [PC, #-4] - | strb TMP1w, [RC] - | b <1 - break; - - case BC_VARG: - | decode_RB RB, INS - | and RC, RC, #255 - | // RA = base, RB = (nresults+1), RC = numparams - | ldr TMP1, [BASE, FRAME_PC] - | add RC, BASE, RC, lsl #3 - | add RA, BASE, RA, lsl #3 - | add RC, RC, #FRAME_VARG - | add TMP2, RA, RB, lsl #3 - | sub RC, RC, TMP1 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | sub TMP3, BASE, #16 // TMP3 = vtop - | cbz RB, >5 - | sub TMP2, TMP2, #16 - |1: // Copy vararg slots to destination slots. - | cmp RC, TMP3 - | ldr TMP0, [RC], #8 - | csel TMP0, TMP0, TISNIL, lo - | cmp RA, TMP2 - | str TMP0, [RA], #8 - | blo <1 - |2: - | ins_next - | - |5: // Copy all varargs. - | ldr TMP0, L->maxstack - | subs TMP2, TMP3, RC - | csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8 - | add RB, RB, #8 - | add TMP1, RA, TMP2 - | str RBw, SAVE_MULTRES - | ble <2 // Nothing to copy. - | cmp TMP1, TMP0 - | bhi >7 - |6: - | ldr TMP0, [RC], #8 - | str TMP0, [RA], #8 - | cmp RC, TMP3 - | blo <6 - | b <2 - | - |7: // Grow stack for varargs. - | lsr CARG2, TMP2, #3 - | stp BASE, RA, L->base - | mov CARG1, L - | sub RC, RC, BASE // Need delta, because BASE may change. 
- | str PC, SAVE_PC - | bl extern lj_state_growstack // (lua_State *L, int n) - | ldp BASE, RA, L->base - | add RC, BASE, RC - | sub TMP3, BASE, #16 - | b <6 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results, RC = extra results - | ldr TMP0w, SAVE_MULTRES - | ldr PC, [BASE, FRAME_PC] - | add RA, BASE, RA, lsl #3 - | add RC, TMP0, RC, lsl #3 - | b ->BC_RETM_Z - break; - - case BC_RET: - | // RA = results, RC = nresults+1 - | ldr PC, [BASE, FRAME_PC] - | lsl RC, RC, #3 - | add RA, BASE, RA, lsl #3 - |->BC_RETM_Z: - | str RCw, SAVE_MULTRES - |1: - | ands CARG1, PC, #FRAME_TYPE - | eor CARG2, PC, #FRAME_VARG - | bne ->BC_RETV2_Z - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return - | ldr INSw, [PC, #-4] - | subs TMP1, RC, #8 - | sub CARG3, BASE, #16 - | beq >3 - |2: - | ldr TMP0, [RA], #8 - | add BASE, BASE, #8 - | sub TMP1, TMP1, #8 - | str TMP0, [BASE, #-24] - | cbnz TMP1, <2 - |3: - | decode_RA RA, INS - | sub CARG4, CARG3, RA, lsl #3 - | decode_RB RB, INS - | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC] - |5: - | cmp RC, RB, lsl #3 // More results expected? - | blo >6 - | and LFUNC:CARG1, CARG1, #LJ_GCVMASK - | mov BASE, CARG4 - | ldr CARG2, LFUNC:CARG1->pc - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | ins_next - | - |6: // Fill up results with nil. - | add BASE, BASE, #8 - | add RC, RC, #8 - | str TISNIL, [BASE, #-24] - | b <5 - | - |->BC_RETV1_Z: // Non-standard return case. - | add RA, BASE, RA, lsl #3 - |->BC_RETV2_Z: - | tst CARG2, #FRAME_TYPEP - | bne ->vm_return - | // Return from vararg function: relocate BASE down. 
- | sub BASE, BASE, CARG2 - | ldr PC, [BASE, FRAME_PC] - | b <1 - break; - - case BC_RET0: case BC_RET1: - | // RA = results, RC = nresults+1 - | ldr PC, [BASE, FRAME_PC] - | lsl RC, RC, #3 - | str RCw, SAVE_MULTRES - | ands CARG1, PC, #FRAME_TYPE - | eor CARG2, PC, #FRAME_VARG - | bne ->BC_RETV1_Z - | ldr INSw, [PC, #-4] - if (op == BC_RET1) { - | ldr TMP0, [BASE, RA, lsl #3] - } - | sub CARG4, BASE, #16 - | decode_RA RA, INS - | sub BASE, CARG4, RA, lsl #3 - if (op == BC_RET1) { - | str TMP0, [CARG4], #8 - } - | decode_RB RB, INS - | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] - |5: - | cmp RC, RB, lsl #3 - | blo >6 - | and LFUNC:CARG1, CARG1, #LJ_GCVMASK - | ldr CARG2, LFUNC:CARG1->pc - | ldr KBASE, [CARG2, #PC2PROTO(k)] - | ins_next - | - |6: // Fill up results with nil. - | add RC, RC, #8 - | str TISNIL, [CARG4], #8 - | b <5 - break; - - /* -- Loops and branches ------------------------------------------------ */ - - |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4] - |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12] - |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20] - |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28] - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. 
- break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base, RC = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - | add RA, BASE, RA, lsl #3 - | ldp CARG1, CARG2, FOR_IDX // CARG1 = IDX, CARG2 = STOP - | ldr CARG3, FOR_STEP // CARG3 = STEP - if (op != BC_JFORL) { - | add RC, PC, RC, lsl #2 - | sub RC, RC, #0x20000 - } - | checkint CARG1, >5 - if (!vk) { - | checkint CARG2, ->vmeta_for - | checkint CARG3, ->vmeta_for - | tbnz CARG3w, #31, >4 - | cmp CARG1w, CARG2w - } else { - | adds CARG1w, CARG1w, CARG3w - | bvs >2 - | add TMP0, CARG1, TISNUM - | tbnz CARG3w, #31, >4 - | cmp CARG1w, CARG2w - } - |1: - if (op == BC_FORI) { - | csel PC, RC, PC, gt - } else if (op == BC_JFORI) { - | mov PC, RC - | ldrh RCw, [RC, #-2] - } else if (op == BC_IFORL) { - | csel PC, RC, PC, le - } - if (vk) { - | str TMP0, FOR_IDX - | str TMP0, FOR_EXT - } else { - | str CARG1, FOR_EXT - } - if (op == BC_JFORI || op == BC_JFORL) { - | ble =>BC_JLOOP - } - |2: - | ins_next - | - |4: // Invert check for negative step. - | cmp CARG2w, CARG1w - | b <1 - | - |5: // FP loop. - | ldp d0, d1, FOR_IDX - | blo ->vmeta_for - if (!vk) { - | checknum CARG2, ->vmeta_for - | checknum CARG3, ->vmeta_for - | str d0, FOR_EXT - } else { - | ldr d2, FOR_STEP - | fadd d0, d0, d2 - } - | tbnz CARG3, #63, >7 - | fcmp d0, d1 - |6: - if (vk) { - | str d0, FOR_IDX - | str d0, FOR_EXT - } - if (op == BC_FORI) { - | csel PC, RC, PC, hi - } else if (op == BC_JFORI) { - | ldrh RCw, [RC, #-2] - | bls =>BC_JLOOP - } else if (op == BC_IFORL) { - | csel PC, RC, PC, ls - } else { - | bls =>BC_JLOOP - } - | b <2 - | - |7: // Invert check for negative step. - | fcmp d1, d0 - | b <6 - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. 
- break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base, RC = target - | ldr CARG1, [BASE, RA, lsl #3] - | add TMP1, BASE, RA, lsl #3 - | cmp CARG1, TISNIL - | beq >1 // Stop if iterator returned nil. - if (op == BC_JITERL) { - | str CARG1, [TMP1, #-8] - | b =>BC_JLOOP - } else { - | add TMP0, PC, RC, lsl #2 // Otherwise save control var + branch. - | sub PC, TMP0, #0x20000 - | str CARG1, [TMP1, #-8] - } - |1: - | ins_next - break; - - case BC_LOOP: - | // RA = base, RC = target (loop extent) - | // Note: RA/RC is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. - break; - - case BC_ILOOP: - | // RA = base, RC = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | // RA = base (ignored), RC = traceno - | ldr CARG1, [GL, #GL_J(trace)] - | mov CARG2, #0 // Traces on ARM64 don't store the trace #, so use 0. - | ldr TRACE:RC, [CARG1, RC, lsl #3] - | st_vmstate CARG2 - | ldr RA, TRACE:RC->mcode - | str BASE, GL->jit_base - | str L, GL->tmpbuf.L - | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace. - | br RA - |.endif - break; - - case BC_JMP: - | // RA = base (only used by trace recorder), RC = target - | add RC, PC, RC, lsl #2 - | sub PC, RC, #0x20000 - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. 
- break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 - | ldr CARG1, L->maxstack - | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)] - | ldr KBASE, [PC, #-4+PC2PROTO(k)] - | cmp RA, CARG1 - | bhi ->vm_growstack_l - |2: - | cmp NARGS8:RC, TMP1, lsl #3 // Check for missing parameters. - | blo >3 - if (op == BC_JFUNCF) { - | decode_RD RC, INS - | b =>BC_JLOOP - } else { - | ins_next - } - | - |3: // Clear missing parameters. - | str TISNIL, [BASE, NARGS8:RC] - | add NARGS8:RC, NARGS8:RC, #8 - | b <2 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 - | ldr CARG1, L->maxstack - | movn TMP0, #~LJ_TFUNC - | add TMP2, BASE, RC - | add LFUNC:CARG3, CARG3, TMP0, lsl #47 - | add RA, RA, RC - | add TMP0, RC, #16+FRAME_VARG - | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC. - | ldr KBASE, [PC, #-4+PC2PROTO(k)] - | cmp RA, CARG1 - | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG. - | bhs ->vm_growstack_l - | sub RC, TMP2, #16 - | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)] - | mov RA, BASE - | mov BASE, TMP2 - | cbz TMP1, >2 - |1: - | cmp RA, RC // Less args than parameters? - | bhs >3 - | ldr TMP0, [RA] - | sub TMP1, TMP1, #1 - | str TISNIL, [RA], #8 // Clear old fixarg slot (help the GC). 
- | str TMP0, [TMP2], #8 - | cbnz TMP1, <1 - |2: - | ins_next - | - |3: - | sub TMP1, TMP1, #1 - | str TISNIL, [TMP2], #8 - | cbz TMP1, <2 - | b <3 - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | ldr CARG4, CFUNC:CARG3->f - } else { - | ldr CARG4, GL->wrapf - } - | add CARG2, RA, NARGS8:RC - | ldr CARG1, L->maxstack - | add RC, BASE, NARGS8:RC - | cmp CARG2, CARG1 - | stp BASE, RC, L->base - if (op == BC_FUNCCW) { - | ldr CARG2, CFUNC:CARG3->f - } - | mv_vmstate TMP0w, C - | mov CARG1, L - | bhi ->vm_growstack_c // Need to grow stack. - | st_vmstate TMP0w - | blr CARG4 // (lua_State *L [, lua_CFunction f]) - | // Returns nresults. - | ldp BASE, TMP1, L->base - | str L, GL->cur_L - | sbfiz RC, CRET1, #3, #32 - | st_vmstate ST_INTERP - | ldr PC, [BASE, FRAME_PC] - | sub RA, TMP1, RC // RA = L->top - nresults*8 - | b ->vm_returnc - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. */ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i, cf = CFRAME_SIZE >> 3; - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.long .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.long 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -8\n" - "\t.byte 30\n" /* Return address is in lr. 
*/ - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ - "\t.align 3\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.long .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.long .Lframe0\n" - "\t.quad .Lbegin\n" - "\t.quad %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ - fcofs, CFRAME_SIZE, cf, cf-1); - for (i = 19; i <= 28; i++) /* offset x19-x28 */ - fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); - for (i = 8; i <= 15; i++) /* offset d8-d15 */ - fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", - 64+i, cf-i-4); - fprintf(ctx->fp, - "\t.align 3\n" - ".LEFDE0:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.long .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.long .Lframe0\n" - "\t.quad lj_vm_ffi_call\n" - "\t.quad %d\n" - "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ - "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ - "\t.align 3\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif - fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n"); - fprintf(ctx->fp, - ".Lframe1:\n" - "\t.long .LECIE1-.LSCIE1\n" - ".LSCIE1:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zPR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -8\n" - "\t.byte 30\n" /* Return address is in lr. 
*/ - "\t.uleb128 6\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.long lj_err_unwind_dwarf-.\n" - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ - "\t.align 3\n" - ".LECIE1:\n\n"); - fprintf(ctx->fp, - ".LSFDE2:\n" - "\t.long .LEFDE2-.LASFDE2\n" - ".LASFDE2:\n" - "\t.long .LASFDE2-.Lframe1\n" - "\t.long .Lbegin-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */ - fcofs, CFRAME_SIZE, cf, cf-1); - for (i = 19; i <= 28; i++) /* offset x19-x28 */ - fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17); - for (i = 8; i <= 15; i++) /* offset d8-d15 */ - fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", - 64+i, cf-i-4); - fprintf(ctx->fp, - "\t.align 3\n" - ".LEFDE2:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".Lframe2:\n" - "\t.long .LECIE2-.LSCIE2\n" - ".LSCIE2:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -8\n" - "\t.byte 30\n" /* Return address is in lr. 
*/ - "\t.uleb128 1\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ - "\t.align 3\n" - ".LECIE2:\n\n"); - fprintf(ctx->fp, - ".LSFDE3:\n" - "\t.long .LEFDE3-.LASFDE3\n" - ".LASFDE3:\n" - "\t.long .LASFDE3-.Lframe2\n" - "\t.long lj_vm_ffi_call-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ - "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ - "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ - "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */ - "\t.align 3\n" - ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); -#endif - break; - default: - break; - } -} - diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc deleted file mode 100644 index 1afd61187a..0000000000 --- a/src/vm_mips.dasc +++ /dev/null @@ -1,5264 +0,0 @@ -|// Low-level VM code for MIPS CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -|// -|// MIPS soft-float support contributed by Djordje Kovacevic and -|// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc. -| -|.arch mips -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. -|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// Fixed register assignments for the interpreter. -|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra -| -|.macro .FPU, a, b -|.if FPU -| a, b -|.endif -|.endmacro -| -|// The following must be C callee-save (but BASE is often refetched). -|.define BASE, r16 // Base of current Lua stack frame. -|.define KBASE, r17 // Constants of current Lua function. -|.define PC, r18 // Next PC. -|.define DISPATCH, r19 // Opcode dispatch table. 
-|.define LREG, r20 // Register holding lua_State (also in SAVE_L). -|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. -| -|.define JGL, r30 // On-trace: global_State + 32768. -| -|// Constants for type-comparisons, stores and conversions. C callee-save. -|.define TISNUM, r22 -|.define TISNIL, r30 -|.if FPU -|.define TOBIT, f30 // 2^52 + 2^51. -|.endif -| -|// The following temporaries are not saved across C calls, except for RA. -|.define RA, r23 // Callee-save. -|.define RB, r8 -|.define RC, r9 -|.define RD, r10 -|.define INS, r11 -| -|.define AT, r1 // Assembler temporary. -|.define TMP0, r12 -|.define TMP1, r13 -|.define TMP2, r14 -|.define TMP3, r15 -| -|// MIPS o32 calling convention. -|.define CFUNCADDR, r25 -|.define CARG1, r4 -|.define CARG2, r5 -|.define CARG3, r6 -|.define CARG4, r7 -| -|.define CRET1, r2 -|.define CRET2, r3 -| -|.if ENDIAN_LE -|.define SFRETLO, CRET1 -|.define SFRETHI, CRET2 -|.define SFARG1LO, CARG1 -|.define SFARG1HI, CARG2 -|.define SFARG2LO, CARG3 -|.define SFARG2HI, CARG4 -|.else -|.define SFRETLO, CRET2 -|.define SFRETHI, CRET1 -|.define SFARG1LO, CARG2 -|.define SFARG1HI, CARG1 -|.define SFARG2LO, CARG4 -|.define SFARG2HI, CARG3 -|.endif -| -|.if FPU -|.define FARG1, f12 -|.define FARG2, f14 -| -|.define FRET1, f0 -|.define FRET2, f2 -|.endif -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|.if FPU // MIPS32 hard-float. -| -|.define CFRAME_SPACE, 112 // Delta for sp. -| -|.define SAVE_ERRF, 124(sp) // 32 bit C frame info. -|.define SAVE_NRES, 120(sp) -|.define SAVE_CFRAME, 116(sp) -|.define SAVE_L, 112(sp) -|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. -|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves. -|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves. -| -|.else // MIPS32 soft-float -| -|.define CFRAME_SPACE, 64 // Delta for sp. -| -|.define SAVE_ERRF, 76(sp) // 32 bit C frame info. 
-|.define SAVE_NRES, 72(sp) -|.define SAVE_CFRAME, 68(sp) -|.define SAVE_L, 64(sp) -|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. -|.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves. -| -|.endif -| -|.define SAVE_PC, 20(sp) -|.define ARG5, 16(sp) -|.define CSAVE_4, 12(sp) -|.define CSAVE_3, 8(sp) -|.define CSAVE_2, 4(sp) -|.define CSAVE_1, 0(sp) -|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by callee. -| -|.define ARG5_OFS, 16 -|.define SAVE_MULTRES, ARG5 -| -|//----------------------------------------------------------------------- -| -|.macro saveregs -| addiu sp, sp, -CFRAME_SPACE -| sw ra, SAVE_GPR_+9*4(sp) -| sw r30, SAVE_GPR_+8*4(sp) -| .FPU sdc1 f30, SAVE_FPR_+5*8(sp) -| sw r23, SAVE_GPR_+7*4(sp) -| sw r22, SAVE_GPR_+6*4(sp) -| .FPU sdc1 f28, SAVE_FPR_+4*8(sp) -| sw r21, SAVE_GPR_+5*4(sp) -| sw r20, SAVE_GPR_+4*4(sp) -| .FPU sdc1 f26, SAVE_FPR_+3*8(sp) -| sw r19, SAVE_GPR_+3*4(sp) -| sw r18, SAVE_GPR_+2*4(sp) -| .FPU sdc1 f24, SAVE_FPR_+2*8(sp) -| sw r17, SAVE_GPR_+1*4(sp) -| sw r16, SAVE_GPR_+0*4(sp) -| .FPU sdc1 f22, SAVE_FPR_+1*8(sp) -| .FPU sdc1 f20, SAVE_FPR_+0*8(sp) -|.endmacro -| -|.macro restoreregs_ret -| lw ra, SAVE_GPR_+9*4(sp) -| lw r30, SAVE_GPR_+8*4(sp) -| .FPU ldc1 f30, SAVE_FPR_+5*8(sp) -| lw r23, SAVE_GPR_+7*4(sp) -| lw r22, SAVE_GPR_+6*4(sp) -| .FPU ldc1 f28, SAVE_FPR_+4*8(sp) -| lw r21, SAVE_GPR_+5*4(sp) -| lw r20, SAVE_GPR_+4*4(sp) -| .FPU ldc1 f26, SAVE_FPR_+3*8(sp) -| lw r19, SAVE_GPR_+3*4(sp) -| lw r18, SAVE_GPR_+2*4(sp) -| .FPU ldc1 f24, SAVE_FPR_+2*8(sp) -| lw r17, SAVE_GPR_+1*4(sp) -| lw r16, SAVE_GPR_+0*4(sp) -| .FPU ldc1 f22, SAVE_FPR_+1*8(sp) -| .FPU ldc1 f20, SAVE_FPR_+0*8(sp) -| jr ra -| addiu sp, sp, CFRAME_SPACE -|.endmacro -| -|// Type definitions. Some of these are only used for documentation. 
-|.type L, lua_State, LREG -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|//----------------------------------------------------------------------- -| -|// Trap for not-yet-implemented parts. -|.macro NYI; .long 0xf0f0f0f0; .endmacro -| -|// Macros to mark delay slots. -|.macro ., a; a; .endmacro -|.macro ., a,b; a,b; .endmacro -|.macro ., a,b,c; a,b,c; .endmacro -| -|//----------------------------------------------------------------------- -| -|// Endian-specific defines. -|.if ENDIAN_LE -|.define FRAME_PC, -4 -|.define FRAME_FUNC, -8 -|.define HI, 4 -|.define LO, 0 -|.define OFS_RD, 2 -|.define OFS_RA, 1 -|.define OFS_OP, 0 -|.else -|.define FRAME_PC, -8 -|.define FRAME_FUNC, -4 -|.define HI, 0 -|.define LO, 4 -|.define OFS_RD, 0 -|.define OFS_RA, 2 -|.define OFS_OP, 3 -|.endif -| -|// Instruction decode. -|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro -|.macro decode_OP4a, dst, ins; andi dst, ins, 0xff; .endmacro -|.macro decode_OP4b, dst; sll dst, dst, 2; .endmacro -|.macro decode_RC4a, dst, ins; srl dst, ins, 14; .endmacro -|.macro decode_RC4b, dst; andi dst, dst, 0x3fc; .endmacro -|.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro -|.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro -|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro -|.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro -|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro -|.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro -|.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro -|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro -| -|// Instruction fetch. -|.macro ins_NEXT1 -| lw INS, 0(PC) -| addiu PC, PC, 4 -|.endmacro -|// Instruction decode+dispatch. 
-|.macro ins_NEXT2 -| decode_OP4a TMP1, INS -| decode_OP4b TMP1 -| addu TMP0, DISPATCH, TMP1 -| decode_RD8a RD, INS -| lw AT, 0(TMP0) -| decode_RA8a RA, INS -| decode_RD8b RD -| jr AT -| decode_RA8b RA -|.endmacro -|.macro ins_NEXT -| ins_NEXT1 -| ins_NEXT2 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -| .define ins_next1, ins_NEXT1 -| .define ins_next2, ins_NEXT2 -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next1 -| .endmacro -| .macro ins_next2 -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| lw PC, LFUNC:RB->pc -| lw INS, 0(PC) -| addiu PC, PC, 4 -| decode_OP4a TMP1, INS -| decode_RA8a RA, INS -| decode_OP4b TMP1 -| decode_RA8b RA -| addu TMP0, DISPATCH, TMP1 -| lw TMP0, 0(TMP0) -| jr TMP0 -| addu RA, RA, BASE -|.endmacro -| -|.macro ins_call -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| sw PC, FRAME_PC(BASE) -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|.macro branch_RD -| srl TMP0, RD, 1 -| lui AT, (-(BCBIAS_J*4 >> 16) & 65535) -| addu TMP0, TMP0, AT -| addu PC, PC, TMP0 -|.endmacro -| -|// Assumes DISPATCH is relative to GL. 
-#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) -#define DISPATCH_GOT(name) (GG_DISP2GOT + 4*LJ_GOT_##name) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|.macro load_got, func -| lw CFUNCADDR, DISPATCH_GOT(func)(DISPATCH) -|.endmacro -|// Much faster. Sadly, there's no easy way to force the required code layout. -|// .macro call_intern, func; bal extern func; .endmacro -|.macro call_intern, func; jalr CFUNCADDR; .endmacro -|.macro call_extern; jalr CFUNCADDR; .endmacro -|.macro jmp_extern; jr CFUNCADDR; .endmacro -| -|.macro hotcheck, delta, target -| srl TMP1, PC, 1 -| andi TMP1, TMP1, 126 -| addu TMP1, TMP1, DISPATCH -| lhu TMP2, GG_DISP2HOT(TMP1) -| addiu TMP2, TMP2, -delta -| bltz TMP2, target -|. sh TMP2, GG_DISP2HOT(TMP1) -|.endmacro -| -|.macro hotloop -| hotcheck HOTCOUNT_LOOP, ->vm_hotloop -|.endmacro -| -|.macro hotcall -| hotcheck HOTCOUNT_CALL, ->vm_hotcall -|.endmacro -| -|// Set current VM state. Uses TMP0. -|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro -|.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp. -|.macro barrierback, tab, mark, tmp, target -| lw tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) -| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) -| sw tab, DISPATCH_GL(gc.grayagain)(DISPATCH) -| sb mark, tab->marked -| b target -|. sw tmp, tab->gclist -|.endmacro -| -|//----------------------------------------------------------------------- - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. 
*/ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: TMP2 = previous base. - | andi AT, PC, FRAME_P - | beqz AT, ->cont_dispatch - |. li TMP1, LJ_TTRUE - | - | // Return from pcall or xpcall fast func. - | lw PC, FRAME_PC(TMP2) // Fetch PC of previous frame. - | move BASE, TMP2 // Restore caller base. - | // Prepending may overwrite the pcall frame, so do it at the end. - | sw TMP1, FRAME_PC(RA) // Prepend true to results. - | addiu RA, RA, -8 - | - |->vm_returnc: - | addiu RD, RD, 8 // RD = (nresults+1)*8. - | andi TMP0, PC, FRAME_TYPE - | beqz RD, ->vm_unwind_c_eh - |. li CRET1, LUA_YIELD - | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. - |. move MULTRES, RD - | - |->vm_return: - | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return - | // TMP0 = PC & FRAME_TYPE - | li TMP2, -8 - | xori AT, TMP0, FRAME_C - | and TMP2, PC, TMP2 - | bnez AT, ->vm_returnp - | subu TMP2, BASE, TMP2 // TMP2 = previous base. - | - | addiu TMP1, RD, -8 - | sw TMP2, L->base - | li_vmstate C - | lw TMP2, SAVE_NRES - | addiu BASE, BASE, -8 - | st_vmstate - | beqz TMP1, >2 - |. sll TMP2, TMP2, 3 - |1: - | addiu TMP1, TMP1, -8 - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | addiu RA, RA, 8 - | sw SFRETHI, HI(BASE) - | sw SFRETLO, LO(BASE) - | bnez TMP1, <1 - |. addiu BASE, BASE, 8 - | - |2: - | bne TMP2, RD, >6 - |3: - |. sw BASE, L->top // Store new top. - | - |->vm_leave_cp: - | lw TMP0, SAVE_CFRAME // Restore previous C frame. - | move CRET1, r0 // Ok return status for vm_pcall. - | sw TMP0, L->cframe - | - |->vm_leave_unw: - | restoreregs_ret - | - |6: - | lw TMP1, L->maxstack - | slt AT, TMP2, RD - | bnez AT, >7 // Less results wanted? - | // More results wanted. 
Check stack size and fill up results with nil. - |. slt AT, BASE, TMP1 - | beqz AT, >8 - |. nop - | sw TISNIL, HI(BASE) - | addiu RD, RD, 8 - | b <2 - |. addiu BASE, BASE, 8 - | - |7: // Less results wanted. - | subu TMP0, RD, TMP2 - | subu TMP0, BASE, TMP0 // Either keep top or shrink it. - | b <3 - |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | load_got lj_state_growstack - | move MULTRES, RD - | srl CARG2, TMP2, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | lw TMP2, SAVE_NRES - | lw BASE, L->top // Need the (realloced) L->top in BASE. - | move RD, MULTRES - | b <2 - |. sll TMP2, TMP2, 3 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | move sp, CARG1 - | move CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | lw L, SAVE_L - | li TMP0, ~LJ_VMST_C - | lw GL:TMP1, L->glref - | b ->vm_leave_unw - |. sw TMP0, GL:TMP1->vmstate - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - | li AT, -4 - | and sp, CARG1, AT - |->vm_unwind_ff_eh: // Landing pad for external unwinder. - | lw L, SAVE_L - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | li TISNIL, LJ_TNIL - | lw BASE, L->base - | lw DISPATCH, L->glref // Setup pointer to dispatch table. - | .FPU mtc1 TMP3, TOBIT - | li TMP1, LJ_TFALSE - | li_vmstate INTERP - | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. - | .FPU cvt.d.s TOBIT, TOBIT - | addiu RA, BASE, -8 // Results start at BASE-8. - | addiu DISPATCH, DISPATCH, GG_G2DISP - | sw TMP1, HI(RA) // Prepend false to error message. - | st_vmstate - | b ->vm_returnc - |. 
li RD, 16 // 2 results: false + error message. - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | b >2 - |. li CARG2, LUA_MINSTACK - | - |->vm_growstack_l: // Grow stack for Lua function. - | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | addu RC, BASE, RC - | subu RA, RA, BASE - | sw BASE, L->base - | addiu PC, PC, 4 // Must point after first instruction. - | sw RC, L->top - | srl CARG2, RA, 3 - |2: - | // L->base = new base, L->top = top - | load_got lj_state_growstack - | sw PC, SAVE_PC - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | lw BASE, L->base - | lw RC, L->top - | lw LFUNC:RB, FRAME_FUNC(BASE) - | subu RC, RC, BASE - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | move L, CARG1 - | lw DISPATCH, L->glref // Setup pointer to dispatch table. - | move BASE, CARG2 - | lbu TMP1, L->status - | sw L, SAVE_L - | li PC, FRAME_CP - | addiu TMP0, sp, CFRAME_RESUME - | addiu DISPATCH, DISPATCH, GG_G2DISP - | sw r0, SAVE_NRES - | sw r0, SAVE_ERRF - | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sw r0, SAVE_CFRAME - | beqz TMP1, >3 - |. sw TMP0, L->cframe - | - | // Resume after yield (like a return). 
- | sw L, DISPATCH_GL(cur_L)(DISPATCH) - | move RA, BASE - | lw BASE, L->base - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lw TMP1, L->top - | lw PC, FRAME_PC(BASE) - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | subu RD, TMP1, BASE - | .FPU mtc1 TMP3, TOBIT - | sb r0, L->status - | .FPU cvt.d.s TOBIT, TOBIT - | li_vmstate INTERP - | addiu RD, RD, 8 - | st_vmstate - | move MULTRES, RD - | andi TMP0, PC, FRAME_TYPE - | beqz TMP0, ->BC_RET_Z - |. li TISNIL, LJ_TNIL - | b ->vm_return - |. nop - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | sw CARG4, SAVE_ERRF - | b >1 - |. li PC, FRAME_CP - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | li PC, FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | lw TMP1, L:CARG1->cframe - | move L, CARG1 - | sw CARG3, SAVE_NRES - | lw DISPATCH, L->glref // Setup pointer to dispatch table. - | sw CARG1, SAVE_L - | move BASE, CARG2 - | addiu DISPATCH, DISPATCH, GG_G2DISP - | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sw TMP1, SAVE_CFRAME - | sw sp, L->cframe // Add our C frame to cframe chain. - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | sw L, DISPATCH_GL(cur_L)(DISPATCH) - | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | lw TMP1, L->top - | .FPU mtc1 TMP3, TOBIT - | addu PC, PC, BASE - | subu NARGS8:RC, TMP1, BASE - | subu PC, PC, TMP2 // PC = frame delta + frame type - | .FPU cvt.d.s TOBIT, TOBIT - | li_vmstate INTERP - | li TISNIL, LJ_TNIL - | st_vmstate - | - |->vm_call_dispatch: - | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC - | lw TMP0, FRAME_PC(BASE) - | li AT, LJ_TFUNC - | bne TMP0, AT, ->vmeta_call - |. 
lw LFUNC:RB, FRAME_FUNC(BASE) - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | move L, CARG1 - | lw TMP0, L:CARG1->stack - | sw CARG1, SAVE_L - | lw TMP1, L->top - | lw DISPATCH, L->glref // Setup pointer to dispatch table. - | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). - | lw TMP1, L->cframe - | addiu DISPATCH, DISPATCH, GG_G2DISP - | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. - | sw r0, SAVE_ERRF // No error function. - | sw TMP1, SAVE_CFRAME - | sw sp, L->cframe // Add our C frame to cframe chain. - | sw L, DISPATCH_GL(cur_L)(DISPATCH) - | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) - |. move CFUNCADDR, CARG4 - | move BASE, CRET1 - | bnez CRET1, <3 // Else continue with the call. - |. li PC, FRAME_CP - | b ->vm_leave_cp // No base? Just remove C frame. - |. nop - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the - |// stack, so BASE doesn't need to be reloaded across these calls. - | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 - | lw TMP0, -16+LO(BASE) // Continuation. - | move RB, BASE - | move BASE, TMP2 // Restore caller BASE. - | lw LFUNC:TMP1, FRAME_FUNC(TMP2) - |.if FFI - | sltiu AT, TMP0, 2 - |.endif - | lw PC, -16+HI(RB) // Restore PC from [cont|PC]. - | addu TMP2, RA, RD - | lw TMP1, LFUNC:TMP1->pc - |.if FFI - | bnez AT, >1 - |.endif - |. 
sw TISNIL, -8+HI(TMP2) // Ensure one valid arg. - | // BASE = base, RA = resultptr, RB = meta base - | jr TMP0 // Jump to continuation. - |. lw KBASE, PC2PROTO(k)(TMP1) - | - |.if FFI - |1: - | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: tailcall from C function. - |. addiu TMP1, RB, -16 - | b ->vm_call_tail - |. subu RC, TMP1, BASE - |.endif - | - |->cont_cat: // RA = resultptr, RB = meta base - | lw INS, -4(PC) - | addiu CARG2, RB, -16 - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | decode_RB8a MULTRES, INS - | decode_RA8a RA, INS - | decode_RB8b MULTRES - | decode_RA8b RA - | addu TMP1, BASE, MULTRES - | sw BASE, L->base - | subu CARG3, CARG2, TMP1 - | sw SFRETHI, HI(CARG2) - | bne TMP1, CARG2, ->BC_CAT_Z - |. sw SFRETLO, LO(CARG2) - | addu RA, BASE, RA - | sw SFRETHI, HI(RA) - | b ->cont_nop - |. sw SFRETLO, LO(RA) - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TSTR - | sw STR:RC, LO(CARG3) - | b >1 - |. sw TMP0, HI(CARG3) - | - |->vmeta_tgets: - | addiu CARG2, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TTAB - | sw TAB:RB, LO(CARG2) - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) - | sw TMP0, HI(CARG2) - | li TMP1, LJ_TSTR - | sw STR:RC, LO(CARG3) - | b >1 - |. sw TMP1, HI(CARG3) - | - |->vmeta_tgetb: // TMP0 = index - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | sw TMP0, LO(CARG3) - | sw TISNUM, HI(CARG3) - | - |->vmeta_tgetv: - |1: - | load_got lj_meta_tget - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - |. move CARG1, L - | // Returns TValue * (finished) or NULL (metamethod). - | beqz CRET1, >3 - |. addiu TMP1, BASE, -FRAME_CONT - | lw SFARG1HI, HI(CRET1) - | lw SFARG2HI, LO(CRET1) - | ins_next1 - | sw SFARG1HI, HI(RA) - | sw SFARG2HI, LO(RA) - | ins_next2 - | - |3: // Call __index metamethod. 
- | // BASE = base, L->top = new base, stack = cont/func/t/k - | lw BASE, L->top - | sw PC, -16+HI(BASE) // [cont|PC] - | subu PC, BASE, TMP1 - | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | b ->vm_call_dispatch_f - |. li NARGS8:RC, 16 // 2 args for func(t, k). - | - |->vmeta_tgetr: - | load_got lj_tab_getinth - | call_intern lj_tab_getinth // (GCtab *t, int32_t key) - |. nop - | // Returns cTValue * or NULL. - | beqz CRET1, ->BC_TGETR_Z - |. move SFARG2HI, TISNIL - | lw SFARG2HI, HI(CRET1) - | b ->BC_TGETR_Z - |. lw SFARG2LO, LO(CRET1) - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TSTR - | sw STR:RC, LO(CARG3) - | b >1 - |. sw TMP0, HI(CARG3) - | - |->vmeta_tsets: - | addiu CARG2, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TTAB - | sw TAB:RB, LO(CARG2) - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) - | sw TMP0, HI(CARG2) - | li TMP1, LJ_TSTR - | sw STR:RC, LO(CARG3) - | b >1 - |. sw TMP1, HI(CARG3) - | - |->vmeta_tsetb: // TMP0 = index - | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | sw TMP0, LO(CARG3) - | sw TISNUM, HI(CARG3) - | - |->vmeta_tsetv: - |1: - | load_got lj_meta_tset - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - |. move CARG1, L - | // Returns TValue * (finished) or NULL (metamethod). - | lw SFARG1HI, HI(RA) - | beqz CRET1, >3 - |. lw SFARG1LO, LO(RA) - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | ins_next1 - | sw SFARG1HI, HI(CRET1) - | sw SFARG1LO, LO(CRET1) - | ins_next2 - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | addiu TMP1, BASE, -FRAME_CONT - | lw BASE, L->top - | sw PC, -16+HI(BASE) // [cont|PC] - | subu PC, BASE, TMP1 - | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument. 
- | sw SFARG1LO, 16+LO(BASE) - | b ->vm_call_dispatch_f - |. li NARGS8:RC, 24 // 3 args for func(t, k, v) - | - |->vmeta_tsetr: - | load_got lj_tab_setinth - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - |. move CARG1, L - | // Returns TValue *. - | b ->BC_TSETR_Z - |. nop - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | // RA/RD point to o1/o2. - | move CARG2, RA - | move CARG3, RD - | load_got lj_meta_comp - | addiu PC, PC, -4 - | sw BASE, L->base - | sw PC, SAVE_PC - | decode_OP1 CARG4, INS - | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - |3: - | sltiu AT, CRET1, 2 - | beqz AT, ->vmeta_binop - | negu TMP2, CRET1 - |4: - | lhu RD, OFS_RD(PC) - | addiu PC, PC, 4 - | lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) - | sll RD, RD, 2 - | addu RD, RD, TMP1 - | and RD, RD, TMP2 - | addu PC, PC, RD - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | lbu TMP1, -4+OFS_RA(PC) - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | sll TMP1, TMP1, 3 - | addu TMP1, BASE, TMP1 - | sw SFRETHI, HI(TMP1) - | b ->cont_nop - |. sw SFRETLO, LO(TMP1) - | - |->cont_condt: // RA = resultptr - | lw TMP0, HI(RA) - | sltiu AT, TMP0, LJ_TISTRUECOND - | b <4 - |. negu TMP2, AT // Branch if result is true. - | - |->cont_condf: // RA = resultptr - | lw TMP0, HI(RA) - | sltiu AT, TMP0, LJ_TISTRUECOND - | b <4 - |. addiu TMP2, AT, -1 // Branch if result is false. - | - |->vmeta_equal: - | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1. - | load_got lj_meta_equal - | move CARG2, SFARG1LO - | move CARG3, SFARG2LO - | move CARG4, TMP0 - | addiu PC, PC, -4 - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |. 
nop - | - |->vmeta_equal_cd: - |.if FFI - | load_got lj_meta_equal_cd - | move CARG2, INS - | addiu PC, PC, -4 - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |. nop - |.endif - | - |->vmeta_istype: - | load_got lj_meta_istype - | addiu PC, PC, -4 - | sw BASE, L->base - | srl CARG2, RA, 3 - | srl CARG3, RD, 3 - | sw PC, SAVE_PC - | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - |. move CARG1, L - | b ->cont_nop - |. nop - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_unm: - | move RC, RB - | - |->vmeta_arith: - | load_got lj_meta_arith - | decode_OP1 TMP0, INS - | sw BASE, L->base - | move CARG2, RA - | sw PC, SAVE_PC - | move CARG3, RB - | move CARG4, RC - | sw TMP0, ARG5 - | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - |. move CARG1, L - | // Returns NULL (finished) or TValue * (metamethod). - | beqz CRET1, ->cont_nop - |. nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | subu TMP1, CRET1, BASE - | sw PC, -16+HI(CRET1) // [cont|PC] - | move TMP2, BASE - | addiu PC, TMP1, FRAME_CONT - | move BASE, CRET1 - | b ->vm_call_dispatch - |. li NARGS8:RC, 16 // 2 args for func(o1, o2). - | - |->vmeta_len: - | // CARG2 already set by BC_LEN. -#if LJ_52 - | move MULTRES, CARG1 -#endif - | load_got lj_meta_len - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_meta_len // (lua_State *L, TValue *o) - |. move CARG1, L - | // Returns NULL (retry) or TValue * (metamethod base). -#if LJ_52 - | bnez CRET1, ->vmeta_binop // Binop call for compatibility. - |. nop - | b ->BC_LEN_Z - |. move CARG1, MULTRES -#else - | b ->vmeta_binop // Binop call for compatibility. - |. 
nop -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. - | // TMP2 = old base, BASE = new base, RC = nargs*8 - | load_got lj_meta_call - | sw TMP2, L->base // This is the callers base! - | addiu CARG2, BASE, -8 - | sw PC, SAVE_PC - | addu CARG3, BASE, RC - | move MULTRES, NARGS8:RC - | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - |. move CARG1, L - | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | addiu NARGS8:RC, MULTRES, 8 // Got one more argument now. - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | load_got lj_meta_call - | sw BASE, L->base - | addiu CARG2, RA, -8 - | sw PC, SAVE_PC - | addu CARG3, RA, RC - | move MULTRES, NARGS8:RC - | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - |. move CARG1, L - | lw TMP1, FRAME_PC(BASE) - | lw LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here. - | b ->BC_CALLT_Z - |. addiu NARGS8:RC, MULTRES, 8 // Got one more argument now. - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | load_got lj_meta_for - | sw BASE, L->base - | move CARG2, RA - | sw PC, SAVE_PC - | move MULTRES, INS - | call_intern lj_meta_for // (lua_State *L, TValue *base) - |. move CARG1, L - |.if JIT - | decode_OP1 TMP0, MULTRES - | li AT, BC_JFORI - |.endif - | decode_RA8a RA, MULTRES - | decode_RD8a RD, MULTRES - | decode_RA8b RA - |.if JIT - | beq TMP0, AT, =>BC_JFORI - |. decode_RD8b RD - | b =>BC_FORI - |. nop - |.else - | b =>BC_FORI - |. decode_RD8b RD - |.endif - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. 
name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | lw SFARG1HI, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. lw SFARG1LO, LO(BASE) - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. name: - | sltiu AT, NARGS8:RC, 16 - | lw SFARG1HI, HI(BASE) - | bnez AT, ->fff_fallback - |. lw SFARG2HI, 8+HI(BASE) - | lw SFARG1LO, LO(BASE) - | lw SFARG2LO, 8+LO(BASE) - |.endmacro - | - |.macro .ffunc_n, name // Caveat: has delay slot! - |->ff_ .. name: - | lw SFARG1HI, HI(BASE) - |.if FPU - | ldc1 FARG1, 0(BASE) - |.else - | lw SFARG1LO, LO(BASE) - |.endif - | beqz NARGS8:RC, ->fff_fallback - |. sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name // Caveat: has delay slot! - |->ff_ .. name: - | sltiu AT, NARGS8:RC, 16 - | lw SFARG1HI, HI(BASE) - | bnez AT, ->fff_fallback - |. lw SFARG2HI, 8+HI(BASE) - | sltiu TMP0, SFARG1HI, LJ_TISNUM - |.if FPU - | ldc1 FARG1, 0(BASE) - |.else - | lw SFARG1LO, LO(BASE) - |.endif - | sltiu TMP1, SFARG2HI, LJ_TISNUM - |.if FPU - | ldc1 FARG2, 8(BASE) - |.else - | lw SFARG2LO, 8+LO(BASE) - |.endif - | and TMP0, TMP0, TMP1 - | beqz TMP0, ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! - |.macro ffgccheck - | lw TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | lw TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | subu AT, TMP0, TMP1 - | bgezal AT, ->fff_gcstep - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc_1 assert - | sltiu AT, SFARG1HI, LJ_TISTRUECOND - | beqz AT, ->fff_fallback - |. addiu RA, BASE, -8 - | lw PC, FRAME_PC(BASE) - | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. - | addu TMP2, RA, NARGS8:RC - | sw SFARG1HI, HI(RA) - | addiu TMP1, BASE, 8 - | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. - |. 
sw SFARG1LO, LO(RA) - |1: - | lw SFRETHI, HI(TMP1) - | lw SFRETLO, LO(TMP1) - | sw SFRETHI, -8+HI(TMP1) - | sw SFRETLO, -8+LO(TMP1) - | bne TMP1, TMP2, <1 - |. addiu TMP1, TMP1, 8 - | b ->fff_res - |. nop - | - |.ffunc type - | lw SFARG1HI, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. sltiu TMP0, SFARG1HI, LJ_TISNUM - | movn SFARG1HI, TISNUM, TMP0 - | not TMP1, SFARG1HI - | sll TMP1, TMP1, 3 - | addu TMP1, CFUNC:RB, TMP1 - | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi - | b ->fff_restv - |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | li AT, LJ_TTAB - | bne SFARG1HI, AT, >6 - |. li AT, LJ_TUDATA - |1: // Field metatable must be at same offset for GCtab and GCudata! - | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable - |2: - | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) - | beqz TAB:SFARG1LO, ->fff_restv - |. li SFARG1HI, LJ_TNIL - | lw TMP0, TAB:SFARG1LO->hmask - | li SFARG1HI, LJ_TTAB // Use metatable as default result. - | lw TMP1, STR:RC->hash - | lw NODE:TMP2, TAB:SFARG1LO->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | sll TMP0, TMP1, 5 - | sll TMP1, TMP1, 3 - | subu TMP1, TMP0, TMP1 - | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - | li AT, LJ_TSTR - |3: // Rearranged logic, because we expect _not_ to find the key. - | lw CARG4, offsetof(Node, key)+HI(NODE:TMP2) - | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) - | lw NODE:TMP3, NODE:TMP2->next - | bne CARG4, AT, >4 - |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2) - | beq TMP0, STR:RC, >5 - |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2) - |4: - | beqz NODE:TMP3, ->fff_restv // Not found, keep default result. - |. move NODE:TMP2, NODE:TMP3 - | b <3 - |. nop - |5: - | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value. - |. nop - | move SFARG1HI, CARG3 // Return value of mt.__metatable. - | b ->fff_restv - |. 
move SFARG1LO, TMP1 - | - |6: - | beq SFARG1HI, AT, <1 - |. sltu AT, TISNUM, SFARG1HI - | movz SFARG1HI, TISNUM, AT - | not TMP1, SFARG1HI - | sll TMP1, TMP1, 2 - | addu TMP1, DISPATCH, TMP1 - | b <2 - |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. - | li AT, LJ_TTAB - | bne SFARG1HI, AT, ->fff_fallback - |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB - | lw TAB:TMP1, TAB:SFARG1LO->metatable - | lbu TMP3, TAB:SFARG1LO->marked - | or AT, SFARG2HI, TAB:TMP1 - | bnez AT, ->fff_fallback - |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | beqz AT, ->fff_restv - |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable - | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv - | - |.ffunc rawget - | lw CARG4, HI(BASE) - | sltiu AT, NARGS8:RC, 16 - | lw TAB:CARG2, LO(BASE) - | load_got lj_tab_get - | addiu CARG4, CARG4, -LJ_TTAB - | or AT, AT, CARG4 - | bnez AT, ->fff_fallback - | addiu CARG3, BASE, 8 - | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - |. move CARG1, L - | // Returns cTValue *. - | lw SFARG1HI, HI(CRET1) - | b ->fff_restv - |. lw SFARG1LO, LO(CRET1) - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | lw CARG1, HI(BASE) - | xori AT, NARGS8:RC, 8 // Exactly one number argument. - | sltu TMP0, TISNUM, CARG1 - | or AT, AT, TMP0 - | bnez AT, ->fff_fallback - |. lw SFARG1HI, HI(BASE) - | b ->fff_restv - |. lw SFARG1LO, LO(BASE) - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | li AT, LJ_TSTR - | // A __tostring method in the string base metatable is ignored. - | beq SFARG1HI, AT, ->fff_restv // String key? - | // Handle numbers inline, unless a number base metatable is present. - |. 
lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) - | sltu TMP0, TISNUM, SFARG1HI - | or TMP0, TMP0, TMP1 - | bnez TMP0, ->fff_fallback - |. sw BASE, L->base // Add frame since C call can throw. - | ffgccheck - |. sw PC, SAVE_PC // Redundant (but a defined value). - | load_got lj_strfmt_number - | move CARG1, L - | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) - |. move CARG2, BASE - | // Returns GCstr *. - | li SFARG1HI, LJ_TSTR - | b ->fff_restv - |. move SFARG1LO, CRET1 - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc next - | lw CARG1, HI(BASE) - | lw TAB:CARG2, LO(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. addu TMP2, BASE, NARGS8:RC - | li AT, LJ_TTAB - | sw TISNIL, HI(TMP2) // Set missing 2nd arg to nil. - | bne CARG1, AT, ->fff_fallback - |. lw PC, FRAME_PC(BASE) - | load_got lj_tab_next - | sw BASE, L->base // Add frame since C call can throw. - | sw BASE, L->top // Dummy frame length is ok. - | addiu CARG3, BASE, 8 - | sw PC, SAVE_PC - | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - |. move CARG1, L - | // Returns 0 at end of traversal. - | beqz CRET1, ->fff_restv // End of traversal: return nil. - |. li SFARG1HI, LJ_TNIL - | lw TMP0, 8+HI(BASE) - | lw TMP1, 8+LO(BASE) - | addiu RA, BASE, -8 - | lw TMP2, 16+HI(BASE) - | lw TMP3, 16+LO(BASE) - | sw TMP0, HI(RA) - | sw TMP1, LO(RA) - | sw TMP2, 8+HI(RA) - | sw TMP3, 8+LO(RA) - | b ->fff_res - |. li RD, (2+1)*8 - | - |.ffunc_1 pairs - | li AT, LJ_TTAB - | bne SFARG1HI, AT, ->fff_fallback - |. lw PC, FRAME_PC(BASE) -#if LJ_52 - | lw TAB:TMP2, TAB:SFARG1LO->metatable - | lw TMP0, CFUNC:RB->upvalue[0].u32.hi - | lw TMP1, CFUNC:RB->upvalue[0].u32.lo - | bnez TAB:TMP2, ->fff_fallback -#else - | lw TMP0, CFUNC:RB->upvalue[0].u32.hi - | lw TMP1, CFUNC:RB->upvalue[0].u32.lo -#endif - |. addiu RA, BASE, -8 - | sw TISNIL, 8+HI(BASE) - | sw TMP0, HI(RA) - | sw TMP1, LO(RA) - | b ->fff_res - |. 
li RD, (3+1)*8 - | - |.ffunc ipairs_aux - | sltiu AT, NARGS8:RC, 16 - | lw CARG3, HI(BASE) - | lw TAB:CARG1, LO(BASE) - | lw CARG4, 8+HI(BASE) - | bnez AT, ->fff_fallback - |. addiu CARG3, CARG3, -LJ_TTAB - | xor CARG4, CARG4, TISNUM - | and AT, CARG3, CARG4 - | bnez AT, ->fff_fallback - |. lw PC, FRAME_PC(BASE) - | lw TMP2, 8+LO(BASE) - | lw TMP0, TAB:CARG1->asize - | lw TMP1, TAB:CARG1->array - | addiu TMP2, TMP2, 1 - | sw TISNUM, -8+HI(BASE) - | sltu AT, TMP2, TMP0 - | sw TMP2, -8+LO(BASE) - | beqz AT, >2 // Not in array part? - |. addiu RA, BASE, -8 - | sll TMP3, TMP2, 3 - | addu TMP3, TMP1, TMP3 - | lw TMP1, HI(TMP3) - | lw TMP2, LO(TMP3) - |1: - | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. - |. li RD, (0+1)*8 - | sw TMP1, 8+HI(RA) - | sw TMP2, 8+LO(RA) - | b ->fff_res - |. li RD, (2+1)*8 - | - |2: // Check for empty hash part first. Otherwise call C function. - | lw TMP0, TAB:CARG1->hmask - | load_got lj_tab_getinth - | beqz TMP0, ->fff_res - |. li RD, (0+1)*8 - | call_intern lj_tab_getinth // (GCtab *t, int32_t key) - |. move CARG2, TMP2 - | // Returns cTValue * or NULL. - | beqz CRET1, ->fff_res - |. li RD, (0+1)*8 - | lw TMP1, HI(CRET1) - | b <1 - |. lw TMP2, LO(CRET1) - | - |.ffunc_1 ipairs - | li AT, LJ_TTAB - | bne SFARG1HI, AT, ->fff_fallback - |. lw PC, FRAME_PC(BASE) -#if LJ_52 - | lw TAB:TMP2, TAB:SFARG1LO->metatable - | lw TMP0, CFUNC:RB->upvalue[0].u32.hi - | lw TMP1, CFUNC:RB->upvalue[0].u32.lo - | bnez TAB:TMP2, ->fff_fallback -#else - | lw TMP0, CFUNC:RB->upvalue[0].u32.hi - | lw TMP1, CFUNC:RB->upvalue[0].u32.lo -#endif - |. addiu RA, BASE, -8 - | sw TISNUM, 8+HI(BASE) - | sw r0, 8+LO(BASE) - | sw TMP0, HI(RA) - | sw TMP1, LO(RA) - | b ->fff_res - |. 
li RD, (3+1)*8 - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | beqz NARGS8:RC, ->fff_fallback - | move TMP2, BASE - | addiu BASE, BASE, 8 - | // Remember active hook before pcall. - | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT - | andi TMP3, TMP3, 1 - | addiu PC, TMP3, 8+FRAME_PCALL - | b ->vm_call_dispatch - |. addiu NARGS8:RC, NARGS8:RC, -8 - | - |.ffunc xpcall - | sltiu AT, NARGS8:RC, 16 - | lw CARG4, 8+HI(BASE) - | bnez AT, ->fff_fallback - |. lw CARG3, 8+LO(BASE) - | lw CARG1, LO(BASE) - | lw CARG2, HI(BASE) - | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) - | li AT, LJ_TFUNC - | move TMP2, BASE - | bne CARG4, AT, ->fff_fallback // Traceback must be a function. - | addiu BASE, BASE, 16 - | // Remember active hook before pcall. - | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT - | sw CARG3, LO(TMP2) // Swap function and traceback. - | sw CARG4, HI(TMP2) - | andi TMP3, TMP3, 1 - | sw CARG1, 8+LO(TMP2) - | sw CARG2, 8+HI(TMP2) - | addiu PC, TMP3, 16+FRAME_PCALL - | b ->vm_call_dispatch - |. addiu NARGS8:RC, NARGS8:RC, -16 - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc coroutine_resume - | lw CARG3, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. lw CARG1, LO(BASE) - | li AT, LJ_TTHREAD - | bne CARG3, AT, ->fff_fallback - |.else - |.ffunc coroutine_wrap_aux - | lw L:CARG1, CFUNC:RB->upvalue[0].gcr - |.endif - | lbu TMP0, L:CARG1->status - | lw TMP1, L:CARG1->cframe - | lw CARG2, L:CARG1->top - | lw TMP2, L:CARG1->base - | addiu TMP3, TMP0, -LUA_YIELD - | bgtz TMP3, ->fff_fallback // st > LUA_YIELD? - |. xor TMP2, TMP2, CARG2 - | bnez TMP1, ->fff_fallback // cframe != 0? - |. or AT, TMP2, TMP0 - | lw TMP0, L:CARG1->maxstack - | beqz AT, ->fff_fallback // base == top && st == 0? - |. 
lw PC, FRAME_PC(BASE) - | addu TMP2, CARG2, NARGS8:RC - | sltu AT, TMP0, TMP2 - | bnez AT, ->fff_fallback // Stack overflow? - |. sw PC, SAVE_PC - | sw BASE, L->base - |1: - |.if resume - | addiu BASE, BASE, 8 // Keep resumed thread in stack for GC. - | addiu NARGS8:RC, NARGS8:RC, -8 - | addiu TMP2, TMP2, -8 - |.endif - | sw TMP2, L:CARG1->top - | addu TMP1, BASE, NARGS8:RC - | move CARG3, CARG2 - | sw BASE, L->top - |2: // Move args to coroutine. - | lw SFRETHI, HI(BASE) - | lw SFRETLO, LO(BASE) - | sltu AT, BASE, TMP1 - | beqz AT, >3 - |. addiu BASE, BASE, 8 - | sw SFRETHI, HI(CARG3) - | sw SFRETLO, LO(CARG3) - | b <2 - |. addiu CARG3, CARG3, 8 - |3: - | bal ->vm_resume // (lua_State *L, TValue *base, 0, 0) - |. move L:RA, L:CARG1 - | // Returns thread status. - |4: - | lw TMP2, L:RA->base - | sltiu AT, CRET1, LUA_YIELD+1 - | lw TMP3, L:RA->top - | li_vmstate INTERP - | lw BASE, L->base - | sw L, DISPATCH_GL(cur_L)(DISPATCH) - | st_vmstate - | beqz AT, >8 - |. subu RD, TMP3, TMP2 - | lw TMP0, L->maxstack - | beqz RD, >6 // No results? - |. addu TMP1, BASE, RD - | sltu AT, TMP0, TMP1 - | bnez AT, >9 // Need to grow stack? - |. addu TMP3, TMP2, RD - | sw TMP2, L:RA->top // Clear coroutine stack. - | move TMP1, BASE - |5: // Move results from coroutine. - | lw SFRETHI, HI(TMP2) - | lw SFRETLO, LO(TMP2) - | addiu TMP2, TMP2, 8 - | sltu AT, TMP2, TMP3 - | sw SFRETHI, HI(TMP1) - | sw SFRETLO, LO(TMP1) - | bnez AT, <5 - |. addiu TMP1, TMP1, 8 - |6: - | andi TMP0, PC, FRAME_TYPE - |.if resume - | li TMP1, LJ_TTRUE - | addiu RA, BASE, -8 - | sw TMP1, -8+HI(BASE) // Prepend true to results. - | addiu RD, RD, 16 - |.else - | move RA, BASE - | addiu RD, RD, 8 - |.endif - |7: - | sw PC, SAVE_PC - | beqz TMP0, ->BC_RET_Z - |. move MULTRES, RD - | b ->vm_return - |. nop - | - |8: // Coroutine returned with error (at co->top-1). 
- |.if resume - | addiu TMP3, TMP3, -8 - | li TMP1, LJ_TFALSE - | lw SFRETHI, HI(TMP3) - | lw SFRETLO, LO(TMP3) - | sw TMP3, L:RA->top // Remove error from coroutine stack. - | li RD, (2+1)*8 - | sw TMP1, -8+HI(BASE) // Prepend false to results. - | addiu RA, BASE, -8 - | sw SFRETHI, HI(BASE) // Copy error message. - | sw SFRETLO, LO(BASE) - | b <7 - |. andi TMP0, PC, FRAME_TYPE - |.else - | load_got lj_ffh_coroutine_wrap_err - | move CARG2, L:RA - | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - |. move CARG1, L - |.endif - | - |9: // Handle stack expansion on return from yield. - | load_got lj_state_growstack - | srl CARG2, RD, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | b <4 - |. li CRET1, 0 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | lw TMP0, L->cframe - | addu TMP1, BASE, NARGS8:RC - | sw BASE, L->base - | andi TMP0, TMP0, CFRAME_RESUME - | sw TMP1, L->top - | beqz TMP0, ->fff_fallback - |. li CRET1, LUA_YIELD - | sw r0, L->cframe - | b ->vm_leave_unw - |. sb CRET1, L->status - | - |//-- Math library ------------------------------------------------------- - | - |.ffunc_1 math_abs - | bne SFARG1HI, TISNUM, >1 - |. sra TMP0, SFARG1LO, 31 - | xor TMP1, SFARG1LO, TMP0 - | subu SFARG1LO, TMP1, TMP0 - | bgez SFARG1LO, ->fff_restv - |. nop - | lui SFARG1HI, 0x41e0 // 2^31 as a double. - | b ->fff_restv - |. li SFARG1LO, 0 - |1: - | sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |. sll SFARG1HI, SFARG1HI, 1 - | srl SFARG1HI, SFARG1HI, 1 - |// fallthrough - | - |->fff_restv: - | // SFARG1LO/SFARG1HI = TValue result. - | lw PC, FRAME_PC(BASE) - | sw SFARG1HI, -8+HI(BASE) - | addiu RA, BASE, -8 - | sw SFARG1LO, -8+LO(BASE) - |->fff_res1: - | // RA = results, PC = return. - | li RD, (1+1)*8 - |->fff_res: - | // RA = results, RD = (nresults+1)*8, PC = return. 
- | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->vm_return - |. move MULTRES, RD - | lw INS, -4(PC) - | decode_RB8a RB, INS - | decode_RB8b RB - |5: - | sltu AT, RD, RB - | bnez AT, >6 // More results expected? - |. decode_RA8a TMP0, INS - | decode_RA8b TMP0 - | ins_next1 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | subu BASE, RA, TMP0 - | ins_next2 - | - |6: // Fill up results with nil. - | addu TMP1, RA, RD - | addiu RD, RD, 8 - | b <5 - |. sw TISNIL, -8+HI(TMP1) - | - |.macro math_extern, func - | .ffunc math_ .. func - | lw SFARG1HI, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. load_got func - | sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - |.else - |. lw SFARG1LO, LO(BASE) - |.endif - | call_extern - |. nop - | b ->fff_resn - |. nop - |.endmacro - | - |.macro math_extern2, func - | .ffunc_nn math_ .. func - |. load_got func - | call_extern - |. nop - | b ->fff_resn - |. nop - |.endmacro - | - |// TODO: Return integer type if result is integer (own sf implementation). - |.macro math_round, func - |->ff_math_ .. func: - | lw SFARG1HI, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. lw SFARG1LO, LO(BASE) - | beq SFARG1HI, TISNUM, ->fff_restv - |. sltu AT, SFARG1HI, TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - | bal ->vm_ .. func - |.else - |. load_got func - | call_extern - |.endif - |. nop - | b ->fff_resn - |. nop - |.endmacro - | - | math_round floor - | math_round ceil - | - |.ffunc math_log - | li AT, 8 - | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. - |. lw SFARG1HI, HI(BASE) - | sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |. load_got log - |.if FPU - | call_extern - |. ldc1 FARG1, 0(BASE) - |.else - | call_extern - |. lw SFARG1LO, LO(BASE) - |.endif - | b ->fff_resn - |. 
nop - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.if FPU - |.ffunc_n math_sqrt - |. sqrt.d FRET1, FARG1 - |// fallthrough to ->fff_resn - |.else - | math_extern sqrt - |.endif - | - |->fff_resn: - | lw PC, FRAME_PC(BASE) - | addiu RA, BASE, -8 - |.if FPU - | b ->fff_res1 - |. sdc1 FRET1, -8(BASE) - |.else - | sw SFRETHI, -8+HI(BASE) - | b ->fff_res1 - |. sw SFRETLO, -8+LO(BASE) - |.endif - | - | - |.ffunc math_ldexp - | sltiu AT, NARGS8:RC, 16 - | lw SFARG1HI, HI(BASE) - | bnez AT, ->fff_fallback - |. lw CARG4, 8+HI(BASE) - | bne CARG4, TISNUM, ->fff_fallback - | load_got ldexp - |. sltu AT, SFARG1HI, TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - |.else - |. lw SFARG1LO, LO(BASE) - |.endif - | call_extern - |. lw CARG3, 8+LO(BASE) - | b ->fff_resn - |. nop - | - |.ffunc_n math_frexp - | load_got frexp - | lw PC, FRAME_PC(BASE) - | call_extern - |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) - | addiu RA, BASE, -8 - |.if FPU - | mtc1 TMP1, FARG2 - | sdc1 FRET1, 0(RA) - | cvt.d.w FARG2, FARG2 - | sdc1 FARG2, 8(RA) - |.else - | sw SFRETLO, LO(RA) - | sw SFRETHI, HI(RA) - | sw TMP1, 8+LO(RA) - | sw TISNUM, 8+HI(RA) - |.endif - | b ->fff_res - |. li RD, (2+1)*8 - | - |.ffunc_n math_modf - | load_got modf - | lw PC, FRAME_PC(BASE) - | call_extern - |. addiu CARG3, BASE, -8 - | addiu RA, BASE, -8 - |.if FPU - | sdc1 FRET1, 0(BASE) - |.else - | sw SFRETLO, LO(BASE) - | sw SFRETHI, HI(BASE) - |.endif - | b ->fff_res - |. li RD, (2+1)*8 - | - |.macro math_minmax, name, intins, fpins - | .ffunc_1 name - | addu TMP3, BASE, NARGS8:RC - | bne SFARG1HI, TISNUM, >5 - |. addiu TMP2, BASE, 8 - |1: // Handle integers. - |. 
lw SFARG2HI, HI(TMP2) - | beq TMP2, TMP3, ->fff_restv - |. lw SFARG2LO, LO(TMP2) - | bne SFARG2HI, TISNUM, >3 - |. slt AT, SFARG1LO, SFARG2LO - | intins SFARG1LO, SFARG2LO, AT - | b <1 - |. addiu TMP2, TMP2, 8 - | - |3: // Convert intermediate result to number and continue with number loop. - | sltiu AT, SFARG2HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. mtc1 SFARG1LO, FRET1 - | cvt.d.w FRET1, FRET1 - | b >7 - |. ldc1 FARG1, 0(TMP2) - |.else - |. nop - | bal ->vm_sfi2d_1 - |. nop - | b >7 - |. nop - |.endif - | - |5: - |. sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. ldc1 FRET1, 0(BASE) - |.endif - | - |6: // Handle numbers. - |. lw SFARG2HI, HI(TMP2) - |.if FPU - | beq TMP2, TMP3, ->fff_resn - |.else - | beq TMP2, TMP3, ->fff_restv - |.endif - |. sltiu AT, SFARG2HI, LJ_TISNUM - | beqz AT, >8 - |.if FPU - |. ldc1 FARG1, 0(TMP2) - |.else - |. lw SFARG2LO, LO(TMP2) - |.endif - |7: - |.if FPU - | c.olt.d FRET1, FARG1 - | fpins FRET1, FARG1 - |.else - | bal ->vm_sfcmpolt - |. nop - | intins SFARG1LO, SFARG2LO, CRET1 - | intins SFARG1HI, SFARG2HI, CRET1 - |.endif - | b <6 - |. addiu TMP2, TMP2, 8 - | - |8: // Convert integer to number and continue with number loop. - | bne SFARG2HI, TISNUM, ->fff_fallback - |.if FPU - |. lwc1 FARG1, LO(TMP2) - | b <7 - |. cvt.d.w FARG1, FARG1 - |.else - |. nop - | bal ->vm_sfi2d_2 - |. nop - | b <7 - |. nop - |.endif - | - |.endmacro - | - | math_minmax math_min, movz, movf.d - | math_minmax math_max, movn, movt.d - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | lw CARG3, HI(BASE) - | lw STR:CARG1, LO(BASE) - | xori AT, NARGS8:RC, 8 - | addiu CARG3, CARG3, -LJ_TSTR - | or AT, AT, CARG3 - | bnez AT, ->fff_fallback // Need exactly 1 string argument. - |. 
nop - | lw TMP0, STR:CARG1->len - | addiu RA, BASE, -8 - | lw PC, FRAME_PC(BASE) - | sltu RD, r0, TMP0 - | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). - | addiu RD, RD, 1 - | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 - | sw TISNUM, HI(RA) - | b ->fff_res - |. sw TMP1, LO(RA) - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - |. nop - | lw CARG3, HI(BASE) - | lw CARG1, LO(BASE) - | li TMP1, 255 - | xori AT, NARGS8:RC, 8 // Exactly 1 argument. - | xor TMP0, CARG3, TISNUM // Integer. - | sltu TMP1, TMP1, CARG1 // !(255 < n). - | or AT, AT, TMP0 - | or AT, AT, TMP1 - | bnez AT, ->fff_fallback - |. li CARG3, 1 - | addiu CARG2, sp, ARG5_OFS - | sb CARG1, ARG5 - |->fff_newstr: - | load_got lj_str_new - | sw BASE, L->base - | sw PC, SAVE_PC - | call_intern lj_str_new // (lua_State *L, char *str, size_t l) - |. move CARG1, L - | // Returns GCstr *. - | lw BASE, L->base - |->fff_resstr: - | move SFARG1LO, CRET1 - | b ->fff_restv - |. li SFARG1HI, LJ_TSTR - | - |.ffunc string_sub - | ffgccheck - |. nop - | addiu AT, NARGS8:RC, -16 - | lw CARG3, 16+HI(BASE) - | lw TMP0, HI(BASE) - | lw STR:CARG1, LO(BASE) - | bltz AT, ->fff_fallback - |. lw CARG2, 8+HI(BASE) - | beqz AT, >1 - |. li CARG4, -1 - | bne CARG3, TISNUM, ->fff_fallback - |. lw CARG4, 16+LO(BASE) - |1: - | bne CARG2, TISNUM, ->fff_fallback - |. li AT, LJ_TSTR - | bne TMP0, AT, ->fff_fallback - |. 
lw CARG3, 8+LO(BASE) - | lw CARG2, STR:CARG1->len - | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end - | slt AT, CARG4, r0 - | addiu TMP0, CARG2, 1 - | addu TMP1, CARG4, TMP0 - | slt TMP3, CARG3, r0 - | movn CARG4, TMP1, AT // if (end < 0) end += len+1 - | addu TMP1, CARG3, TMP0 - | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 - | li TMP2, 1 - | slt AT, CARG4, r0 - | slt TMP3, r0, CARG3 - | movn CARG4, r0, AT // if (end < 0) end = 0 - | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 - | slt AT, CARG2, CARG4 - | movn CARG4, CARG2, AT // if (end > len) end = len - | addu CARG2, STR:CARG1, CARG3 - | subu CARG3, CARG4, CARG3 // len = end - start - | addiu CARG2, CARG2, sizeof(GCstr)-1 - | bgez CARG3, ->fff_newstr - |. addiu CARG3, CARG3, 1 // len++ - |->fff_emptystr: // Return empty string. - | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty) - | b ->fff_restv - |. li SFARG1HI, LJ_TSTR - | - |.macro ffstring_op, name - | .ffunc string_ .. name - | ffgccheck - |. nop - | lw CARG3, HI(BASE) - | lw STR:CARG2, LO(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. li AT, LJ_TSTR - | bne CARG3, AT, ->fff_fallback - |. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) - | load_got lj_buf_putstr_ .. name - | lw TMP0, SBUF:CARG1->b - | sw L, SBUF:CARG1->L - | sw BASE, L->base - | sw TMP0, SBUF:CARG1->p - | call_intern extern lj_buf_putstr_ .. name - |. sw PC, SAVE_PC - | load_got lj_buf_tostr - | call_intern lj_buf_tostr - |. move SBUF:CARG1, SBUF:CRET1 - | b ->fff_resstr - |. lw BASE, L->base - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |->vm_tobit_fb: - | beqz TMP1, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - | add.d FARG1, FARG1, TOBIT - | jr ra - |. mfc1 CRET1, FARG1 - |.else - |// FP number to bit conversion for soft-float. 
- |->vm_tobit: - | sll TMP0, SFARG1HI, 1 - | lui AT, 0x0020 - | addu TMP0, TMP0, AT - | slt AT, TMP0, r0 - | movz SFARG1LO, r0, AT - | beqz AT, >2 - |. li TMP1, 0x3e0 - | not TMP1, TMP1 - | sra TMP0, TMP0, 21 - | subu TMP0, TMP1, TMP0 - | slt AT, TMP0, r0 - | bnez AT, >1 - |. sll TMP1, SFARG1HI, 11 - | lui AT, 0x8000 - | or TMP1, TMP1, AT - | srl AT, SFARG1LO, 21 - | or TMP1, TMP1, AT - | slt AT, SFARG1HI, r0 - | beqz AT, >2 - |. srlv SFARG1LO, TMP1, TMP0 - | subu SFARG1LO, r0, SFARG1LO - |2: - | jr ra - |. move CRET1, SFARG1LO - |1: - | addiu TMP0, TMP0, 21 - | srlv TMP1, SFARG1LO, TMP0 - | li AT, 20 - | subu TMP0, AT, TMP0 - | sll SFARG1LO, SFARG1HI, 12 - | sllv AT, SFARG1LO, TMP0 - | or SFARG1LO, TMP1, AT - | slt AT, SFARG1HI, r0 - | beqz AT, <2 - |. nop - | jr ra - |. subu CRET1, r0, SFARG1LO - |.endif - | - |.macro .ffunc_bit, name - | .ffunc_1 bit_..name - | beq SFARG1HI, TISNUM, >6 - |. move CRET1, SFARG1LO - | bal ->vm_tobit_fb - |. sltu TMP1, SFARG1HI, TISNUM - |6: - |.endmacro - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name - | addiu TMP2, BASE, 8 - | addu TMP3, BASE, NARGS8:RC - |1: - | lw SFARG1HI, HI(TMP2) - | beq TMP2, TMP3, ->fff_resi - |. lw SFARG1LO, LO(TMP2) - |.if FPU - | bne SFARG1HI, TISNUM, >2 - |. addiu TMP2, TMP2, 8 - | b <1 - |. ins CRET1, CRET1, SFARG1LO - |2: - | ldc1 FARG1, -8(TMP2) - | sltu TMP1, SFARG1HI, TISNUM - | beqz TMP1, ->fff_fallback - |. add.d FARG1, FARG1, TOBIT - | mfc1 SFARG1LO, FARG1 - | b <1 - |. ins CRET1, CRET1, SFARG1LO - |.else - | beq SFARG1HI, TISNUM, >2 - |. move CRET2, CRET1 - | bal ->vm_tobit_fb - |. sltu TMP1, SFARG1HI, TISNUM - | move SFARG1LO, CRET2 - |2: - | ins CRET1, CRET1, SFARG1LO - | b <1 - |. 
addiu TMP2, TMP2, 8 - |.endif - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, or - |.ffunc_bit_op bxor, xor - | - |.ffunc_bit bswap - | srl TMP0, CRET1, 24 - | srl TMP2, CRET1, 8 - | sll TMP1, CRET1, 24 - | andi TMP2, TMP2, 0xff00 - | or TMP0, TMP0, TMP1 - | andi CRET1, CRET1, 0xff00 - | or TMP0, TMP0, TMP2 - | sll CRET1, CRET1, 8 - | b ->fff_resi - |. or CRET1, TMP0, CRET1 - | - |.ffunc_bit bnot - | b ->fff_resi - |. not CRET1, CRET1 - | - |.macro .ffunc_bit_sh, name, ins, shmod - | .ffunc_2 bit_..name - | beq SFARG1HI, TISNUM, >1 - |. nop - | bal ->vm_tobit_fb - |. sltu TMP1, SFARG1HI, TISNUM - | move SFARG1LO, CRET1 - |1: - | bne SFARG2HI, TISNUM, ->fff_fallback - |. nop - |.if shmod == 1 - | li AT, 32 - | subu TMP0, AT, SFARG2LO - | sllv SFARG2LO, SFARG1LO, SFARG2LO - | srlv SFARG1LO, SFARG1LO, TMP0 - |.elif shmod == 2 - | li AT, 32 - | subu TMP0, AT, SFARG2LO - | srlv SFARG2LO, SFARG1LO, SFARG2LO - | sllv SFARG1LO, SFARG1LO, TMP0 - |.endif - | b ->fff_resi - |. ins CRET1, SFARG1LO, SFARG2LO - |.endmacro - | - |.ffunc_bit_sh lshift, sllv, 0 - |.ffunc_bit_sh rshift, srlv, 0 - |.ffunc_bit_sh arshift, srav, 0 - |// Can't use rotrv, since it's only in MIPS32R2. - |.ffunc_bit_sh rol, or, 1 - |.ffunc_bit_sh ror, or, 2 - | - |.ffunc_bit tobit - |->fff_resi: - | lw PC, FRAME_PC(BASE) - | addiu RA, BASE, -8 - | sw TISNUM, -8+HI(BASE) - | b ->fff_res1 - |. sw CRET1, -8+LO(BASE) - | - |//----------------------------------------------------------------------- - | - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RB = CFUNC, RC = nargs*8 - | lw TMP3, CFUNC:RB->f - | addu TMP1, BASE, NARGS8:RC - | lw PC, FRAME_PC(BASE) // Fallback may overwrite PC. - | addiu TMP0, TMP1, 8*LUA_MINSTACK - | lw TMP2, L->maxstack - | sw PC, SAVE_PC // Redundant (but a defined value). - | sltu AT, TMP2, TMP0 - | sw BASE, L->base - | sw TMP1, L->top - | bnez AT, >5 // Need to grow stack. - |. 
move CFUNCADDR, TMP3 - | jalr TMP3 // (lua_State *L) - |. move CARG1, L - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | lw BASE, L->base - | sll RD, CRET1, 3 - | bgtz CRET1, ->fff_res // Returned nresults+1? - |. addiu RA, BASE, -8 - |1: // Returned 0 or -1: retry fast path. - | lw TMP0, L->top - | lw LFUNC:RB, FRAME_FUNC(BASE) - | bnez CRET1, ->vm_call_tail // Returned -1? - |. subu NARGS8:RC, TMP0, BASE - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | andi TMP0, PC, FRAME_TYPE - | li AT, -4 - | bnez TMP0, >3 - |. and TMP1, PC, AT - | lbu TMP1, OFS_RA(PC) - | sll TMP1, TMP1, 3 - | addiu TMP1, TMP1, 8 - |3: - | b ->vm_call_dispatch // Resolve again for tailcall. - |. subu TMP2, BASE, TMP1 - | - |5: // Grow stack for fallback handler. - | load_got lj_state_growstack - | li CARG2, LUA_MINSTACK - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | lw BASE, L->base - | b <1 - |. li CRET1, 0 // Force retry. - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | move MULTRES, ra - | load_got lj_gc_step - | sw BASE, L->base - | addu TMP0, BASE, NARGS8:RC - | sw PC, SAVE_PC // Redundant (but a defined value). - | sw TMP0, L->top - | call_intern lj_gc_step // (lua_State *L) - |. move CARG1, L - | lw BASE, L->base - | move ra, MULTRES - | lw TMP0, L->top - | lw CFUNC:RB, FRAME_FUNC(BASE) - | jr ra - |. subu NARGS8:RC, TMP0, BASE - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent. 
- | bnez AT, >5 - | // Decrement the hookcount for consistency, but always do the call. - |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE - | bnez AT, >1 - |. addiu TMP2, TMP2, -1 - | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT - | beqz AT, >1 - |. nop - | b >1 - |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE // Hook already active? - | beqz AT, >1 - |5: // Re-dispatch to static ins. - |. lw AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. - | jr AT - |. nop - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE // Hook already active? - | bnez AT, <5 - |. andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT - | beqz AT, <5 - |. addiu TMP2, TMP2, -1 - | beqz TMP2, >1 - |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, LUA_MASKLINE - | beqz AT, <5 - |1: - |. load_got lj_dispatch_ins - | sw MULTRES, SAVE_MULTRES - | move CARG2, PC - | sw BASE, L->base - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |. move CARG1, L - |3: - | lw BASE, L->base - |4: // Re-dispatch to static ins. - | lw INS, -4(PC) - | decode_OP4a TMP1, INS - | decode_OP4b TMP1 - | addu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS - | lw AT, GG_DISP2STATIC(TMP0) - | decode_RA8a RA, INS - | decode_RD8b RD - | jr AT - | decode_RA8b RA - | - |->cont_hook: // Continue from hook yield. - | addiu PC, PC, 4 - | b <4 - |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins. - | - |->vm_hotloop: // Hot loop counter underflow. 
- |.if JIT - | lw LFUNC:TMP1, FRAME_FUNC(BASE) - | addiu CARG1, DISPATCH, GG_DISP2J - | sw PC, SAVE_PC - | lw TMP1, LFUNC:TMP1->pc - | move CARG2, PC - | sw L, DISPATCH_J(L)(DISPATCH) - | lbu TMP1, PC2PROTO(framesize)(TMP1) - | load_got lj_trace_hot - | sw BASE, L->base - | sll TMP1, TMP1, 3 - | addu TMP1, BASE, TMP1 - | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc) - |. sw TMP1, L->top - | b <3 - |. nop - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - |.if JIT - | b >1 - |.endif - |. move CARG2, PC - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | ori CARG2, PC, 1 - |1: - |.endif - | load_got lj_dispatch_call - | addu TMP0, BASE, RC - | sw PC, SAVE_PC - | sw BASE, L->base - | subu RA, RA, BASE - | sw TMP0, L->top - | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc) - |. move CARG1, L - | // Returns ASMFunction. - | lw BASE, L->base - | lw TMP0, L->top - | sw r0, SAVE_PC // Invalidate for subsequent line hook. - | subu NARGS8:RC, TMP0, BASE - | addu RA, BASE, RA - | lw LFUNC:RB, FRAME_FUNC(BASE) - | jr CRET1 - |. lw INS, -4(PC) - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // RA = resultptr, RB = meta base - | lw INS, -4(PC) - | lw TMP2, -24+LO(RB) // Save previous trace. - | decode_RA8a RC, INS - | addiu AT, MULTRES, -8 - | decode_RA8b RC - | beqz AT, >2 - |. addu RC, BASE, RC // Call base. - |1: // Move results down. - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | addiu AT, AT, -8 - | addiu RA, RA, 8 - | sw SFRETHI, HI(RC) - | sw SFRETLO, LO(RC) - | bnez AT, <1 - |. addiu RC, RC, 8 - |2: - | decode_RA8a RA, INS - | decode_RB8a RB, INS - | decode_RA8b RA - | decode_RB8b RB - | addu RA, RA, RB - | addu RA, BASE, RA - |3: - | sltu AT, RC, RA - | bnez AT, >9 // More results wanted? - |. nop - | - | lhu TMP3, TRACE:TMP2->traceno - | lhu RD, TRACE:TMP2->link - | beq RD, TMP3, ->cont_nop // Blacklisted. - |. load_got lj_dispatch_stitch - | bnez RD, =>BC_JLOOP // Jump to stitched trace. - |. 
sll RD, RD, 3 - | - | // Stitch a new trace to the previous trace. - | sw TMP3, DISPATCH_J(exitno)(DISPATCH) - | sw L, DISPATCH_J(L)(DISPATCH) - | sw BASE, L->base - | addiu CARG1, DISPATCH, GG_DISP2J - | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) - |. move CARG2, PC - | b ->cont_nop - |. lw BASE, L->base - | - |9: - | sw TISNIL, HI(RC) - | b <3 - |. addiu RC, RC, 8 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | load_got lj_dispatch_profile - | sw MULTRES, SAVE_MULTRES - | move CARG2, PC - | sw BASE, L->base - | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - |. move CARG1, L - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. - | addiu PC, PC, -4 - | b ->cont_nop - |. lw BASE, L->base -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro savex_, a, b - |.if FPU - | sdc1 f..a, 16+a*8(sp) - | sw r..a, 16+32*8+a*4(sp) - | sw r..b, 16+32*8+b*4(sp) - |.else - | sw r..a, 16+a*4(sp) - | sw r..b, 16+b*4(sp) - |.endif - |.endmacro - | - |->vm_exit_handler: - |.if JIT - |.if FPU - | addiu sp, sp, -(16+32*8+32*4) - |.else - | addiu sp, sp, -(16+32*4) - |.endif - | savex_ 0, 1 - | savex_ 2, 3 - | savex_ 4, 5 - | savex_ 6, 7 - | savex_ 8, 9 - | savex_ 10, 11 - | savex_ 12, 13 - | savex_ 14, 15 - | savex_ 16, 17 - | savex_ 18, 19 - | savex_ 20, 21 - | savex_ 22, 23 - | savex_ 24, 25 - | savex_ 26, 27 - |.if FPU - | sdc1 f28, 16+28*8(sp) - | sdc1 f30, 16+30*8(sp) - | sw r28, 16+32*8+28*4(sp) - | sw r30, 16+32*8+30*4(sp) - | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. - | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp. 
- | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP - |.else - | sw r28, 16+28*4(sp) - | sw r30, 16+30*4(sp) - | sw r0, 16+31*4(sp) // Clear RID_TMP. - | addiu TMP2, sp, 16+32*4 // Recompute original value of sp. - | sw TMP2, 16+29*4(sp) // Store sp in RID_SP - |.endif - | li_vmstate EXIT - | addiu DISPATCH, JGL, -GG_DISP2G-32768 - | lw TMP1, 0(TMP2) // Load exit number. - | st_vmstate - | lw L, DISPATCH_GL(cur_L)(DISPATCH) - | lw BASE, DISPATCH_GL(jit_base)(DISPATCH) - | load_got lj_trace_exit - | sw L, DISPATCH_J(L)(DISPATCH) - | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. - | sw BASE, L->base - | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. - | addiu CARG1, DISPATCH, GG_DISP2J - | sw r0, DISPATCH_GL(jit_base)(DISPATCH) - | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) - |. addiu CARG2, sp, 16 - | // Returns MULTRES (unscaled) or negated error code. - | lw TMP1, L->cframe - | li AT, -4 - | lw BASE, L->base - | and sp, TMP1, AT - | lw PC, SAVE_PC // Get SAVE_PC. - | b >1 - |. sw L, SAVE_L // Set SAVE_L (on-trace resume/yield). - |.endif - |->vm_exit_interp: - |.if JIT - | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. - | lw L, SAVE_L - | addiu DISPATCH, JGL, -GG_DISP2G-32768 - | sw BASE, L->base - |1: - | bltz CRET1, >9 // Check for error from exit. - |. lw LFUNC:RB, FRAME_FUNC(BASE) - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | sll MULTRES, CRET1, 3 - | li TISNIL, LJ_TNIL - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | sw MULTRES, SAVE_MULTRES - | .FPU mtc1 TMP3, TOBIT - | lw TMP1, LFUNC:RB->pc - | sw r0, DISPATCH_GL(jit_base)(DISPATCH) - | lw KBASE, PC2PROTO(k)(TMP1) - | .FPU cvt.d.s TOBIT, TOBIT - | // Modified copy of ins_next which handles function header dispatch, too. 
- | lw INS, 0(PC) - | addiu PC, PC, 4 - | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 - | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) - | decode_OP4a TMP1, INS - | decode_OP4b TMP1 - | sltiu TMP2, TMP1, BC_FUNCF*4 - | addu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS - | lw AT, 0(TMP0) - | decode_RA8a RA, INS - | beqz TMP2, >2 - |. decode_RA8b RA - | jr AT - |. decode_RD8b RD - |2: - | sltiu TMP2, TMP1, (BC_FUNCC+2)*4 // Fast function? - | bnez TMP2, >3 - |. lw TMP1, FRAME_PC(BASE) - | // Check frame below fast function. - | andi TMP0, TMP1, FRAME_TYPE - | bnez TMP0, >3 // Trace stitching continuation? - |. nop - | // Otherwise set KBASE for Lua function below fast function. - | lw TMP2, -4(TMP1) - | decode_RA8a TMP0, TMP2 - | decode_RA8b TMP0 - | subu TMP1, BASE, TMP0 - | lw LFUNC:TMP2, -8+FRAME_FUNC(TMP1) - | lw TMP1, LFUNC:TMP2->pc - | lw KBASE, PC2PROTO(k)(TMP1) - |3: - | addiu RC, MULTRES, -8 - | jr AT - |. addu RA, RA, BASE - | - |9: // Rethrow error from the right C frame. - | load_got lj_err_throw - | negu CARG2, CRET1 - | call_intern lj_err_throw // (lua_State *L, int errcode) - |. move CARG1, L - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Hard-float round to integer. - |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. - |.macro vm_round_hf, func - | lui TMP0, 0x4330 // Hiword of 2^52 (double). - | mtc1 r0, f4 - | mtc1 TMP0, f5 - | abs.d FRET2, FARG1 // |x| - | mfc1 AT, f13 - | c.olt.d 0, FRET2, f4 - | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 - | bc1f 0, >1 // Truncate only if |x| < 2^52. - |. sub.d FRET1, FRET1, f4 - | slt AT, AT, r0 - |.if "func" == "ceil" - | lui TMP0, 0xbff0 // Hiword of -1 (double). Preserves -0. - |.else - | lui TMP0, 0x3ff0 // Hiword of +1 (double). 
- |.endif - |.if "func" == "trunc" - | mtc1 TMP0, f5 - | c.olt.d 0, FRET2, FRET1 // |x| < result? - | sub.d FRET2, FRET1, f4 - | movt.d FRET1, FRET2, 0 // If yes, subtract +1. - | neg.d FRET2, FRET1 - | jr ra - |. movn.d FRET1, FRET2, AT // Merge sign bit back in. - |.else - | neg.d FRET2, FRET1 - | mtc1 TMP0, f5 - | movn.d FRET1, FRET2, AT // Merge sign bit back in. - |.if "func" == "ceil" - | c.olt.d 0, FRET1, FARG1 // x > result? - |.else - | c.olt.d 0, FARG1, FRET1 // x < result? - |.endif - | sub.d FRET2, FRET1, f4 // If yes, subtract +-1. - | jr ra - |. movt.d FRET1, FRET2, 0 - |.endif - |1: - | jr ra - |. mov.d FRET1, FARG1 - |.endmacro - | - |.macro vm_round, func - |.if FPU - | vm_round_hf, func - |.endif - |.endmacro - | - |->vm_floor: - | vm_round floor - |->vm_ceil: - | vm_round ceil - |->vm_trunc: - |.if JIT - | vm_round trunc - |.endif - | - |// Soft-float integer to number conversion. - |.macro sfi2d, AHI, ALO - |.if not FPU - | beqz ALO, >9 // Handle zero first. - |. sra TMP0, ALO, 31 - | xor TMP1, ALO, TMP0 - | subu TMP1, TMP1, TMP0 // Absolute value in TMP1. - | clz AHI, TMP1 - | andi TMP0, TMP0, 0x800 // Mask sign bit. - | li AT, 0x3ff+31-1 - | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1. - | subu AHI, AT, AHI // Exponent - 1 in AHI. - | sll ALO, TMP1, 21 - | or AHI, AHI, TMP0 // Sign | Exponent. - | srl TMP1, TMP1, 11 - | sll AHI, AHI, 20 // Align left. - | jr ra - |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent. - |9: - | jr ra - |. li AHI, 0 - |.endif - |.endmacro - | - |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1. - |->vm_sfi2d_1: - | sfi2d SFARG1HI, SFARG1LO - | - |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1. - |->vm_sfi2d_2: - | sfi2d SFARG2HI, SFARG2LO - | - |// Soft-float comparison. Equivalent to c.eq.d. - |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. 
- |->vm_sfcmpeq: - |.if not FPU - | sll AT, SFARG1HI, 1 - | sll TMP0, SFARG2HI, 1 - | or CRET1, SFARG1LO, SFARG2LO - | or TMP1, AT, TMP0 - | or TMP1, TMP1, CRET1 - | beqz TMP1, >8 // Both args +-0: return 1. - |. sltu CRET1, r0, SFARG1LO - | lui TMP1, 0xffe0 - | addu AT, AT, CRET1 - | sltu CRET1, r0, SFARG2LO - | sltu AT, TMP1, AT - | addu TMP0, TMP0, CRET1 - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0; - |. xor TMP0, SFARG1HI, SFARG2HI - | xor TMP1, SFARG1LO, SFARG2LO - | or AT, TMP0, TMP1 - | jr ra - |. sltiu CRET1, AT, 1 // Same values: return 1. - |8: - | jr ra - |. li CRET1, 1 - |9: - | jr ra - |. li CRET1, 0 - |.endif - | - |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. - |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. - |->vm_sfcmpult: - |.if not FPU - | b >1 - |. li CRET2, 1 - |.endif - | - |->vm_sfcmpolt: - |.if not FPU - | li CRET2, 0 - |1: - | sll AT, SFARG1HI, 1 - | sll TMP0, SFARG2HI, 1 - | or CRET1, SFARG1LO, SFARG2LO - | or TMP1, AT, TMP0 - | or TMP1, TMP1, CRET1 - | beqz TMP1, >8 // Both args +-0: return 0. - |. sltu CRET1, r0, SFARG1LO - | lui TMP1, 0xffe0 - | addu AT, AT, CRET1 - | sltu CRET1, r0, SFARG2LO - | sltu AT, TMP1, AT - | addu TMP0, TMP0, CRET1 - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; - |. and AT, SFARG1HI, SFARG2HI - | bltz AT, >5 // Both args negative? - |. nop - | beq SFARG1HI, SFARG2HI, >8 - |. sltu CRET1, SFARG1LO, SFARG2LO - | jr ra - |. slt CRET1, SFARG1HI, SFARG2HI - |5: // Swap conditions if both operands are negative. - | beq SFARG1HI, SFARG2HI, >8 - |. sltu CRET1, SFARG2LO, SFARG1LO - | jr ra - |. slt CRET1, SFARG2HI, SFARG1HI - |8: - | jr ra - |. nop - |9: - | jr ra - |. move CRET1, CRET2 - |.endif - | - |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. - |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. 
- |->vm_sfcmpolex: - |.if not FPU - | sll AT, SFARG1HI, 1 - | sll TMP0, SFARG2HI, 1 - | or CRET1, SFARG1LO, SFARG2LO - | or TMP1, AT, TMP0 - | or TMP1, TMP1, CRET1 - | beqz TMP1, >8 // Both args +-0: return 1. - |. sltu CRET1, r0, SFARG1LO - | lui TMP1, 0xffe0 - | addu AT, AT, CRET1 - | sltu CRET1, r0, SFARG2LO - | sltu AT, TMP1, AT - | addu TMP0, TMP0, CRET1 - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0; - |. and AT, SFARG1HI, SFARG2HI - | xor AT, AT, TMP3 - | bltz AT, >5 // Both args negative? - |. nop - | beq SFARG1HI, SFARG2HI, >6 - |. sltu CRET1, SFARG2LO, SFARG1LO - | jr ra - |. slt CRET1, SFARG2HI, SFARG1HI - |5: // Swap conditions if both operands are negative. - | beq SFARG1HI, SFARG2HI, >6 - |. sltu CRET1, SFARG1LO, SFARG2LO - | slt CRET1, SFARG1HI, SFARG2HI - |6: - | jr ra - |. nop - |8: - | jr ra - |. li CRET1, 1 - |9: - | jr ra - |. li CRET1, 0 - |.endif - | - |.macro sfmin_max, name, intins - |->vm_sf .. name: - |.if JIT and not FPU - | move TMP2, ra - | bal ->vm_sfcmpolt - |. nop - | move TMP0, CRET1 - | move SFRETHI, SFARG1HI - | move SFRETLO, SFARG1LO - | move ra, TMP2 - | intins SFRETHI, SFARG2HI, TMP0 - | jr ra - |. intins SFRETLO, SFARG2LO, TMP0 - |.endif - |.endmacro - | - | sfmin_max min, movz - | sfmin_max max, movn - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. Callback slot number in r1, g in r2. 
- |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - | saveregs - | lw CTSTATE, GL:r2->ctype_state - | addiu DISPATCH, r2, GG_G2DISP - | load_got lj_ccallback_enter - | sw r1, CTSTATE->cb.slot - | sw CARG1, CTSTATE->cb.gpr[0] - | sw CARG2, CTSTATE->cb.gpr[1] - | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] - | sw CARG3, CTSTATE->cb.gpr[2] - | sw CARG4, CTSTATE->cb.gpr[3] - | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] - | addiu TMP0, sp, CFRAME_SPACE+16 - | sw TMP0, CTSTATE->cb.stack - | sw r0, SAVE_PC // Any value outside of bytecode is ok. - | move CARG2, sp - | call_intern lj_ccallback_enter // (CTState *cts, void *cf) - |. move CARG1, CTSTATE - | // Returns lua_State *. - | lw BASE, L:CRET1->base - | lw RC, L:CRET1->top - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | move L, CRET1 - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | lw LFUNC:RB, FRAME_FUNC(BASE) - | .FPU mtc1 TMP3, TOBIT - | li_vmstate INTERP - | li TISNIL, LJ_TNIL - | subu RC, RC, BASE - | st_vmstate - | .FPU cvt.d.s TOBIT, TOBIT - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | load_got lj_ccallback_leave - | lw CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) - | sw BASE, L->base - | sw RB, L->top - | sw L, CTSTATE->L - | move CARG2, RA - | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) - |. move CARG1, CTSTATE - | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] - | lw CRET1, CTSTATE->cb.gpr[0] - | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] - | b ->vm_leave_unw - |. lw CRET2, CTSTATE->cb.gpr[1] - |.endif - | - |->vm_ffi_call: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. 
- |.if FFI - | .type CCSTATE, CCallState, CARG1 - | lw TMP1, CCSTATE->spadj - | lbu CARG2, CCSTATE->nsp - | move TMP2, sp - | subu sp, sp, TMP1 - | sw ra, -4(TMP2) - | sll CARG2, CARG2, 2 - | sw r16, -8(TMP2) - | sw CCSTATE, -12(TMP2) - | move r16, TMP2 - | addiu TMP1, CCSTATE, offsetof(CCallState, stack) - | addiu TMP2, sp, 16 - | beqz CARG2, >2 - |. addu TMP3, TMP1, CARG2 - |1: - | lw TMP0, 0(TMP1) - | addiu TMP1, TMP1, 4 - | sltu AT, TMP1, TMP3 - | sw TMP0, 0(TMP2) - | bnez AT, <1 - |. addiu TMP2, TMP2, 4 - |2: - | lw CFUNCADDR, CCSTATE->func - | lw CARG2, CCSTATE->gpr[1] - | lw CARG3, CCSTATE->gpr[2] - | lw CARG4, CCSTATE->gpr[3] - | .FPU ldc1 FARG1, CCSTATE->fpr[0] - | .FPU ldc1 FARG2, CCSTATE->fpr[1] - | jalr CFUNCADDR - |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. - | lw CCSTATE:TMP1, -12(r16) - | lw TMP2, -8(r16) - | lw ra, -4(r16) - | sw CRET1, CCSTATE:TMP1->gpr[0] - | sw CRET2, CCSTATE:TMP1->gpr[1] - |.if FPU - | sdc1 FRET1, CCSTATE:TMP1->fpr[0] - | sdc1 FRET2, CCSTATE:TMP1->fpr[1] - |.else - | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part. - | sw CARG2, CCSTATE:TMP1->gpr[3] - |.endif - | move sp, r16 - | jr ra - |. move r16, TMP2 - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. 
*/ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp - | addu RA, BASE, RA - | addu RD, BASE, RD - | lw RAHI, HI(RA) - | lw RDHI, HI(RD) - | lhu TMP2, OFS_RD(PC) - | addiu PC, PC, 4 - | bne RAHI, TISNUM, >2 - |. lw RALO, LO(RA) - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | lw RDLO, LO(RD) - | bne RDHI, TISNUM, >5 - |. decode_RD4b TMP2 - | slt AT, SFARG1LO, SFARG2LO - | addu TMP2, TMP2, TMP3 - | movop TMP2, r0, AT - |1: - | addu PC, PC, TMP2 - | ins_next - | - |2: // RA is not an integer. - | sltiu AT, RAHI, LJ_TISNUM - | beqz AT, ->vmeta_comp - |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sltiu AT, RDHI, LJ_TISNUM - |.if FPU - | ldc1 FRA, 0(RA) - | ldc1 FRD, 0(RD) - |.else - | lw RDLO, LO(RD) - |.endif - | beqz AT, >4 - |. decode_RD4b TMP2 - |3: // RA and RD are both numbers. - |.if FPU - | fcomp f20, f22 - | addu TMP2, TMP2, TMP3 - | b <1 - |. fmovop TMP2, r0 - |.else - | bal sfcomp - |. addu TMP2, TMP2, TMP3 - | b <1 - |. movop TMP2, r0, CRET1 - |.endif - | - |4: // RA is a number, RD is not a number. - | bne RDHI, TISNUM, ->vmeta_comp - | // RA is a number, RD is an integer. Convert RD to a number. - |.if FPU - |. lwc1 FRD, LO(RD) - | b <3 - |. cvt.d.w FRD, FRD - |.else - |. nop - |.if "RDHI" == "SFARG1HI" - | bal ->vm_sfi2d_1 - |.else - | bal ->vm_sfi2d_2 - |.endif - |. nop - | b <3 - |. nop - |.endif - | - |5: // RA is an integer, RD is not an integer - | sltiu AT, RDHI, LJ_TISNUM - | beqz AT, ->vmeta_comp - | // RA is an integer, RD is a number. Convert RA to a number. - |.if FPU - |. mtc1 RALO, FRA - | ldc1 FRD, 0(RD) - | b <3 - | cvt.d.w FRA, FRA - |.else - |. nop - |.if "RAHI" == "SFARG1HI" - | bal ->vm_sfi2d_1 - |.else - | bal ->vm_sfi2d_2 - |.endif - |. nop - | b <3 - |. 
nop - |.endif - |.endmacro - | - if (op == BC_ISLT) { - | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt - } else if (op == BC_ISGE) { - | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt - } else if (op == BC_ISLE) { - | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult - } else { - | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult - } - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1*8, RD = src2*8, JMP with RD = target - | addu RA, BASE, RA - | addiu PC, PC, 4 - | addu RD, BASE, RD - | lw SFARG1HI, HI(RA) - | lhu TMP2, -4+OFS_RD(PC) - | lw SFARG2HI, HI(RD) - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sltu AT, TISNUM, SFARG1HI - | sltu TMP0, TISNUM, SFARG2HI - | or AT, AT, TMP0 - if (vk) { - | beqz AT, ->BC_ISEQN_Z - } else { - | beqz AT, ->BC_ISNEN_Z - } - |. decode_RD4b TMP2 - | // Either or both types are not numbers. - | lw SFARG1LO, LO(RA) - | lw SFARG2LO, LO(RD) - | addu TMP2, TMP2, TMP3 - |.if FFI - | li TMP3, LJ_TCDATA - | beq SFARG1HI, TMP3, ->vmeta_equal_cd - |.endif - |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive? - |.if FFI - | beq SFARG2HI, TMP3, ->vmeta_equal_cd - |.endif - |. xor TMP3, SFARG1LO, SFARG2LO // Same tv? - | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type? - | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata? - | movz TMP3, r0, AT // Ignore tv if primitive. - | movn TMP0, r0, SFARG2HI // Tab/ud and same type? - | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv). - | movz TMP0, r0, AT - | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv. - if (vk) { - |. movn TMP2, r0, AT - } else { - |. movz TMP2, r0, AT - } - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! 
- | lw TAB:TMP1, TAB:SFARG1LO->metatable - | beqz TAB:TMP1, >1 // No metatable? - |. nop - | lbu TMP1, TAB:TMP1->nomm - | andi TMP1, TMP1, 1<1 // Or 'no __eq' flag set? - |. nop - | b ->vmeta_equal // Handle __eq metamethod. - |. li TMP0, 1-vk // ne = 0 or 1. - |1: - | addu PC, PC, TMP2 - | ins_next - break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src*8, RD = str_const*8 (~), JMP with RD = target - | addu RA, BASE, RA - | addiu PC, PC, 4 - | lw TMP0, HI(RA) - | srl RD, RD, 1 - | lw STR:TMP3, LO(RA) - | subu RD, KBASE, RD - | lhu TMP2, -4+OFS_RD(PC) - |.if FFI - | li AT, LJ_TCDATA - | beq TMP0, AT, ->vmeta_equal_cd - |.endif - |. lw STR:TMP1, -4(RD) // KBASE-4-str_const*4 - | addiu TMP0, TMP0, -LJ_TSTR - | decode_RD4b TMP2 - | xor TMP1, STR:TMP1, STR:TMP3 - | or TMP0, TMP0, TMP1 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - if (vk) { - | movn TMP2, r0, TMP0 - } else { - | movz TMP2, r0, TMP0 - } - | addu PC, PC, TMP2 - | ins_next - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src*8, RD = num_const*8, JMP with RD = target - | addu RA, BASE, RA - | addu RD, KBASE, RD - | lw SFARG1HI, HI(RA) - | lw SFARG2HI, HI(RD) - | lhu TMP2, OFS_RD(PC) - | addiu PC, PC, 4 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | decode_RD4b TMP2 - if (vk) { - |->BC_ISEQN_Z: - } else { - |->BC_ISNEN_Z: - } - | bne SFARG1HI, TISNUM, >3 - |. lw SFARG1LO, LO(RA) - | lw SFARG2LO, LO(RD) - | addu TMP2, TMP2, TMP3 - | bne SFARG2HI, TISNUM, >6 - |. xor AT, SFARG1LO, SFARG2LO - if (vk) { - | movn TMP2, r0, AT - |1: - | addu PC, PC, TMP2 - |2: - } else { - | movz TMP2, r0, AT - |1: - |2: - | addu PC, PC, TMP2 - } - | ins_next - | - |3: // RA is not an integer. - | sltiu AT, SFARG1HI, LJ_TISNUM - |.if FFI - | beqz AT, >8 - |.else - | beqz AT, <2 - |.endif - |. addu TMP2, TMP2, TMP3 - | sltiu AT, SFARG2HI, LJ_TISNUM - |.if FPU - | ldc1 f20, 0(RA) - | ldc1 f22, 0(RD) - |.endif - | beqz AT, >5 - |. 
lw SFARG2LO, LO(RD) - |4: // RA and RD are both numbers. - |.if FPU - | c.eq.d f20, f22 - | b <1 - if (vk) { - |. movf TMP2, r0 - } else { - |. movt TMP2, r0 - } - |.else - | bal ->vm_sfcmpeq - |. nop - | b <1 - if (vk) { - |. movz TMP2, r0, CRET1 - } else { - |. movn TMP2, r0, CRET1 - } - |.endif - | - |5: // RA is a number, RD is not a number. - |.if FFI - | bne SFARG2HI, TISNUM, >9 - |.else - | bne SFARG2HI, TISNUM, <2 - |.endif - | // RA is a number, RD is an integer. Convert RD to a number. - |.if FPU - |. lwc1 f22, LO(RD) - | b <4 - |. cvt.d.w f22, f22 - |.else - |. nop - | bal ->vm_sfi2d_2 - |. nop - | b <4 - |. nop - |.endif - | - |6: // RA is an integer, RD is not an integer - | sltiu AT, SFARG2HI, LJ_TISNUM - |.if FFI - | beqz AT, >9 - |.else - | beqz AT, <2 - |.endif - | // RA is an integer, RD is a number. Convert RA to a number. - |.if FPU - |. mtc1 SFARG1LO, f20 - | ldc1 f22, 0(RD) - | b <4 - | cvt.d.w f20, f20 - |.else - |. nop - | bal ->vm_sfi2d_1 - |. nop - | b <4 - |. nop - |.endif - | - |.if FFI - |8: - | li AT, LJ_TCDATA - | bne SFARG1HI, AT, <2 - |. nop - | b ->vmeta_equal_cd - |. nop - |9: - | li AT, LJ_TCDATA - | bne SFARG2HI, AT, <2 - |. nop - | b ->vmeta_equal_cd - |. nop - |.endif - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target - | addu RA, BASE, RA - | srl TMP1, RD, 3 - | lw TMP0, HI(RA) - | lhu TMP2, OFS_RD(PC) - | not TMP1, TMP1 - | addiu PC, PC, 4 - |.if FFI - | li AT, LJ_TCDATA - | beq TMP0, AT, ->vmeta_equal_cd - |.endif - |. 
xor TMP0, TMP0, TMP1 - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - if (vk) { - | movn TMP2, r0, TMP0 - } else { - | movz TMP2, r0, TMP0 - } - | addu PC, PC, TMP2 - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst*8 or unused, RD = src*8, JMP with RD = target - | addu RD, BASE, RD - | lhu TMP2, OFS_RD(PC) - | lw TMP0, HI(RD) - | addiu PC, PC, 4 - if (op == BC_IST || op == BC_ISF) { - | sltiu TMP0, TMP0, LJ_TISTRUECOND - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - if (op == BC_IST) { - | movz TMP2, r0, TMP0 - } else { - | movn TMP2, r0, TMP0 - } - | addu PC, PC, TMP2 - } else { - | sltiu TMP0, TMP0, LJ_TISTRUECOND - | lw SFRETHI, HI(RD) - | lw SFRETLO, LO(RD) - if (op == BC_ISTC) { - | beqz TMP0, >1 - } else { - | bnez TMP0, >1 - } - |. addu RA, BASE, RA - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | addu PC, PC, TMP2 - |1: - } - | ins_next - break; - - case BC_ISTYPE: - | // RA = src*8, RD = -type*8 - | addu TMP2, BASE, RA - | srl TMP1, RD, 3 - | lw TMP0, HI(TMP2) - | ins_next1 - | addu AT, TMP0, TMP1 - | bnez AT, ->vmeta_istype - |. ins_next2 - break; - case BC_ISNUM: - | // RA = src*8, RD = -(TISNUM-1)*8 - | addu TMP2, BASE, RA - | lw TMP0, HI(TMP2) - | ins_next1 - | sltiu AT, TMP0, LJ_TISNUM - | beqz AT, ->vmeta_istype - |. 
ins_next2 - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA = dst*8, RD = src*8 - | addu RD, BASE, RD - | addu RA, BASE, RA - | lw SFRETHI, HI(RD) - | lw SFRETLO, LO(RD) - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - break; - case BC_NOT: - | // RA = dst*8, RD = src*8 - | addu RD, BASE, RD - | addu RA, BASE, RA - | lw TMP0, HI(RD) - | li TMP1, LJ_TFALSE - | sltiu TMP0, TMP0, LJ_TISTRUECOND - | addiu TMP1, TMP0, LJ_TTRUE - | ins_next1 - | sw TMP1, HI(RA) - | ins_next2 - break; - case BC_UNM: - | // RA = dst*8, RD = src*8 - | addu RB, BASE, RD - | lw SFARG1HI, HI(RB) - | addu RA, BASE, RA - | bne SFARG1HI, TISNUM, >2 - |. lw SFARG1LO, LO(RB) - | lui TMP1, 0x8000 - | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31. - |. negu SFARG1LO, SFARG1LO - |1: - | ins_next1 - | sw SFARG1HI, HI(RA) - | sw SFARG1LO, LO(RA) - | ins_next2 - |2: - | sltiu AT, SFARG1HI, LJ_TISNUM - | beqz AT, ->vmeta_unm - |. lui TMP1, 0x8000 - | b <1 - |. xor SFARG1HI, SFARG1HI, TMP1 - break; - case BC_LEN: - | // RA = dst*8, RD = src*8 - | addu CARG2, BASE, RD - | addu RA, BASE, RA - | lw TMP0, HI(CARG2) - | lw CARG1, LO(CARG2) - | li AT, LJ_TSTR - | bne TMP0, AT, >2 - |. li AT, LJ_TTAB - | lw CRET1, STR:CARG1->len - |1: - | ins_next1 - | sw TISNUM, HI(RA) - | sw CRET1, LO(RA) - | ins_next2 - |2: - | bne TMP0, AT, ->vmeta_len - |. nop -#if LJ_52 - | lw TAB:TMP2, TAB:CARG1->metatable - | bnez TAB:TMP2, >9 - |. nop - |3: -#endif - |->BC_LEN_Z: - | load_got lj_tab_len - | call_intern lj_tab_len // (GCtab *t) - |. nop - | // Returns uint32_t (but less than 2^31). - | b <1 - |. nop -#if LJ_52 - |9: - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_len - |. nop -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro fpmod, a, b, c - | bal ->vm_floor // floor(b/c) - |. 
div.d FARG1, b, c - | mul.d a, FRET1, c - | sub.d a, b, a // b - floor(b/c)*c - |.endmacro - - |.macro sfpmod - | addiu sp, sp, -16 - | - | load_got __divdf3 - | sw SFARG1HI, HI(sp) - | sw SFARG1LO, LO(sp) - | sw SFARG2HI, 8+HI(sp) - | call_extern - |. sw SFARG2LO, 8+LO(sp) - | - | load_got floor - | move SFARG1HI, SFRETHI - | call_extern - |. move SFARG1LO, SFRETLO - | - | load_got __muldf3 - | move SFARG1HI, SFRETHI - | move SFARG1LO, SFRETLO - | lw SFARG2HI, 8+HI(sp) - | call_extern - |. lw SFARG2LO, 8+LO(sp) - | - | load_got __subdf3 - | lw SFARG1HI, HI(sp) - | lw SFARG1LO, LO(sp) - | move SFARG2HI, SFRETHI - | call_extern - |. move SFARG2LO, SFRETLO - | - | addiu sp, sp, 16 - |.endmacro - - |.macro ins_arithpre, label - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||switch (vk) { - ||case 0: - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | // RA = dst*8, RB = src1*8, RC = num_const*8 - | addu RB, BASE, RB - |.if "label" ~= "none" - | b label - |.endif - |. addu RC, KBASE, RC - || break; - ||case 1: - | decode_RB8a RC, INS - | decode_RB8b RC - | decode_RDtoRC8 RB, RD - | // RA = dst*8, RB = num_const*8, RC = src1*8 - | addu RC, BASE, RC - |.if "label" ~= "none" - | b label - |.endif - |. addu RB, KBASE, RB - || break; - ||default: - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | // RA = dst*8, RB = src1*8, RC = src2*8 - | addu RB, BASE, RB - |.if "label" ~= "none" - | b label - |.endif - |. addu RC, BASE, RC - || break; - ||} - |.endmacro - | - |.macro ins_arith, intins, fpins, fpcall, label - | ins_arithpre none - | - |.if "label" ~= "none" - |label: - |.endif - | - | lw SFARG1HI, HI(RB) - | lw SFARG2HI, HI(RC) - | - |.if "intins" ~= "div" - | - | // Check for two integers. - | lw SFARG1LO, LO(RB) - | bne SFARG1HI, TISNUM, >5 - |. lw SFARG2LO, LO(RC) - | bne SFARG2HI, TISNUM, >5 - | - |.if "intins" == "addu" - |. 
intins CRET1, SFARG1LO, SFARG2LO - | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow. - | xor TMP2, CRET1, SFARG2LO - | and TMP1, TMP1, TMP2 - | bltz TMP1, ->vmeta_arith - |. addu RA, BASE, RA - |.elif "intins" == "subu" - |. intins CRET1, SFARG1LO, SFARG2LO - | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow. - | xor TMP2, SFARG1LO, SFARG2LO - | and TMP1, TMP1, TMP2 - | bltz TMP1, ->vmeta_arith - |. addu RA, BASE, RA - |.elif "intins" == "mult" - |. intins SFARG1LO, SFARG2LO - | mflo CRET1 - | mfhi TMP2 - | sra TMP1, CRET1, 31 - | bne TMP1, TMP2, ->vmeta_arith - |. addu RA, BASE, RA - |.else - |. load_got lj_vm_modi - | beqz SFARG2LO, ->vmeta_arith - |. addu RA, BASE, RA - |.if ENDIAN_BE - | move CARG1, SFARG1LO - |.endif - | call_extern - |. move CARG2, SFARG2LO - |.endif - | - | ins_next1 - | sw TISNUM, HI(RA) - | sw CRET1, LO(RA) - |3: - | ins_next2 - | - |.elif not FPU - | - | lw SFARG1LO, LO(RB) - | lw SFARG2LO, LO(RC) - | - |.endif - | - |5: // Check for two numbers. - | .FPU ldc1 f20, 0(RB) - | sltiu AT, SFARG1HI, LJ_TISNUM - | sltiu TMP0, SFARG2HI, LJ_TISNUM - | .FPU ldc1 f22, 0(RC) - | and AT, AT, TMP0 - | beqz AT, ->vmeta_arith - |. addu RA, BASE, RA - | - |.if FPU - | fpins FRET1, f20, f22 - |.elif "fpcall" == "sfpmod" - | sfpmod - |.else - | load_got fpcall - | call_extern - |. nop - |.endif - | - | ins_next1 - |.if not FPU - | sw SFRETHI, HI(RA) - |.endif - |.if "intins" ~= "div" - | b <3 - |.endif - |.if FPU - |. sdc1 FRET1, 0(RA) - |.else - |. 
sw SFRETLO, LO(RA) - |.endif - |.if "intins" == "div" - | ins_next2 - |.endif - | - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arith addu, add.d, __adddf3, none - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arith subu, sub.d, __subdf3, none - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith mult, mul.d, __muldf3, none - break; - case BC_DIVVN: - | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z - break; - case BC_DIVNV: case BC_DIVVV: - | ins_arithpre ->BC_DIVVN_Z - break; - case BC_MODVN: - | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z - break; - case BC_MODNV: case BC_MODVV: - | ins_arithpre ->BC_MODVN_Z - break; - case BC_POW: - | ins_arithpre none - | lw SFARG1HI, HI(RB) - | lw SFARG2HI, HI(RC) - | sltiu AT, SFARG1HI, LJ_TISNUM - | sltiu TMP0, SFARG2HI, LJ_TISNUM - | and AT, AT, TMP0 - | load_got pow - | beqz AT, ->vmeta_arith - |. addu RA, BASE, RA - |.if FPU - | ldc1 FARG1, 0(RB) - | ldc1 FARG2, 0(RC) - |.else - | lw SFARG1LO, LO(RB) - | lw SFARG2LO, LO(RC) - |.endif - | call_extern - |. nop - | ins_next1 - |.if FPU - | sdc1 FRET1, 0(RA) - |.else - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - |.endif - | ins_next2 - break; - - case BC_CAT: - | // RA = dst*8, RB = src_start*8, RC = src_end*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | subu CARG3, RC, RB - | sw BASE, L->base - | addu CARG2, BASE, RC - | move MULTRES, RB - |->BC_CAT_Z: - | load_got lj_meta_cat - | srl CARG3, CARG3, 3 - | sw PC, SAVE_PC - | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left) - |. move CARG1, L - | // Returns NULL (finished) or TValue * (metamethod). - | bnez CRET1, ->vmeta_binop - |. 
lw BASE, L->base - | addu RB, BASE, MULTRES - | lw SFRETHI, HI(RB) - | lw SFRETLO, LO(RB) - | addu RA, BASE, RA - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst*8, RD = str_const*8 (~) - | srl TMP1, RD, 1 - | subu TMP1, KBASE, TMP1 - | ins_next1 - | lw TMP0, -4(TMP1) // KBASE-4-str_const*4 - | addu RA, BASE, RA - | li TMP2, LJ_TSTR - | sw TMP0, LO(RA) - | sw TMP2, HI(RA) - | ins_next2 - break; - case BC_KCDATA: - |.if FFI - | // RA = dst*8, RD = cdata_const*8 (~) - | srl TMP1, RD, 1 - | subu TMP1, KBASE, TMP1 - | ins_next1 - | lw TMP0, -4(TMP1) // KBASE-4-cdata_const*4 - | addu RA, BASE, RA - | li TMP2, LJ_TCDATA - | sw TMP0, LO(RA) - | sw TMP2, HI(RA) - | ins_next2 - |.endif - break; - case BC_KSHORT: - | // RA = dst*8, RD = int16_literal*8 - | sra RD, INS, 16 - | addu RA, BASE, RA - | ins_next1 - | sw TISNUM, HI(RA) - | sw RD, LO(RA) - | ins_next2 - break; - case BC_KNUM: - | // RA = dst*8, RD = num_const*8 - | addu RD, KBASE, RD - | addu RA, BASE, RA - | lw SFRETHI, HI(RD) - | lw SFRETLO, LO(RD) - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - break; - case BC_KPRI: - | // RA = dst*8, RD = primitive_type*8 (~) - | srl TMP1, RD, 3 - | addu RA, BASE, RA - | not TMP0, TMP1 - | ins_next1 - | sw TMP0, HI(RA) - | ins_next2 - break; - case BC_KNIL: - | // RA = base*8, RD = end*8 - | addu RA, BASE, RA - | sw TISNIL, HI(RA) - | addiu RA, RA, 8 - | addu RD, BASE, RD - |1: - | sw TISNIL, HI(RA) - | slt AT, RA, RD - | bnez AT, <1 - |. 
addiu RA, RA, 8 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst*8, RD = uvnum*8 - | lw LFUNC:RB, FRAME_FUNC(BASE) - | srl RD, RD, 1 - | addu RD, RD, LFUNC:RB - | lw UPVAL:RB, LFUNC:RD->uvptr - | ins_next1 - | lw TMP1, UPVAL:RB->v - | lw SFRETHI, HI(TMP1) - | lw SFRETLO, LO(TMP1) - | addu RA, BASE, RA - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - break; - case BC_USETV: - | // RA = uvnum*8, RD = src*8 - | lw LFUNC:RB, FRAME_FUNC(BASE) - | srl RA, RA, 1 - | addu RD, BASE, RD - | addu RA, RA, LFUNC:RB - | lw UPVAL:RB, LFUNC:RA->uvptr - | lw SFRETHI, HI(RD) - | lw SFRETLO, LO(RD) - | lbu TMP3, UPVAL:RB->marked - | lw CARG2, UPVAL:RB->v - | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbu TMP0, UPVAL:RB->closed - | sw SFRETHI, HI(CARG2) - | sw SFRETLO, LO(CARG2) - | li AT, LJ_GC_BLACK|1 - | or TMP3, TMP3, TMP0 - | beq TMP3, AT, >2 // Upvalue is closed and black? - |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1) - |1: - | ins_next - | - |2: // Check if new value is collectable. - | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) - | beqz AT, <1 // tvisgcv(v) - |. nop - | lbu TMP3, GCOBJ:SFRETLO->gch.marked - | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) - | beqz TMP3, <1 - |. load_got lj_gc_barrieruv - | // Crossed a write barrier. Move the barrier forward. - | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) - |. addiu CARG1, DISPATCH, GG_DISP2G - | b <1 - |. nop - break; - case BC_USETS: - | // RA = uvnum*8, RD = str_const*8 (~) - | lw LFUNC:RB, FRAME_FUNC(BASE) - | srl RA, RA, 1 - | srl TMP1, RD, 1 - | addu RA, RA, LFUNC:RB - | subu TMP1, KBASE, TMP1 - | lw UPVAL:RB, LFUNC:RA->uvptr - | lw STR:TMP1, -4(TMP1) // KBASE-4-str_const*4 - | lbu TMP2, UPVAL:RB->marked - | lw CARG2, UPVAL:RB->v - | lbu TMP3, STR:TMP1->marked - | andi AT, TMP2, LJ_GC_BLACK // isblack(uv) - | lbu TMP2, UPVAL:RB->closed - | li TMP0, LJ_TSTR - | sw STR:TMP1, LO(CARG2) - | bnez AT, >2 - |. 
sw TMP0, HI(CARG2) - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | beqz TMP2, <1 - |. andi AT, TMP3, LJ_GC_WHITES // iswhite(str) - | beqz AT, <1 - |. load_got lj_gc_barrieruv - | // Crossed a write barrier. Move the barrier forward. - | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) - |. addiu CARG1, DISPATCH, GG_DISP2G - | b <1 - |. nop - break; - case BC_USETN: - | // RA = uvnum*8, RD = num_const*8 - | lw LFUNC:RB, FRAME_FUNC(BASE) - | srl RA, RA, 1 - | addu RD, KBASE, RD - | addu RA, RA, LFUNC:RB - | lw UPVAL:RB, LFUNC:RA->uvptr - | lw SFRETHI, HI(RD) - | lw SFRETLO, LO(RD) - | lw TMP1, UPVAL:RB->v - | ins_next1 - | sw SFRETHI, HI(TMP1) - | sw SFRETLO, LO(TMP1) - | ins_next2 - break; - case BC_USETP: - | // RA = uvnum*8, RD = primitive_type*8 (~) - | lw LFUNC:RB, FRAME_FUNC(BASE) - | srl RA, RA, 1 - | srl TMP0, RD, 3 - | addu RA, RA, LFUNC:RB - | not TMP0, TMP0 - | lw UPVAL:RB, LFUNC:RA->uvptr - | ins_next1 - | lw TMP1, UPVAL:RB->v - | sw TMP0, HI(TMP1) - | ins_next2 - break; - - case BC_UCLO: - | // RA = level*8, RD = target - | lw TMP2, L->openupval - | branch_RD // Do this first since RD is not saved. - | load_got lj_func_closeuv - | sw BASE, L->base - | beqz TMP2, >1 - |. move CARG1, L - | call_intern lj_func_closeuv // (lua_State *L, TValue *level) - |. addu CARG2, BASE, RA - | lw BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) - | srl TMP1, RD, 1 - | load_got lj_func_newL_gc - | subu TMP1, KBASE, TMP1 - | lw CARG3, FRAME_FUNC(BASE) - | lw CARG2, -4(TMP1) // KBASE-4-tab_const*4 - | sw BASE, L->base - | sw PC, SAVE_PC - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | call_intern lj_func_newL_gc - |. move CARG1, L - | // Returns GCfuncL *. 
- | lw BASE, L->base - | li TMP0, LJ_TFUNC - | ins_next1 - | addu RA, BASE, RA - | sw LFUNC:CRET1, LO(RA) - | sw TMP0, HI(RA) - | ins_next2 - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) - | lw TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | lw TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | sw BASE, L->base - | sw PC, SAVE_PC - | sltu AT, TMP0, TMP1 - | beqz AT, >5 - |1: - if (op == BC_TNEW) { - | load_got lj_tab_new - | srl CARG2, RD, 3 - | andi CARG2, CARG2, 0x7ff - | li TMP0, 0x801 - | addiu AT, CARG2, -0x7ff - | srl CARG3, RD, 14 - | movz CARG2, TMP0, AT - | // (lua_State *L, int32_t asize, uint32_t hbits) - | call_intern lj_tab_new - |. move CARG1, L - | // Returns Table *. - } else { - | load_got lj_tab_dup - | srl TMP1, RD, 1 - | subu TMP1, KBASE, TMP1 - | move CARG1, L - | call_intern lj_tab_dup // (lua_State *L, Table *kt) - |. lw CARG2, -4(TMP1) // KBASE-4-str_const*4 - | // Returns Table *. - } - | lw BASE, L->base - | ins_next1 - | addu RA, BASE, RA - | li TMP0, LJ_TTAB - | sw TAB:CRET1, LO(RA) - | sw TMP0, HI(RA) - | ins_next2 - |5: - | load_got lj_gc_step_fixtop - | move MULTRES, RD - | call_intern lj_gc_step_fixtop // (lua_State *L) - |. move CARG1, L - | b <1 - |. move RD, MULTRES - break; - - case BC_GGET: - | // RA = dst*8, RD = str_const*8 (~) - case BC_GSET: - | // RA = src*8, RD = str_const*8 (~) - | lw LFUNC:TMP2, FRAME_FUNC(BASE) - | srl TMP1, RD, 1 - | subu TMP1, KBASE, TMP1 - | lw TAB:RB, LFUNC:TMP2->env - | lw STR:RC, -4(TMP1) // KBASE-4-str_const*4 - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - |. 
addu RA, BASE, RA - break; - - case BC_TGETV: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | addu CARG2, BASE, RB - | addu CARG3, BASE, RC - | lw TMP1, HI(CARG2) - | lw TMP2, HI(CARG3) - | lw TAB:RB, LO(CARG2) - | li AT, LJ_TTAB - | bne TMP1, AT, ->vmeta_tgetv - |. addu RA, BASE, RA - | bne TMP2, TISNUM, >5 - |. lw RC, LO(CARG3) - | lw TMP0, TAB:RB->asize - | lw TMP1, TAB:RB->array - | sltu AT, RC, TMP0 - | sll TMP2, RC, 3 - | beqz AT, ->vmeta_tgetv // Integer key and in array part? - |. addu TMP2, TMP1, TMP2 - | lw SFRETHI, HI(TMP2) - | beq SFRETHI, TISNIL, >2 - |. lw SFRETLO, LO(TMP2) - |1: - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - | - |2: // Check for __index if table value is nil. - | lw TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_tgetv - |. nop - | - |5: - | li AT, LJ_TSTR - | bne TMP2, AT, ->vmeta_tgetv - |. nop - | b ->BC_TGETS_Z // String key? - |. nop - break; - case BC_TGETS: - | // RA = dst*8, RB = table*8, RC = str_const*4 (~) - | decode_RB8a RB, INS - | decode_RB8b RB - | addu CARG2, BASE, RB - | decode_RC4a RC, INS - | lw TMP0, HI(CARG2) - | decode_RC4b RC - | li AT, LJ_TTAB - | lw TAB:RB, LO(CARG2) - | subu CARG3, KBASE, RC - | lw STR:RC, -4(CARG3) // KBASE-4-str_const*4 - | bne TMP0, AT, ->vmeta_tgets1 - |. 
addu RA, BASE, RA - |->BC_TGETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 - | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash - | lw NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | sll TMP0, TMP1, 5 - | sll TMP1, TMP1, 3 - | subu TMP1, TMP0, TMP1 - | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |1: - | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) - | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) - | lw NODE:TMP1, NODE:TMP2->next - | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2) - | addiu CARG1, CARG1, -LJ_TSTR - | xor TMP0, TMP0, STR:RC - | or AT, CARG1, TMP0 - | bnez AT, >4 - |. lw TAB:TMP3, TAB:RB->metatable - | beq SFRETHI, TISNIL, >5 // Key found, but nil value? - |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2) - |3: - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - | - |4: // Follow hash chain. - | bnez NODE:TMP1, <1 - |. move NODE:TMP2, NODE:TMP1 - | // End of hash chain: key not found, nil result. - | - |5: // Check for __index if table value is nil. - | beqz TAB:TMP3, <3 // No metatable: done. - |. li SFRETHI, LJ_TNIL - | lbu TMP0, TAB:TMP3->nomm - | andi TMP0, TMP0, 1<vmeta_tgets - |. nop - break; - case BC_TGETB: - | // RA = dst*8, RB = table*8, RC = index*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | addu CARG2, BASE, RB - | decode_RDtoRC8 RC, RD - | lw CARG1, HI(CARG2) - | li AT, LJ_TTAB - | lw TAB:RB, LO(CARG2) - | addu RA, BASE, RA - | bne CARG1, AT, ->vmeta_tgetb - |. srl TMP0, RC, 3 - | lw TMP1, TAB:RB->asize - | lw TMP2, TAB:RB->array - | sltu AT, TMP0, TMP1 - | beqz AT, ->vmeta_tgetb - |. addu RC, TMP2, RC - | lw SFRETHI, HI(RC) - | beq SFRETHI, TISNIL, >5 - |. lw SFRETLO, LO(RC) - |1: - | ins_next1 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | ins_next2 - | - |5: // Check for __index if table value is nil. - | lw TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. 
nop - | lbu TMP1, TAB:TMP2->nomm - | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! - |. nop - break; - case BC_TGETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | addu RB, BASE, RB - | addu RC, BASE, RC - | lw TAB:CARG1, LO(RB) - | lw CARG2, LO(RC) - | addu RA, BASE, RA - | lw TMP0, TAB:CARG1->asize - | lw TMP1, TAB:CARG1->array - | sltu AT, CARG2, TMP0 - | sll TMP2, CARG2, 3 - | beqz AT, ->vmeta_tgetr // In array part? - |. addu CRET1, TMP1, TMP2 - | lw SFARG2HI, HI(CRET1) - | lw SFARG2LO, LO(CRET1) - |->BC_TGETR_Z: - | ins_next1 - | sw SFARG2HI, HI(RA) - | sw SFARG2LO, LO(RA) - | ins_next2 - break; - - case BC_TSETV: - | // RA = src*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | addu CARG2, BASE, RB - | addu CARG3, BASE, RC - | lw TMP1, HI(CARG2) - | lw TMP2, HI(CARG3) - | lw TAB:RB, LO(CARG2) - | li AT, LJ_TTAB - | bne TMP1, AT, ->vmeta_tsetv - |. addu RA, BASE, RA - | bne TMP2, TISNUM, >5 - |. lw RC, LO(CARG3) - | lw TMP0, TAB:RB->asize - | lw TMP1, TAB:RB->array - | sltu AT, RC, TMP0 - | sll TMP2, RC, 3 - | beqz AT, ->vmeta_tsetv // Integer key and in array part? - |. addu TMP1, TMP1, TMP2 - | lw TMP0, HI(TMP1) - | lbu TMP3, TAB:RB->marked - | lw SFRETHI, HI(RA) - | beq TMP0, TISNIL, >3 - |. lw SFRETLO, LO(RA) - |1: - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | sw SFRETHI, HI(TMP1) - | bnez AT, >7 - |. sw SFRETLO, LO(TMP1) - |2: - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | lw TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP2, TAB:TMP2->nomm - | andi TMP2, TMP2, 1<vmeta_tsetv - |. nop - | - |5: - | li AT, LJ_TSTR - | bne TMP2, AT, ->vmeta_tsetv - |. nop - | b ->BC_TSETS_Z // String key? - |. nop - | - |7: // Possible table write barrier for the value. Skip valiswhite check. 
- | barrierback TAB:RB, TMP3, TMP0, <2 - break; - case BC_TSETS: - | // RA = src*8, RB = table*8, RC = str_const*8 (~) - | decode_RB8a RB, INS - | decode_RB8b RB - | addu CARG2, BASE, RB - | decode_RC4a RC, INS - | lw TMP0, HI(CARG2) - | decode_RC4b RC - | li AT, LJ_TTAB - | subu CARG3, KBASE, RC - | lw TAB:RB, LO(CARG2) - | lw STR:RC, -4(CARG3) // KBASE-4-str_const*4 - | bne TMP0, AT, ->vmeta_tsets1 - |. addu RA, BASE, RA - |->BC_TSETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 - | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash - | lw NODE:TMP2, TAB:RB->node - | sb r0, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | sll TMP0, TMP1, 5 - | sll TMP1, TMP1, 3 - | subu TMP1, TMP0, TMP1 - | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |.if FPU - | ldc1 f20, 0(RA) - |.else - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - |.endif - |1: - | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) - | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) - | li AT, LJ_TSTR - | lw NODE:TMP1, NODE:TMP2->next - | bne CARG1, AT, >5 - |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) - | bne TMP0, STR:RC, >5 - |. lbu TMP3, TAB:RB->marked - | beq CARG2, TISNIL, >4 // Key found, but nil value? - |. lw TAB:TMP0, TAB:RB->metatable - |2: - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - |.if FPU - | bnez AT, >7 - |. sdc1 f20, NODE:TMP2->val - |.else - | sw SFRETHI, NODE:TMP2->val.u32.hi - | bnez AT, >7 - |. sw SFRETLO, NODE:TMP2->val.u32.lo - |.endif - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | beqz TAB:TMP0, <2 // No metatable: done. - |. nop - | lbu TMP0, TAB:TMP0->nomm - | andi TMP0, TMP0, 1<vmeta_tsets - |. nop - | - |5: // Follow hash chain. - | bnez NODE:TMP1, <1 - |. move NODE:TMP2, NODE:TMP1 - | // End of hash chain: key not found, add a new one - | - | // But check for __newindex first. 
- | lw TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, >6 // No metatable: continue. - |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. - |. li AT, LJ_TSTR - |6: - | load_got lj_tab_newkey - | sw STR:RC, LO(CARG3) - | sw AT, HI(CARG3) - | sw BASE, L->base - | move CARG2, TAB:RB - | sw PC, SAVE_PC - | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k - |. move CARG1, L - | // Returns TValue *. - | lw BASE, L->base - |.if FPU - | b <3 // No 2nd write barrier needed. - |. sdc1 f20, 0(CRET1) - |.else - | lw SFARG1HI, HI(RA) - | lw SFARG1LO, LO(RA) - | sw SFARG1HI, HI(CRET1) - | b <3 // No 2nd write barrier needed. - |. sw SFARG1LO, LO(CRET1) - |.endif - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0, <3 - break; - case BC_TSETB: - | // RA = src*8, RB = table*8, RC = index*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | addu CARG2, BASE, RB - | decode_RDtoRC8 RC, RD - | lw CARG1, HI(CARG2) - | li AT, LJ_TTAB - | lw TAB:RB, LO(CARG2) - | addu RA, BASE, RA - | bne CARG1, AT, ->vmeta_tsetb - |. srl TMP0, RC, 3 - | lw TMP1, TAB:RB->asize - | lw TMP2, TAB:RB->array - | sltu AT, TMP0, TMP1 - | beqz AT, ->vmeta_tsetb - |. addu RC, TMP2, RC - | lw TMP1, HI(RC) - | lbu TMP3, TAB:RB->marked - | beq TMP1, TISNIL, >5 - |1: - |. lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | sw SFRETHI, HI(RC) - | bnez AT, >7 - |. sw SFRETLO, LO(RC) - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | lw TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP1, TAB:TMP2->nomm - | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! - |. nop - | - |7: // Possible table write barrier for the value. Skip valiswhite check. 
- | barrierback TAB:RB, TMP3, TMP0, <2 - break; - case BC_TSETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | addu CARG1, BASE, RB - | addu CARG3, BASE, RC - | lw TAB:CARG2, LO(CARG1) - | lw CARG3, LO(CARG3) - | lbu TMP3, TAB:CARG2->marked - | lw TMP0, TAB:CARG2->asize - | lw TMP1, TAB:CARG2->array - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |. addu RA, BASE, RA - |2: - | sltu AT, CARG3, TMP0 - | sll TMP2, CARG3, 3 - | beqz AT, ->vmeta_tsetr // In array part? - |. addu CRET1, TMP1, TMP2 - |->BC_TSETR_Z: - | lw SFARG1HI, HI(RA) - | lw SFARG1LO, LO(RA) - | ins_next1 - | sw SFARG1HI, HI(CRET1) - | sw SFARG1LO, LO(CRET1) - | ins_next2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0, <2 - break; - - case BC_TSETM: - | // RA = base*8 (table at base-1), RD = num_const*8 (start index) - | addu RA, BASE, RA - |1: - | addu TMP3, KBASE, RD - | lw TAB:CARG2, -8+LO(RA) // Guaranteed to be a table. - | addiu TMP0, MULTRES, -8 - | lw TMP3, LO(TMP3) // Integer constant is in lo-word. - | beqz TMP0, >4 // Nothing to copy? - |. srl CARG3, TMP0, 3 - | addu CARG3, CARG3, TMP3 - | lw TMP2, TAB:CARG2->asize - | sll TMP1, TMP3, 3 - | lbu TMP3, TAB:CARG2->marked - | lw CARG1, TAB:CARG2->array - | sltu AT, TMP2, CARG3 - | bnez AT, >5 - |. addu TMP2, RA, TMP0 - | addu TMP1, TMP1, CARG1 - | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) - |3: // Copy result slots to table. - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | addiu RA, RA, 8 - | sltu AT, RA, TMP2 - | sw SFRETHI, HI(TMP1) - | sw SFRETLO, LO(TMP1) - | bnez AT, <3 - |. addiu TMP1, TMP1, 8 - | bnez TMP0, >7 - |. nop - |4: - | ins_next - | - |5: // Need to resize array part. - | load_got lj_tab_reasize - | sw BASE, L->base - | sw PC, SAVE_PC - | move BASE, RD - | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - |. 
move CARG1, L - | // Must not reallocate the stack. - | move RD, BASE - | b <1 - |. lw BASE, L->base // Reload BASE for lack of a saved register. - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0, <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 - | decode_RDtoRC8 NARGS8:RC, RD - | b ->BC_CALL_Z - |. addu NARGS8:RC, NARGS8:RC, MULTRES - break; - case BC_CALL: - | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 - | decode_RDtoRC8 NARGS8:RC, RD - |->BC_CALL_Z: - | move TMP2, BASE - | addu BASE, BASE, RA - | li AT, LJ_TFUNC - | lw TMP0, HI(BASE) - | lw LFUNC:RB, LO(BASE) - | addiu BASE, BASE, 8 - | bne TMP0, AT, ->vmeta_call - |. addiu NARGS8:RC, NARGS8:RC, -8 - | ins_call - break; - - case BC_CALLMT: - | // RA = base*8, (RB = 0,) RC = extra_nargs*8 - | addu NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD. - | // Fall through. Assumes BC_CALLT follows. - break; - case BC_CALLT: - | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 - | addu RA, BASE, RA - | li AT, LJ_TFUNC - | lw TMP0, HI(RA) - | lw LFUNC:RB, LO(RA) - | move NARGS8:RC, RD - | lw TMP1, FRAME_PC(BASE) - | addiu RA, RA, 8 - | bne TMP0, AT, ->vmeta_callt - |. addiu NARGS8:RC, NARGS8:RC, -8 - |->BC_CALLT_Z: - | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. - | lbu TMP3, LFUNC:RB->ffid - | bnez TMP0, >7 - |. xori TMP2, TMP1, FRAME_VARG - |1: - | sw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. - | sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function? - | move TMP2, BASE - | beqz NARGS8:RC, >3 - |. move TMP3, NARGS8:RC - |2: - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | addiu RA, RA, 8 - | addiu TMP3, TMP3, -8 - | sw SFRETHI, HI(TMP2) - | sw SFRETLO, LO(TMP2) - | bnez TMP3, <2 - |. addiu TMP2, TMP2, 8 - |3: - | or TMP0, TMP0, AT - | beqz TMP0, >5 - |. 
nop - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | lw INS, -4(TMP1) - | decode_RA8a RA, INS - | decode_RA8b RA - | subu TMP1, BASE, RA - | lw LFUNC:TMP1, -8+FRAME_FUNC(TMP1) - | lw TMP1, LFUNC:TMP1->pc - | b <4 - |. lw KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. - | - |7: // Tailcall from a vararg function. - | andi AT, TMP2, FRAME_TYPEP - | bnez AT, <1 // Vararg frame below? - |. subu TMP2, BASE, TMP2 // Relocate BASE down. - | move BASE, TMP2 - | lw TMP1, FRAME_PC(TMP2) - | b <1 - |. andi TMP0, TMP1, FRAME_TYPE - break; - - case BC_ITERC: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) - | move TMP2, BASE - | addu BASE, BASE, RA - | li AT, LJ_TFUNC - | lw TMP1, -24+HI(BASE) - | lw LFUNC:RB, -24+LO(BASE) - | lw SFARG1HI, -16+HI(BASE) - | lw SFARG1LO, -16+LO(BASE) - | lw SFARG2HI, -8+HI(BASE) - | lw SFARG2LO, -8+LO(BASE) - | sw TMP1, HI(BASE) // Copy callable. - | sw LFUNC:RB, LO(BASE) - | sw SFARG1HI, 8+HI(BASE) // Copy state. - | sw SFARG1LO, 8+LO(BASE) - | sw SFARG2HI, 16+HI(BASE) // Copy control var. - | sw SFARG2LO, 16+LO(BASE) - | addiu BASE, BASE, 8 - | bne TMP1, AT, ->vmeta_call - |. li NARGS8:RC, 16 // Iterators get 2 arguments. - | ins_call - break; - - case BC_ITERN: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | addu RA, BASE, RA - | lw TAB:RB, -16+LO(RA) - | lw RC, -8+LO(RA) // Get index from control var. - | lw TMP0, TAB:RB->asize - | lw TMP1, TAB:RB->array - | addiu PC, PC, 4 - |1: // Traverse array part. - | sltu AT, RC, TMP0 - | beqz AT, >5 // Index points after array part? - |. sll TMP3, RC, 3 - | addu TMP3, TMP1, TMP3 - | lw SFARG1HI, HI(TMP3) - | lw SFARG1LO, LO(TMP3) - | lhu RD, -4+OFS_RD(PC) - | sw TISNUM, HI(RA) - | sw RC, LO(RA) - | beq SFARG1HI, TISNIL, <1 // Skip holes in array part. - |. 
addiu RC, RC, 1 - | sw SFARG1HI, 8+HI(RA) - | sw SFARG1LO, 8+LO(RA) - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | decode_RD4b RD - | addu RD, RD, TMP3 - | sw RC, -8+LO(RA) // Update control var. - | addu PC, PC, RD - |3: - | ins_next - | - |5: // Traverse hash part. - | lw TMP1, TAB:RB->hmask - | subu RC, RC, TMP0 - | lw TMP2, TAB:RB->node - |6: - | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1. - | bnez AT, <3 - |. sll TMP3, RC, 5 - | sll RB, RC, 3 - | subu TMP3, TMP3, RB - | addu NODE:TMP3, TMP3, TMP2 - | lw SFARG1HI, NODE:TMP3->val.u32.hi - | lw SFARG1LO, NODE:TMP3->val.u32.lo - | lhu RD, -4+OFS_RD(PC) - | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part. - |. addiu RC, RC, 1 - | lw SFARG2HI, NODE:TMP3->key.u32.hi - | lw SFARG2LO, NODE:TMP3->key.u32.lo - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sw SFARG1HI, 8+HI(RA) - | sw SFARG1LO, 8+LO(RA) - | addu RC, RC, TMP0 - | decode_RD4b RD - | addu RD, RD, TMP3 - | sw SFARG2HI, HI(RA) - | sw SFARG2LO, LO(RA) - | addu PC, PC, RD - | b <3 - |. sw RC, -8+LO(RA) // Update control var. - break; - - case BC_ISNEXT: - | // RA = base*8, RD = target (points to ITERN) - | addu RA, BASE, RA - | srl TMP0, RD, 1 - | lw CARG1, -24+HI(RA) - | lw CFUNC:CARG2, -24+LO(RA) - | addu TMP0, PC, TMP0 - | lw CARG3, -16+HI(RA) - | lw CARG4, -8+HI(RA) - | li AT, LJ_TFUNC - | bne CARG1, AT, >5 - |. lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) - | lbu CARG2, CFUNC:CARG2->ffid - | addiu CARG3, CARG3, -LJ_TTAB - | addiu CARG4, CARG4, -LJ_TNIL - | or CARG3, CARG3, CARG4 - | addiu CARG2, CARG2, -FF_next_N - | or CARG2, CARG2, CARG3 - | bnez CARG2, >5 - |. lui TMP1, 0xfffe - | addu PC, TMP0, TMP2 - | ori TMP1, TMP1, 0x7fff - | sw r0, -8+LO(RA) // Initialize control var. - | sw TMP1, -8+HI(RA) - |1: - | ins_next - |5: // Despecialize bytecode if any of the checks fail. - | li TMP3, BC_JMP - | li TMP1, BC_ITERC - | sb TMP3, -4+OFS_OP(PC) - | addu PC, TMP0, TMP2 - | b <1 - |. 
sb TMP1, OFS_OP(PC) - break; - - case BC_VARG: - | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 - | lw TMP0, FRAME_PC(BASE) - | decode_RDtoRC8 RC, RD - | decode_RB8a RB, INS - | addu RC, BASE, RC - | decode_RB8b RB - | addu RA, BASE, RA - | addiu RC, RC, FRAME_VARG - | addu TMP2, RA, RB - | addiu TMP3, BASE, -8 // TMP3 = vtop - | subu RC, RC, TMP0 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | beqz RB, >5 // Copy all varargs? - |. subu TMP1, TMP3, RC - | addiu TMP2, TMP2, -16 - |1: // Copy vararg slots to destination slots. - | lw CARG1, HI(RC) - | sltu AT, RC, TMP3 - | lw CARG2, LO(RC) - | addiu RC, RC, 8 - | movz CARG1, TISNIL, AT - | sw CARG1, HI(RA) - | sw CARG2, LO(RA) - | sltu AT, RA, TMP2 - | bnez AT, <1 - |. addiu RA, RA, 8 - |3: - | ins_next - | - |5: // Copy all varargs. - | lw TMP0, L->maxstack - | blez TMP1, <3 // No vararg slots? - |. li MULTRES, 8 // MULTRES = (0+1)*8 - | addu TMP2, RA, TMP1 - | sltu AT, TMP0, TMP2 - | bnez AT, >7 - |. addiu MULTRES, TMP1, 8 - |6: - | lw SFRETHI, HI(RC) - | lw SFRETLO, LO(RC) - | addiu RC, RC, 8 - | sw SFRETHI, HI(RA) - | sw SFRETLO, LO(RA) - | sltu AT, RC, TMP3 - | bnez AT, <6 // More vararg slots? - |. addiu RA, RA, 8 - | b <3 - |. nop - | - |7: // Grow stack for varargs. - | load_got lj_state_growstack - | sw RA, L->top - | subu RA, RA, BASE - | sw BASE, L->base - | subu BASE, RC, BASE // Need delta, because BASE may change. - | sw PC, SAVE_PC - | srl CARG2, TMP1, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | move RC, BASE - | lw BASE, L->base - | addu RA, BASE, RA - | addu RC, BASE, RC - | b <6 - |. addiu TMP3, BASE, -8 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results*8, RD = extra_nresults*8 - | addu RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. - | // Fall through. Assumes BC_RET follows. 
- break; - - case BC_RET: - | // RA = results*8, RD = (nresults+1)*8 - | lw PC, FRAME_PC(BASE) - | addu RA, BASE, RA - | move MULTRES, RD - |1: - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->BC_RETV_Z - |. xori TMP1, PC, FRAME_VARG - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return - | lw INS, -4(PC) - | addiu TMP2, BASE, -8 - | addiu RC, RD, -8 - | decode_RA8a TMP0, INS - | decode_RB8a RB, INS - | decode_RA8b TMP0 - | decode_RB8b RB - | addu TMP3, TMP2, RB - | beqz RC, >3 - |. subu BASE, TMP2, TMP0 - |2: - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - | addiu RA, RA, 8 - | addiu RC, RC, -8 - | sw SFRETHI, HI(TMP2) - | sw SFRETLO, LO(TMP2) - | bnez RC, <2 - |. addiu TMP2, TMP2, 8 - |3: - | addiu TMP3, TMP3, -8 - |5: - | sltu AT, TMP2, TMP3 - | bnez AT, >6 - |. lw LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | lw TMP1, LFUNC:TMP1->pc - | lw KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | sw TISNIL, HI(TMP2) - | b <5 - |. addiu TMP2, TMP2, 8 - | - |->BC_RETV_Z: // Non-standard return case. - | andi TMP2, TMP1, FRAME_TYPEP - | bnez TMP2, ->vm_return - |. nop - | // Return from vararg function: relocate BASE down. - | subu BASE, BASE, TMP1 - | b <1 - |. lw PC, FRAME_PC(BASE) - break; - - case BC_RET0: case BC_RET1: - | // RA = results*8, RD = (nresults+1)*8 - | lw PC, FRAME_PC(BASE) - | addu RA, BASE, RA - | move MULTRES, RD - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->BC_RETV_Z - |. xori TMP1, PC, FRAME_VARG - | - | lw INS, -4(PC) - | addiu TMP2, BASE, -8 - if (op == BC_RET1) { - | lw SFRETHI, HI(RA) - | lw SFRETLO, LO(RA) - } - | decode_RB8a RB, INS - | decode_RA8a RA, INS - | decode_RB8b RB - | decode_RA8b RA - if (op == BC_RET1) { - | sw SFRETHI, HI(TMP2) - | sw SFRETLO, LO(TMP2) - } - | subu BASE, TMP2, RA - |5: - | sltu AT, RD, RB - | bnez AT, >6 - |. 
lw LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | lw TMP1, LFUNC:TMP1->pc - | lw KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | addiu TMP2, TMP2, 8 - | addiu RD, RD, 8 - | b <5 - if (op == BC_RET1) { - |. sw TISNIL, HI(TMP2) - } else { - |. sw TISNIL, -8+HI(TMP2) - } - break; - - /* -- Loops and branches ------------------------------------------------ */ - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. - break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base*8, RD = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - | addu RA, BASE, RA - | lw SFARG1HI, FORL_IDX*8+HI(RA) - | lw SFARG1LO, FORL_IDX*8+LO(RA) - if (op != BC_JFORL) { - | srl RD, RD, 1 - | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, RD, TMP2 - } - if (!vk) { - | lw SFARG2HI, FORL_STOP*8+HI(RA) - | lw SFARG2LO, FORL_STOP*8+LO(RA) - | bne SFARG1HI, TISNUM, >5 - |. lw SFRETHI, FORL_STEP*8+HI(RA) - | xor AT, SFARG2HI, TISNUM - | lw SFRETLO, FORL_STEP*8+LO(RA) - | xor TMP0, SFRETHI, TISNUM - | or AT, AT, TMP0 - | bnez AT, ->vmeta_for - |. slt AT, SFRETLO, r0 - | slt CRET1, SFARG2LO, SFARG1LO - | slt TMP1, SFARG1LO, SFARG2LO - | movn CRET1, TMP1, AT - } else { - | bne SFARG1HI, TISNUM, >5 - |. lw SFARG2LO, FORL_STEP*8+LO(RA) - | lw SFRETLO, FORL_STOP*8+LO(RA) - | move TMP3, SFARG1LO - | addu SFARG1LO, SFARG1LO, SFARG2LO - | xor TMP0, SFARG1LO, TMP3 - | xor TMP1, SFARG1LO, SFARG2LO - | and TMP0, TMP0, TMP1 - | slt TMP1, SFARG1LO, SFRETLO - | slt CRET1, SFRETLO, SFARG1LO - | slt AT, SFARG2LO, r0 - | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. 
- | movn CRET1, TMP1, AT - | or CRET1, CRET1, TMP0 - } - |1: - if (op == BC_FORI) { - | movz TMP2, r0, CRET1 - | addu PC, PC, TMP2 - } else if (op == BC_JFORI) { - | addu PC, PC, TMP2 - | lhu RD, -4+OFS_RD(PC) - } else if (op == BC_IFORL) { - | movn TMP2, r0, CRET1 - | addu PC, PC, TMP2 - } - if (vk) { - | sw SFARG1HI, FORL_IDX*8+HI(RA) - | sw SFARG1LO, FORL_IDX*8+LO(RA) - } - | ins_next1 - | sw SFARG1HI, FORL_EXT*8+HI(RA) - | sw SFARG1LO, FORL_EXT*8+LO(RA) - |2: - if (op == BC_JFORI) { - | beqz CRET1, =>BC_JLOOP - |. decode_RD8b RD - } else if (op == BC_JFORL) { - | beqz CRET1, =>BC_JLOOP - } - | ins_next2 - | - |5: // FP loop. - |.if FPU - if (!vk) { - | ldc1 f0, FORL_IDX*8(RA) - | ldc1 f2, FORL_STOP*8(RA) - | sltiu TMP0, SFARG1HI, LJ_TISNUM - | sltiu TMP1, SFARG2HI, LJ_TISNUM - | sltiu AT, SFRETHI, LJ_TISNUM - | and TMP0, TMP0, TMP1 - | and AT, AT, TMP0 - | beqz AT, ->vmeta_for - |. slt TMP3, SFRETHI, r0 - | c.ole.d 0, f0, f2 - | c.ole.d 1, f2, f0 - | li CRET1, 1 - | movt CRET1, r0, 0 - | movt AT, r0, 1 - | b <1 - |. movn CRET1, AT, TMP3 - } else { - | ldc1 f0, FORL_IDX*8(RA) - | ldc1 f4, FORL_STEP*8(RA) - | ldc1 f2, FORL_STOP*8(RA) - | lw SFARG2HI, FORL_STEP*8+HI(RA) - | add.d f0, f0, f4 - | c.ole.d 0, f0, f2 - | c.ole.d 1, f2, f0 - | slt TMP3, SFARG2HI, r0 - | li CRET1, 1 - | li AT, 1 - | movt CRET1, r0, 0 - | movt AT, r0, 1 - | movn CRET1, AT, TMP3 - if (op == BC_IFORL) { - | movn TMP2, r0, CRET1 - | addu PC, PC, TMP2 - } - | sdc1 f0, FORL_IDX*8(RA) - | ins_next1 - | b <2 - |. sdc1 f0, FORL_EXT*8(RA) - } - |.else - if (!vk) { - | sltiu TMP0, SFARG1HI, LJ_TISNUM - | sltiu TMP1, SFARG2HI, LJ_TISNUM - | sltiu AT, SFRETHI, LJ_TISNUM - | and TMP0, TMP0, TMP1 - | and AT, AT, TMP0 - | beqz AT, ->vmeta_for - |. nop - | bal ->vm_sfcmpolex - |. move TMP3, SFRETHI - | b <1 - |. nop - } else { - | lw SFARG2HI, FORL_STEP*8+HI(RA) - | load_got __adddf3 - | call_extern - |. 
sw TMP2, ARG5 - | lw SFARG2HI, FORL_STOP*8+HI(RA) - | lw SFARG2LO, FORL_STOP*8+LO(RA) - | move SFARG1HI, SFRETHI - | move SFARG1LO, SFRETLO - | bal ->vm_sfcmpolex - |. lw TMP3, FORL_STEP*8+HI(RA) - if ( op == BC_JFORL ) { - | lhu RD, -4+OFS_RD(PC) - | lw TMP2, ARG5 - | b <1 - |. decode_RD8b RD - } else { - | b <1 - |. lw TMP2, ARG5 - } - } - |.endif - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base*8, RD = target - | addu RA, BASE, RA - | lw TMP1, HI(RA) - | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. - |. lw TMP2, LO(RA) - if (op == BC_JITERL) { - | sw TMP1, -8+HI(RA) - | b =>BC_JLOOP - |. sw TMP2, -8+LO(RA) - } else { - | branch_RD // Otherwise save control var + branch. - | sw TMP1, -8+HI(RA) - | sw TMP2, -8+LO(RA) - } - |1: - | ins_next - break; - - case BC_LOOP: - | // RA = base*8, RD = target (loop extent) - | // Note: RA/RD is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. - break; - - case BC_ILOOP: - | // RA = base*8, RD = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | // RA = base*8 (ignored), RD = traceno*8 - | lw TMP1, DISPATCH_J(trace)(DISPATCH) - | srl RD, RD, 1 - | li AT, 0 - | addu TMP1, TMP1, RD - | // Traces on MIPS don't store the trace number, so use 0. - | sw AT, DISPATCH_GL(vmstate)(DISPATCH) - | lw TRACE:TMP2, 0(TMP1) - | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) - | lw TMP2, TRACE:TMP2->mcode - | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH) - | jr TMP2 - |. 
addiu JGL, DISPATCH, GG_DISP2G+32768 - |.endif - break; - - case BC_JMP: - | // RA = base*8 (only used by trace recorder), RD = target - | branch_RD - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | lw TMP2, L->maxstack - | lbu TMP1, -4+PC2PROTO(numparams)(PC) - | lw KBASE, -4+PC2PROTO(k)(PC) - | sltu AT, TMP2, RA - | bnez AT, ->vm_growstack_l - |. sll TMP1, TMP1, 3 - if (op != BC_JFUNCF) { - | ins_next1 - } - |2: - | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters. - | bnez AT, >3 - |. addu AT, BASE, NARGS8:RC - if (op == BC_JFUNCF) { - | decode_RD8a RD, INS - | b =>BC_JLOOP - |. decode_RD8b RD - } else { - | ins_next2 - } - | - |3: // Clear missing parameters. - | sw TISNIL, HI(AT) - | b <2 - |. addiu NARGS8:RC, NARGS8:RC, 8 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | addu TMP1, BASE, RC - | lw TMP2, L->maxstack - | addu TMP0, RA, RC - | sw LFUNC:RB, LO(TMP1) // Store copy of LFUNC. - | addiu TMP3, RC, 8+FRAME_VARG - | sltu AT, TMP0, TMP2 - | lw KBASE, -4+PC2PROTO(k)(PC) - | beqz AT, ->vm_growstack_l - |. sw TMP3, HI(TMP1) // Store delta + FRAME_VARG. - | lbu TMP2, -4+PC2PROTO(numparams)(PC) - | move RA, BASE - | move RC, TMP1 - | ins_next1 - | beqz TMP2, >3 - |. addiu BASE, TMP1, 8 - |1: - | lw TMP0, HI(RA) - | lw TMP3, LO(RA) - | sltu AT, RA, RC // Less args than parameters? - | move CARG1, TMP0 - | movz TMP0, TISNIL, AT // Clear missing parameters. 
- | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). - | sw TMP3, 8+LO(TMP1) - | addiu TMP2, TMP2, -1 - | sw TMP0, 8+HI(TMP1) - | addiu TMP1, TMP1, 8 - | sw CARG1, HI(RA) - | bnez TMP2, <1 - |. addiu RA, RA, 8 - |3: - | ins_next2 - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | lw CFUNCADDR, CFUNC:RB->f - } else { - | lw CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH) - } - | addu TMP1, RA, NARGS8:RC - | lw TMP2, L->maxstack - | addu RC, BASE, NARGS8:RC - | sw BASE, L->base - | sltu AT, TMP2, TMP1 - | sw RC, L->top - | li_vmstate C - if (op == BC_FUNCCW) { - | lw CARG2, CFUNC:RB->f - } - | bnez AT, ->vm_growstack_c // Need to grow stack. - |. move CARG1, L - | jalr CFUNCADDR // (lua_State *L [, lua_CFunction f]) - |. st_vmstate - | // Returns nresults. - | lw BASE, L->base - | sll RD, CRET1, 3 - | lw TMP1, L->top - | li_vmstate INTERP - | lw PC, FRAME_PC(BASE) // Fetch PC of caller. - | subu RA, TMP1, RD // RA = L->top - nresults*8 - | sw L, DISPATCH_GL(cur_L)(DISPATCH) - | b ->vm_returnc - |. st_vmstate - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. 
*/ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i; - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.4byte .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.4byte 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 31\n" - "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.4byte .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.4byte .Lframe0\n" - "\t.4byte .Lbegin\n" - "\t.4byte %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x9f\n\t.sleb128 1\n" - "\t.byte 0x9e\n\t.sleb128 2\n", - fcofs, CFRAME_SIZE); - for (i = 23; i >= 16; i--) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); -#if !LJ_SOFTFP - for (i = 30; i >= 20; i -= 2) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); -#endif - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE0:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.4byte .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.4byte .Lframe0\n" - "\t.4byte lj_vm_ffi_call\n" - "\t.4byte %d\n" - "\t.byte 0x9f\n\t.uleb128 1\n" - "\t.byte 0x90\n\t.uleb128 2\n" - "\t.byte 0xd\n\t.uleb128 0x10\n" - "\t.align 2\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif -#if !LJ_NO_UNWIND - fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n"); - fprintf(ctx->fp, - "\t.globl lj_err_unwind_dwarf\n" - ".Lframe1:\n" - "\t.4byte .LECIE1-.LSCIE1\n" - ".LSCIE1:\n" - "\t.4byte 0\n" - "\t.byte 0x1\n" - "\t.string \"zPR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 31\n" - "\t.uleb128 6\n" /* augmentation length */ - "\t.byte 0\n" - "\t.4byte lj_err_unwind_dwarf\n" - "\t.byte 0\n" - "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE1:\n\n"); - fprintf(ctx->fp, - ".LSFDE2:\n" - "\t.4byte .LEFDE2-.LASFDE2\n" - 
".LASFDE2:\n" - "\t.4byte .LASFDE2-.Lframe1\n" - "\t.4byte .Lbegin\n" - "\t.4byte %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x9f\n\t.sleb128 1\n" - "\t.byte 0x9e\n\t.sleb128 2\n", - fcofs, CFRAME_SIZE); - for (i = 23; i >= 16; i--) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); -#if !LJ_SOFTFP - for (i = 30; i >= 20; i -= 2) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); -#endif - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE2:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".Lframe2:\n" - "\t.4byte .LECIE2-.LSCIE2\n" - ".LSCIE2:\n" - "\t.4byte 0\n" - "\t.byte 0x1\n" - "\t.string \"zR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 31\n" - "\t.uleb128 1\n" /* augmentation length */ - "\t.byte 0\n" - "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE2:\n\n"); - fprintf(ctx->fp, - ".LSFDE3:\n" - "\t.4byte .LEFDE3-.LASFDE3\n" - ".LASFDE3:\n" - "\t.4byte .LASFDE3-.Lframe2\n" - "\t.4byte lj_vm_ffi_call\n" - "\t.4byte %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0x9f\n\t.uleb128 1\n" - "\t.byte 0x90\n\t.uleb128 2\n" - "\t.byte 0xd\n\t.uleb128 0x10\n" - "\t.align 2\n" - ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); -#endif -#endif - break; - default: - break; - } -} - diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc deleted file mode 100644 index f0c22a74df..0000000000 --- a/src/vm_mips64.dasc +++ /dev/null @@ -1,5062 +0,0 @@ -|// Low-level VM code for MIPS64 CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -|// -|// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. -|// Sponsored by Cisco Systems, Inc. -| -|.arch mips64 -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. 
-|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// Fixed register assignments for the interpreter. -|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra -| -|.macro .FPU, a, b -|.if FPU -| a, b -|.endif -|.endmacro -| -|// The following must be C callee-save (but BASE is often refetched). -|.define BASE, r16 // Base of current Lua stack frame. -|.define KBASE, r17 // Constants of current Lua function. -|.define PC, r18 // Next PC. -|.define DISPATCH, r19 // Opcode dispatch table. -|.define LREG, r20 // Register holding lua_State (also in SAVE_L). -|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. -| -|.define JGL, r30 // On-trace: global_State + 32768. -| -|// Constants for type-comparisons, stores and conversions. C callee-save. -|.define TISNIL, r30 -|.define TISNUM, r22 -|.if FPU -|.define TOBIT, f30 // 2^52 + 2^51. -|.endif -| -|// The following temporaries are not saved across C calls, except for RA. -|.define RA, r23 // Callee-save. -|.define RB, r8 -|.define RC, r9 -|.define RD, r10 -|.define INS, r11 -| -|.define AT, r1 // Assembler temporary. -|.define TMP0, r12 -|.define TMP1, r13 -|.define TMP2, r14 -|.define TMP3, r15 -| -|// MIPS n64 calling convention. -|.define CFUNCADDR, r25 -|.define CARG1, r4 -|.define CARG2, r5 -|.define CARG3, r6 -|.define CARG4, r7 -|.define CARG5, r8 -|.define CARG6, r9 -|.define CARG7, r10 -|.define CARG8, r11 -| -|.define CRET1, r2 -|.define CRET2, r3 -| -|.if FPU -|.define FARG1, f12 -|.define FARG2, f13 -|.define FARG3, f14 -|.define FARG4, f15 -|.define FARG5, f16 -|.define FARG6, f17 -|.define FARG7, f18 -|.define FARG8, f19 -| -|.define FRET1, f0 -|.define FRET2, f2 -|.endif -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|.if FPU // MIPS64 hard-float. -| -|.define CFRAME_SPACE, 192 // Delta for sp. 
-| -|//----- 16 byte aligned, <-- sp entering interpreter -|.define SAVE_ERRF, 188(sp) // 32 bit values. -|.define SAVE_NRES, 184(sp) -|.define SAVE_CFRAME, 176(sp) // 64 bit values. -|.define SAVE_L, 168(sp) -|.define SAVE_PC, 160(sp) -|//----- 16 byte aligned -|.define SAVE_GPR_, 80 // .. 80+10*8: 64 bit GPR saves. -|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves. -| -|.else // MIPS64 soft-float -| -|.define CFRAME_SPACE, 128 // Delta for sp. -| -|//----- 16 byte aligned, <-- sp entering interpreter -|.define SAVE_ERRF, 124(sp) // 32 bit values. -|.define SAVE_NRES, 120(sp) -|.define SAVE_CFRAME, 112(sp) // 64 bit values. -|.define SAVE_L, 104(sp) -|.define SAVE_PC, 96(sp) -|//----- 16 byte aligned -|.define SAVE_GPR_, 16 // .. 16+10*8: 64 bit GPR saves. -| -|.endif -| -|.define TMPX, 8(sp) // Unused by interpreter, temp for JIT code. -|.define TMPD, 0(sp) -|//----- 16 byte aligned -| -|.define TMPD_OFS, 0 -| -|.define SAVE_MULTRES, TMPD -| -|//----------------------------------------------------------------------- -| -|.macro saveregs -| daddiu sp, sp, -CFRAME_SPACE -| sd ra, SAVE_GPR_+9*8(sp) -| sd r30, SAVE_GPR_+8*8(sp) -| .FPU sdc1 f31, SAVE_FPR_+7*8(sp) -| sd r23, SAVE_GPR_+7*8(sp) -| .FPU sdc1 f30, SAVE_FPR_+6*8(sp) -| sd r22, SAVE_GPR_+6*8(sp) -| .FPU sdc1 f29, SAVE_FPR_+5*8(sp) -| sd r21, SAVE_GPR_+5*8(sp) -| .FPU sdc1 f28, SAVE_FPR_+4*8(sp) -| sd r20, SAVE_GPR_+4*8(sp) -| .FPU sdc1 f27, SAVE_FPR_+3*8(sp) -| sd r19, SAVE_GPR_+3*8(sp) -| .FPU sdc1 f26, SAVE_FPR_+2*8(sp) -| sd r18, SAVE_GPR_+2*8(sp) -| .FPU sdc1 f25, SAVE_FPR_+1*8(sp) -| sd r17, SAVE_GPR_+1*8(sp) -| .FPU sdc1 f24, SAVE_FPR_+0*8(sp) -| sd r16, SAVE_GPR_+0*8(sp) -|.endmacro -| -|.macro restoreregs_ret -| ld ra, SAVE_GPR_+9*8(sp) -| ld r30, SAVE_GPR_+8*8(sp) -| ld r23, SAVE_GPR_+7*8(sp) -| .FPU ldc1 f31, SAVE_FPR_+7*8(sp) -| ld r22, SAVE_GPR_+6*8(sp) -| .FPU ldc1 f30, SAVE_FPR_+6*8(sp) -| ld r21, SAVE_GPR_+5*8(sp) -| .FPU ldc1 f29, SAVE_FPR_+5*8(sp) -| ld r20, SAVE_GPR_+4*8(sp) -| .FPU 
ldc1 f28, SAVE_FPR_+4*8(sp) -| ld r19, SAVE_GPR_+3*8(sp) -| .FPU ldc1 f27, SAVE_FPR_+3*8(sp) -| ld r18, SAVE_GPR_+2*8(sp) -| .FPU ldc1 f26, SAVE_FPR_+2*8(sp) -| ld r17, SAVE_GPR_+1*8(sp) -| .FPU ldc1 f25, SAVE_FPR_+1*8(sp) -| ld r16, SAVE_GPR_+0*8(sp) -| .FPU ldc1 f24, SAVE_FPR_+0*8(sp) -| jr ra -| daddiu sp, sp, CFRAME_SPACE -|.endmacro -| -|// Type definitions. Some of these are only used for documentation. -|.type L, lua_State, LREG -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|//----------------------------------------------------------------------- -| -|// Trap for not-yet-implemented parts. -|.macro NYI; .long 0xf0f0f0f0; .endmacro -| -|// Macros to mark delay slots. -|.macro ., a; a; .endmacro -|.macro ., a,b; a,b; .endmacro -|.macro ., a,b,c; a,b,c; .endmacro -|.macro ., a,b,c,d; a,b,c,d; .endmacro -| -|.define FRAME_PC, -8 -|.define FRAME_FUNC, -16 -| -|//----------------------------------------------------------------------- -| -|// Endian-specific defines. -|.if ENDIAN_LE -|.define HI, 4 -|.define LO, 0 -|.define OFS_RD, 2 -|.define OFS_RA, 1 -|.define OFS_OP, 0 -|.else -|.define HI, 0 -|.define LO, 4 -|.define OFS_RD, 0 -|.define OFS_RA, 2 -|.define OFS_OP, 3 -|.endif -| -|// Instruction decode. 
-|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro -|.macro decode_OP8a, dst, ins; andi dst, ins, 0xff; .endmacro -|.macro decode_OP8b, dst; sll dst, dst, 3; .endmacro -|.macro decode_RC8a, dst, ins; srl dst, ins, 13; .endmacro -|.macro decode_RC8b, dst; andi dst, dst, 0x7f8; .endmacro -|.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro -|.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro -|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro -|.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro -|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro -|.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro -|.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro -|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro -| -|// Instruction fetch. -|.macro ins_NEXT1 -| lw INS, 0(PC) -| daddiu PC, PC, 4 -|.endmacro -|// Instruction decode+dispatch. -|.macro ins_NEXT2 -| decode_OP8a TMP1, INS -| decode_OP8b TMP1 -| daddu TMP0, DISPATCH, TMP1 -| decode_RD8a RD, INS -| ld AT, 0(TMP0) -| decode_RA8a RA, INS -| decode_RD8b RD -| jr AT -| decode_RA8b RA -|.endmacro -|.macro ins_NEXT -| ins_NEXT1 -| ins_NEXT2 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -| .define ins_next1, ins_NEXT1 -| .define ins_next2, ins_NEXT2 -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next1 -| .endmacro -| .macro ins_next2 -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. 
-|.macro ins_callt -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| ld PC, LFUNC:RB->pc -| lw INS, 0(PC) -| daddiu PC, PC, 4 -| decode_OP8a TMP1, INS -| decode_RA8a RA, INS -| decode_OP8b TMP1 -| decode_RA8b RA -| daddu TMP0, DISPATCH, TMP1 -| ld TMP0, 0(TMP0) -| jr TMP0 -| daddu RA, RA, BASE -|.endmacro -| -|.macro ins_call -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| sd PC, FRAME_PC(BASE) -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|.macro branch_RD -| srl TMP0, RD, 1 -| lui AT, (-(BCBIAS_J*4 >> 16) & 65535) -| addu TMP0, TMP0, AT -| daddu PC, PC, TMP0 -|.endmacro -| -|// Assumes DISPATCH is relative to GL. -#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) -#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|.macro load_got, func -| ld CFUNCADDR, DISPATCH_GOT(func)(DISPATCH) -|.endmacro -|// Much faster. Sadly, there's no easy way to force the required code layout. -|// .macro call_intern, func; bal extern func; .endmacro -|.macro call_intern, func; jalr CFUNCADDR; .endmacro -|.macro call_extern; jalr CFUNCADDR; .endmacro -|.macro jmp_extern; jr CFUNCADDR; .endmacro -| -|.macro hotcheck, delta, target -| dsrl TMP1, PC, 1 -| andi TMP1, TMP1, 126 -| daddu TMP1, TMP1, DISPATCH -| lhu TMP2, GG_DISP2HOT(TMP1) -| addiu TMP2, TMP2, -delta -| bltz TMP2, target -|. sh TMP2, GG_DISP2HOT(TMP1) -|.endmacro -| -|.macro hotloop -| hotcheck HOTCOUNT_LOOP, ->vm_hotloop -|.endmacro -| -|.macro hotcall -| hotcheck HOTCOUNT_CALL, ->vm_hotcall -|.endmacro -| -|// Set current VM state. Uses TMP0. 
-|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro -|.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp. -|.macro barrierback, tab, mark, tmp, target -| ld tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) -| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) -| sd tab, DISPATCH_GL(gc.grayagain)(DISPATCH) -| sb mark, tab->marked -| b target -|. sd tmp, tab->gclist -|.endmacro -| -|// Clear type tag. Isolate lowest 14+32+1=47 bits of reg. -|.macro cleartp, reg; dextm reg, reg, 0, 14; .endmacro -|.macro cleartp, dst, reg; dextm dst, reg, 0, 14; .endmacro -| -|// Set type tag: Merge 17 type bits into bits [15+32=47, 31+32+1=64) of dst. -|.macro settp, dst, tp; dinsu dst, tp, 15, 31; .endmacro -| -|// Extract (negative) type tag. -|.macro gettp, dst, src; dsra dst, src, 47; .endmacro -| -|// Macros to check the TValue type and extract the GCobj. Branch on failure. -|.macro checktp, reg, tp, target -| gettp AT, reg -| daddiu AT, AT, tp -| bnez AT, target -|. cleartp reg -|.endmacro -|.macro checktp, dst, reg, tp, target -| gettp AT, reg -| daddiu AT, AT, tp -| bnez AT, target -|. cleartp dst, reg -|.endmacro -|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro -|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro -|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro -|.macro checkint, reg, target // Caveat: has delay slot! -| gettp AT, reg -| bne AT, TISNUM, target -|.endmacro -|.macro checknum, reg, target // Caveat: has delay slot! 
-| gettp AT, reg -| sltiu AT, AT, LJ_TISNUM -| beqz AT, target -|.endmacro -| -|.macro mov_false, reg -| lu reg, 0x8000 -| dsll reg, reg, 32 -| not reg, reg -|.endmacro -|.macro mov_true, reg -| li reg, 0x0001 -| dsll reg, reg, 48 -| not reg, reg -|.endmacro -| -|//----------------------------------------------------------------------- - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: TMP2 = previous base. - | andi AT, PC, FRAME_P - | beqz AT, ->cont_dispatch - | - | // Return from pcall or xpcall fast func. - |. mov_true TMP1 - | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame. - | move BASE, TMP2 // Restore caller base. - | // Prepending may overwrite the pcall frame, so do it at the end. - | sd TMP1, -8(RA) // Prepend true to results. - | daddiu RA, RA, -8 - | - |->vm_returnc: - | addiu RD, RD, 8 // RD = (nresults+1)*8. - | andi TMP0, PC, FRAME_TYPE - | beqz RD, ->vm_unwind_c_eh - |. li CRET1, LUA_YIELD - | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. - |. move MULTRES, RD - | - |->vm_return: - | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return - | // TMP0 = PC & FRAME_TYPE - | li TMP2, -8 - | xori AT, TMP0, FRAME_C - | and TMP2, PC, TMP2 - | bnez AT, ->vm_returnp - | dsubu TMP2, BASE, TMP2 // TMP2 = previous base. - | - | addiu TMP1, RD, -8 - | sd TMP2, L->base - | li_vmstate C - | lw TMP2, SAVE_NRES - | daddiu BASE, BASE, -16 - | st_vmstate - | beqz TMP1, >2 - |. 
sll TMP2, TMP2, 3 - |1: - | addiu TMP1, TMP1, -8 - | ld CRET1, 0(RA) - | daddiu RA, RA, 8 - | sd CRET1, 0(BASE) - | bnez TMP1, <1 - |. daddiu BASE, BASE, 8 - | - |2: - | bne TMP2, RD, >6 - |3: - |. sd BASE, L->top // Store new top. - | - |->vm_leave_cp: - | ld TMP0, SAVE_CFRAME // Restore previous C frame. - | move CRET1, r0 // Ok return status for vm_pcall. - | sd TMP0, L->cframe - | - |->vm_leave_unw: - | restoreregs_ret - | - |6: - | ld TMP1, L->maxstack - | slt AT, TMP2, RD - | bnez AT, >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. - |. slt AT, BASE, TMP1 - | beqz AT, >8 - |. nop - | sd TISNIL, 0(BASE) - | addiu RD, RD, 8 - | b <2 - |. daddiu BASE, BASE, 8 - | - |7: // Less results wanted. - | subu TMP0, RD, TMP2 - | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it. - | b <3 - |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | load_got lj_state_growstack - | move MULTRES, RD - | srl CARG2, TMP2, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | lw TMP2, SAVE_NRES - | ld BASE, L->top // Need the (realloced) L->top in BASE. - | move RD, MULTRES - | b <2 - |. sll TMP2, TMP2, 3 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | move sp, CARG1 - | move CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | ld L, SAVE_L - | li TMP0, ~LJ_VMST_C - | ld GL:TMP1, L->glref - | b ->vm_leave_unw - |. sw TMP0, GL:TMP1->vmstate - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - | li AT, -4 - | and sp, CARG1, AT - |->vm_unwind_ff_eh: // Landing pad for external unwinder. 
- | ld L, SAVE_L - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | li TISNIL, LJ_TNIL - | li TISNUM, LJ_TISNUM - | ld BASE, L->base - | ld DISPATCH, L->glref // Setup pointer to dispatch table. - | .FPU mtc1 TMP3, TOBIT - | mov_false TMP1 - | li_vmstate INTERP - | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame. - | .FPU cvt.d.s TOBIT, TOBIT - | daddiu RA, BASE, -8 // Results start at BASE-8. - | daddiu DISPATCH, DISPATCH, GG_G2DISP - | sd TMP1, 0(RA) // Prepend false to error message. - | st_vmstate - | b ->vm_returnc - |. li RD, 16 // 2 results: false + error message. - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | b >2 - |. li CARG2, LUA_MINSTACK - | - |->vm_growstack_l: // Grow stack for Lua function. - | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | daddu RC, BASE, RC - | dsubu RA, RA, BASE - | sd BASE, L->base - | daddiu PC, PC, 4 // Must point after first instruction. - | sd RC, L->top - | srl CARG2, RA, 3 - |2: - | // L->base = new base, L->top = top - | load_got lj_state_growstack - | sd PC, SAVE_PC - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | ld BASE, L->base - | ld RC, L->top - | ld LFUNC:RB, FRAME_FUNC(BASE) - | dsubu RC, RC, BASE - | cleartp LFUNC:RB - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. 
- | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | move L, CARG1 - | ld DISPATCH, L->glref // Setup pointer to dispatch table. - | move BASE, CARG2 - | lbu TMP1, L->status - | sd L, SAVE_L - | li PC, FRAME_CP - | daddiu TMP0, sp, CFRAME_RESUME - | daddiu DISPATCH, DISPATCH, GG_G2DISP - | sw r0, SAVE_NRES - | sw r0, SAVE_ERRF - | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sd r0, SAVE_CFRAME - | beqz TMP1, >3 - |. sd TMP0, L->cframe - | - | // Resume after yield (like a return). - | sd L, DISPATCH_GL(cur_L)(DISPATCH) - | move RA, BASE - | ld BASE, L->base - | ld TMP1, L->top - | ld PC, FRAME_PC(BASE) - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | dsubu RD, TMP1, BASE - | .FPU mtc1 TMP3, TOBIT - | sb r0, L->status - | .FPU cvt.d.s TOBIT, TOBIT - | li_vmstate INTERP - | daddiu RD, RD, 8 - | st_vmstate - | move MULTRES, RD - | andi TMP0, PC, FRAME_TYPE - | li TISNIL, LJ_TNIL - | beqz TMP0, ->BC_RET_Z - |. li TISNUM, LJ_TISNUM - | b ->vm_return - |. nop - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | sw CARG4, SAVE_ERRF - | b >1 - |. li PC, FRAME_CP - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | li PC, FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | ld TMP1, L:CARG1->cframe - | move L, CARG1 - | sw CARG3, SAVE_NRES - | ld DISPATCH, L->glref // Setup pointer to dispatch table. - | sd CARG1, SAVE_L - | move BASE, CARG2 - | daddiu DISPATCH, DISPATCH, GG_G2DISP - | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sd TMP1, SAVE_CFRAME - | sd sp, L->cframe // Add our C frame to cframe chain. - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | sd L, DISPATCH_GL(cur_L)(DISPATCH) - | ld TMP2, L->base // TMP2 = old base (used in vmeta_call). - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 
- | ld TMP1, L->top - | .FPU mtc1 TMP3, TOBIT - | daddu PC, PC, BASE - | dsubu NARGS8:RC, TMP1, BASE - | li TISNUM, LJ_TISNUM - | dsubu PC, PC, TMP2 // PC = frame delta + frame type - | .FPU cvt.d.s TOBIT, TOBIT - | li_vmstate INTERP - | li TISNIL, LJ_TNIL - | st_vmstate - | - |->vm_call_dispatch: - | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC - | ld LFUNC:RB, FRAME_FUNC(BASE) - | checkfunc LFUNC:RB, ->vmeta_call - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | move L, CARG1 - | ld TMP0, L:CARG1->stack - | sd CARG1, SAVE_L - | ld TMP1, L->top - | ld DISPATCH, L->glref // Setup pointer to dispatch table. - | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. - | dsubu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). - | ld TMP1, L->cframe - | daddiu DISPATCH, DISPATCH, GG_G2DISP - | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. - | sw r0, SAVE_ERRF // No error function. - | sd TMP1, SAVE_CFRAME - | sd sp, L->cframe // Add our C frame to cframe chain. - | sd L, DISPATCH_GL(cur_L)(DISPATCH) - | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) - |. move CFUNCADDR, CARG4 - | move BASE, CRET1 - | bnez CRET1, <3 // Else continue with the call. - |. li PC, FRAME_CP - | b ->vm_leave_cp // No base? Just remove C frame. - |. nop - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the - |// stack, so BASE doesn't need to be reloaded across these calls. 
- | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 - | ld TMP0, -32(BASE) // Continuation. - | move RB, BASE - | move BASE, TMP2 // Restore caller BASE. - | ld LFUNC:TMP1, FRAME_FUNC(TMP2) - |.if FFI - | sltiu AT, TMP0, 2 - |.endif - | ld PC, -24(RB) // Restore PC from [cont|PC]. - | cleartp LFUNC:TMP1 - | daddu TMP2, RA, RD - | ld TMP1, LFUNC:TMP1->pc - |.if FFI - | bnez AT, >1 - |.endif - |. sd TISNIL, -8(TMP2) // Ensure one valid arg. - | // BASE = base, RA = resultptr, RB = meta base - | jr TMP0 // Jump to continuation. - |. ld KBASE, PC2PROTO(k)(TMP1) - | - |.if FFI - |1: - | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: tailcall from C function. - |. daddiu TMP1, RB, -32 - | b ->vm_call_tail - |. dsubu RC, TMP1, BASE - |.endif - | - |->cont_cat: // RA = resultptr, RB = meta base - | lw INS, -4(PC) - | daddiu CARG2, RB, -32 - | ld CRET1, 0(RA) - | decode_RB8a MULTRES, INS - | decode_RA8a RA, INS - | decode_RB8b MULTRES - | decode_RA8b RA - | daddu TMP1, BASE, MULTRES - | sd BASE, L->base - | dsubu CARG3, CARG2, TMP1 - | bne TMP1, CARG2, ->BC_CAT_Z - |. sd CRET1, 0(CARG2) - | daddu RA, BASE, RA - | b ->cont_nop - |. sd CRET1, 0(RA) - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TSTR - | settp STR:RC, TMP0 - | b >1 - |. sd STR:RC, 0(CARG3) - | - |->vmeta_tgets: - | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TTAB - | li TMP1, LJ_TSTR - | settp TAB:RB, TMP0 - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) - | sd TAB:RB, 0(CARG2) - | settp STR:RC, TMP1 - | b >1 - |. 
sd STR:RC, 0(CARG3) - | - |->vmeta_tgetb: // TMP0 = index - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | settp TMP0, TISNUM - | sd TMP0, 0(CARG3) - | - |->vmeta_tgetv: - |1: - | load_got lj_meta_tget - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - |. move CARG1, L - | // Returns TValue * (finished) or NULL (metamethod). - | beqz CRET1, >3 - |. daddiu TMP1, BASE, -FRAME_CONT - | ld CARG1, 0(CRET1) - | ins_next1 - | sd CARG1, 0(RA) - | ins_next2 - | - |3: // Call __index metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k - | ld BASE, L->top - | sd PC, -24(BASE) // [cont|PC] - | dsubu PC, BASE, TMP1 - | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | cleartp LFUNC:RB - | b ->vm_call_dispatch_f - |. li NARGS8:RC, 16 // 2 args for func(t, k). - | - |->vmeta_tgetr: - | load_got lj_tab_getinth - | call_intern lj_tab_getinth // (GCtab *t, int32_t key) - |. nop - | // Returns cTValue * or NULL. - | beqz CRET1, ->BC_TGETR_Z - |. move CARG2, TISNIL - | b ->BC_TGETR_Z - |. ld CARG2, 0(CRET1) - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TSTR - | settp STR:RC, TMP0 - | b >1 - |. sd STR:RC, 0(CARG3) - | - |->vmeta_tsets: - | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) - | li TMP0, LJ_TTAB - | li TMP1, LJ_TSTR - | settp TAB:RB, TMP0 - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) - | sd TAB:RB, 0(CARG2) - | settp STR:RC, TMP1 - | b >1 - |. sd STR:RC, 0(CARG3) - | - |->vmeta_tsetb: // TMP0 = index - | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | settp TMP0, TISNUM - | sd TMP0, 0(CARG3) - | - |->vmeta_tsetv: - |1: - | load_got lj_meta_tset - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - |. move CARG1, L - | // Returns TValue * (finished) or NULL (metamethod). - | beqz CRET1, >3 - |. 
ld CARG1, 0(RA) - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | ins_next1 - | sd CARG1, 0(CRET1) - | ins_next2 - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | daddiu TMP1, BASE, -FRAME_CONT - | ld BASE, L->top - | sd PC, -24(BASE) // [cont|PC] - | dsubu PC, BASE, TMP1 - | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | cleartp LFUNC:RB - | sd CARG1, 16(BASE) // Copy value to third argument. - | b ->vm_call_dispatch_f - |. li NARGS8:RC, 24 // 3 args for func(t, k, v) - | - |->vmeta_tsetr: - | load_got lj_tab_setinth - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - |. move CARG1, L - | // Returns TValue *. - | b ->BC_TSETR_Z - |. nop - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | // RA/RD point to o1/o2. - | move CARG2, RA - | move CARG3, RD - | load_got lj_meta_comp - | daddiu PC, PC, -4 - | sd BASE, L->base - | sd PC, SAVE_PC - | decode_OP1 CARG4, INS - | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - |3: - | sltiu AT, CRET1, 2 - | beqz AT, ->vmeta_binop - | negu TMP2, CRET1 - |4: - | lhu RD, OFS_RD(PC) - | daddiu PC, PC, 4 - | lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) - | sll RD, RD, 2 - | addu RD, RD, TMP1 - | and RD, RD, TMP2 - | daddu PC, PC, RD - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | lbu TMP1, -4+OFS_RA(PC) - | ld CRET1, 0(RA) - | sll TMP1, TMP1, 3 - | daddu TMP1, BASE, TMP1 - | b ->cont_nop - |. sd CRET1, 0(TMP1) - | - |->cont_condt: // RA = resultptr - | ld TMP0, 0(RA) - | gettp TMP0, TMP0 - | sltiu AT, TMP0, LJ_TISTRUECOND - | b <4 - |. negu TMP2, AT // Branch if result is true. - | - |->cont_condf: // RA = resultptr - | ld TMP0, 0(RA) - | gettp TMP0, TMP0 - | sltiu AT, TMP0, LJ_TISTRUECOND - | b <4 - |. 
addiu TMP2, AT, -1 // Branch if result is false. - | - |->vmeta_equal: - | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. - | load_got lj_meta_equal - | cleartp LFUNC:CARG3, CARG2 - | cleartp LFUNC:CARG2, CARG1 - | move CARG4, TMP0 - | daddiu PC, PC, -4 - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |. nop - | - |->vmeta_equal_cd: - |.if FFI - | load_got lj_meta_equal_cd - | move CARG2, INS - | daddiu PC, PC, -4 - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op) - |. move CARG1, L - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |. nop - |.endif - | - |->vmeta_istype: - | load_got lj_meta_istype - | daddiu PC, PC, -4 - | sd BASE, L->base - | srl CARG2, RA, 3 - | srl CARG3, RD, 3 - | sd PC, SAVE_PC - | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - |. move CARG1, L - | b ->cont_nop - |. nop - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_unm: - | move RC, RB - | - |->vmeta_arith: - | load_got lj_meta_arith - | sd BASE, L->base - | move CARG2, RA - | sd PC, SAVE_PC - | move CARG3, RB - | move CARG4, RC - | decode_OP1 CARG5, INS // CARG5 == RB. - | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - |. move CARG1, L - | // Returns NULL (finished) or TValue * (metamethod). - | beqz CRET1, ->cont_nop - |. nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | dsubu TMP1, CRET1, BASE - | sd PC, -24(CRET1) // [cont|PC] - | move TMP2, BASE - | daddiu PC, TMP1, FRAME_CONT - | move BASE, CRET1 - | b ->vm_call_dispatch - |. li NARGS8:RC, 16 // 2 args for func(o1, o2). - | - |->vmeta_len: - | // CARG2 already set by BC_LEN. 
-#if LJ_52 - | move MULTRES, CARG1 -#endif - | load_got lj_meta_len - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_meta_len // (lua_State *L, TValue *o) - |. move CARG1, L - | // Returns NULL (retry) or TValue * (metamethod base). -#if LJ_52 - | bnez CRET1, ->vmeta_binop // Binop call for compatibility. - |. nop - | b ->BC_LEN_Z - |. move CARG1, MULTRES -#else - | b ->vmeta_binop // Binop call for compatibility. - |. nop -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. - | // TMP2 = old base, BASE = new base, RC = nargs*8 - | load_got lj_meta_call - | sd TMP2, L->base // This is the callers base! - | daddiu CARG2, BASE, -16 - | sd PC, SAVE_PC - | daddu CARG3, BASE, RC - | move MULTRES, NARGS8:RC - | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - |. move CARG1, L - | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. - | cleartp LFUNC:RB - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | load_got lj_meta_call - | sd BASE, L->base - | daddiu CARG2, RA, -16 - | sd PC, SAVE_PC - | daddu CARG3, RA, RC - | move MULTRES, NARGS8:RC - | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - |. move CARG1, L - | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here. - | ld TMP1, FRAME_PC(BASE) - | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. - | b ->BC_CALLT_Z - |. cleartp LFUNC:CARG3, RB - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | load_got lj_meta_for - | sd BASE, L->base - | move CARG2, RA - | sd PC, SAVE_PC - | move MULTRES, INS - | call_intern lj_meta_for // (lua_State *L, TValue *base) - |. 
move CARG1, L - |.if JIT - | decode_OP1 TMP0, MULTRES - | li AT, BC_JFORI - |.endif - | decode_RA8a RA, MULTRES - | decode_RD8a RD, MULTRES - | decode_RA8b RA - |.if JIT - | beq TMP0, AT, =>BC_JFORI - |. decode_RD8b RD - | b =>BC_FORI - |. nop - |.else - | b =>BC_FORI - |. decode_RD8b RD - |.endif - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | beqz NARGS8:RC, ->fff_fallback - |. ld CARG1, 0(BASE) - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. name: - | sltiu AT, NARGS8:RC, 16 - | ld CARG1, 0(BASE) - | bnez AT, ->fff_fallback - |. ld CARG2, 8(BASE) - |.endmacro - | - |.macro .ffunc_n, name // Caveat: has delay slot! - |->ff_ .. name: - | ld CARG1, 0(BASE) - | beqz NARGS8:RC, ->fff_fallback - | // Either ldc1 or the 1st instruction of checknum is in the delay slot. - | .FPU ldc1 FARG1, 0(BASE) - | checknum CARG1, ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name // Caveat: has delay slot! - |->ff_ .. name: - | ld CARG1, 0(BASE) - | sltiu AT, NARGS8:RC, 16 - | ld CARG2, 8(BASE) - | bnez AT, ->fff_fallback - |. gettp TMP0, CARG1 - | gettp TMP1, CARG2 - | sltiu TMP0, TMP0, LJ_TISNUM - | sltiu TMP1, TMP1, LJ_TISNUM - | .FPU ldc1 FARG1, 0(BASE) - | and TMP0, TMP0, TMP1 - | .FPU ldc1 FARG2, 8(BASE) - | beqz TMP0, ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! 
- |.macro ffgccheck - | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | dsubu AT, TMP0, TMP1 - | bgezal AT, ->fff_gcstep - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - |.ffunc_1 assert - | gettp AT, CARG1 - | sltiu AT, AT, LJ_TISTRUECOND - | beqz AT, ->fff_fallback - |. daddiu RA, BASE, -16 - | ld PC, FRAME_PC(BASE) - | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. - | daddu TMP2, RA, RD - | daddiu TMP1, BASE, 8 - | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. - |. sd CARG1, 0(RA) - |1: - | ld CRET1, 0(TMP1) - | sd CRET1, -16(TMP1) - | bne TMP1, TMP2, <1 - |. daddiu TMP1, TMP1, 8 - | b ->fff_res - |. nop - | - |.ffunc_1 type - | gettp TMP0, CARG1 - | sltu TMP1, TISNUM, TMP0 - | not TMP2, TMP0 - | li TMP3, ~LJ_TISNUM - | movz TMP2, TMP3, TMP1 - | dsll TMP2, TMP2, 3 - | daddu TMP2, CFUNC:RB, TMP2 - | b ->fff_restv - |. ld CARG1, CFUNC:TMP2->upvalue - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | gettp TMP2, CARG1 - | daddiu TMP0, TMP2, -LJ_TTAB - | daddiu TMP1, TMP2, -LJ_TUDATA - | movn TMP0, TMP1, TMP0 - | bnez TMP0, >6 - |. cleartp TAB:CARG1 - |1: // Field metatable must be at same offset for GCtab and GCudata! - | ld TAB:RB, TAB:CARG1->metatable - |2: - | ld STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) - | beqz TAB:RB, ->fff_restv - |. li CARG1, LJ_TNIL - | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash - | ld NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | dsll TMP0, TMP1, 5 - | dsll TMP1, TMP1, 3 - | dsubu TMP1, TMP0, TMP1 - | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - | li CARG4, LJ_TSTR - | settp STR:RC, CARG4 // Tagged key to look for. - |3: // Rearranged logic, because we expect _not_ to find the key. 
- | ld TMP0, NODE:TMP2->key - | ld CARG1, NODE:TMP2->val - | ld NODE:TMP2, NODE:TMP2->next - | beq RC, TMP0, >5 - |. li AT, LJ_TTAB - | bnez NODE:TMP2, <3 - |. nop - |4: - | move CARG1, RB - | b ->fff_restv // Not found, keep default result. - |. settp CARG1, AT - |5: - | bne CARG1, TISNIL, ->fff_restv - |. nop - | b <4 // Ditto for nil value. - |. nop - | - |6: - | sltiu AT, TMP2, LJ_TISNUM - | movn TMP2, TISNUM, AT - | dsll TMP2, TMP2, 3 - | dsubu TMP0, DISPATCH, TMP2 - | b <2 - |. ld TAB:RB, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8(TMP0) - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. - | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback - | gettp TMP3, CARG2 - | ld TAB:TMP0, TAB:TMP1->metatable - | lbu TMP2, TAB:TMP1->marked - | daddiu AT, TMP3, -LJ_TTAB - | cleartp TAB:CARG2 - | or AT, AT, TAB:TMP0 - | bnez AT, ->fff_fallback - |. andi AT, TMP2, LJ_GC_BLACK // isblack(table) - | beqz AT, ->fff_restv - |. sd TAB:CARG2, TAB:TMP1->metatable - | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv - | - |.ffunc rawget - | ld CARG2, 0(BASE) - | sltiu AT, NARGS8:RC, 16 - | load_got lj_tab_get - | gettp TMP0, CARG2 - | cleartp CARG2 - | daddiu TMP0, TMP0, -LJ_TTAB - | or AT, AT, TMP0 - | bnez AT, ->fff_fallback - |. daddiu CARG3, BASE, 8 - | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - |. move CARG1, L - | b ->fff_restv - |. ld CARG1, 0(CRET1) - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | ld CARG1, 0(BASE) - | xori AT, NARGS8:RC, 8 // Exactly one number argument. - | gettp TMP1, CARG1 - | sltu TMP0, TISNUM, TMP1 - | or AT, AT, TMP0 - | bnez AT, ->fff_fallback - |. nop - | b ->fff_restv - |. nop - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. 
- | gettp TMP0, CARG1 - | daddiu AT, TMP0, -LJ_TSTR - | // A __tostring method in the string base metatable is ignored. - | beqz AT, ->fff_restv // String key? - | // Handle numbers inline, unless a number base metatable is present. - |. ld TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) - | sltu TMP0, TISNUM, TMP0 - | or TMP0, TMP0, TMP1 - | bnez TMP0, ->fff_fallback - |. sd BASE, L->base // Add frame since C call can throw. - | ffgccheck - |. sd PC, SAVE_PC // Redundant (but a defined value). - | load_got lj_strfmt_number - | move CARG1, L - | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) - |. move CARG2, BASE - | // Returns GCstr *. - | li AT, LJ_TSTR - | settp CRET1, AT - | b ->fff_restv - |. move CARG1, CRET1 - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc_1 next - | checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback - | daddu TMP2, BASE, NARGS8:RC - | sd TISNIL, 0(TMP2) // Set missing 2nd arg to nil. - | ld PC, FRAME_PC(BASE) - | load_got lj_tab_next - | sd BASE, L->base // Add frame since C call can throw. - | sd BASE, L->top // Dummy frame length is ok. - | daddiu CARG3, BASE, 8 - | sd PC, SAVE_PC - | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - |. move CARG1, L - | // Returns 0 at end of traversal. - | beqz CRET1, ->fff_restv // End of traversal: return nil. - |. move CARG1, TISNIL - | ld TMP0, 8(BASE) - | daddiu RA, BASE, -16 - | ld TMP2, 16(BASE) - | sd TMP0, 0(RA) - | sd TMP2, 8(RA) - | b ->fff_res - |. li RD, (2+1)*8 - | - |.ffunc_1 pairs - | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback - | ld PC, FRAME_PC(BASE) -#if LJ_52 - | ld TAB:TMP2, TAB:TMP1->metatable - | ld TMP0, CFUNC:RB->upvalue[0] - | bnez TAB:TMP2, ->fff_fallback -#else - | ld TMP0, CFUNC:RB->upvalue[0] -#endif - |. daddiu RA, BASE, -16 - | sd TISNIL, 0(BASE) - | sd CARG1, -8(BASE) - | sd TMP0, 0(RA) - | b ->fff_res - |. 
li RD, (3+1)*8 - | - |.ffunc_2 ipairs_aux - | checktab CARG1, ->fff_fallback - | checkint CARG2, ->fff_fallback - |. lw TMP0, TAB:CARG1->asize - | ld TMP1, TAB:CARG1->array - | ld PC, FRAME_PC(BASE) - | sextw TMP2, CARG2 - | addiu TMP2, TMP2, 1 - | sltu AT, TMP2, TMP0 - | daddiu RA, BASE, -16 - | zextw TMP0, TMP2 - | settp TMP0, TISNUM - | beqz AT, >2 // Not in array part? - |. sd TMP0, 0(RA) - | dsll TMP3, TMP2, 3 - | daddu TMP3, TMP1, TMP3 - | ld TMP1, 0(TMP3) - |1: - | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. - |. li RD, (0+1)*8 - | sd TMP1, -8(BASE) - | b ->fff_res - |. li RD, (2+1)*8 - |2: // Check for empty hash part first. Otherwise call C function. - | lw TMP0, TAB:CARG1->hmask - | load_got lj_tab_getinth - | beqz TMP0, ->fff_res - |. li RD, (0+1)*8 - | call_intern lj_tab_getinth // (GCtab *t, int32_t key) - |. move CARG2, TMP2 - | // Returns cTValue * or NULL. - | beqz CRET1, ->fff_res - |. li RD, (0+1)*8 - | b <1 - |. ld TMP1, 0(CRET1) - | - |.ffunc_1 ipairs - | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback - | ld PC, FRAME_PC(BASE) -#if LJ_52 - | ld TAB:TMP2, TAB:TMP1->metatable - | ld CFUNC:TMP0, CFUNC:RB->upvalue[0] - | bnez TAB:TMP2, ->fff_fallback -#else - | ld TMP0, CFUNC:RB->upvalue[0] -#endif - | daddiu RA, BASE, -16 - | dsll AT, TISNUM, 47 - | sd CARG1, -8(BASE) - | sd AT, 0(BASE) - | sd CFUNC:TMP0, 0(RA) - | b ->fff_res - |. li RD, (3+1)*8 - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | daddiu NARGS8:RC, NARGS8:RC, -8 - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | bltz NARGS8:RC, ->fff_fallback - |. move TMP2, BASE - | daddiu BASE, BASE, 16 - | // Remember active hook before pcall. - | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT - | andi TMP3, TMP3, 1 - | daddiu PC, TMP3, 16+FRAME_PCALL - | beqz NARGS8:RC, ->vm_call_dispatch - |1: - |. daddu TMP0, BASE, NARGS8:RC - |2: - | ld TMP1, -16(TMP0) - | sd TMP1, -8(TMP0) - | daddiu TMP0, TMP0, -8 - | bne TMP0, BASE, <2 - |. 
nop - | b ->vm_call_dispatch - |. nop - | - |.ffunc xpcall - | daddiu NARGS8:RC, NARGS8:RC, -16 - | ld CARG1, 0(BASE) - | ld CARG2, 8(BASE) - | bltz NARGS8:RC, ->fff_fallback - |. lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) // TMP3 = hookmask, consumed by the srl/andi below (same register as in pcall). - | gettp AT, CARG2 - | daddiu AT, AT, -LJ_TFUNC - | bnez AT, ->fff_fallback // Traceback must be a function. - |. move TMP2, BASE - | daddiu BASE, BASE, 24 - | // Remember active hook before pcall. - | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT - | sd CARG2, 0(TMP2) // Swap function and traceback. - | andi TMP3, TMP3, 1 - | sd CARG1, 8(TMP2) - | beqz NARGS8:RC, ->vm_call_dispatch - |. daddiu PC, TMP3, 24+FRAME_PCALL - | b <1 - |. nop - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback - |.else - |.ffunc coroutine_wrap_aux - | ld L:CARG1, CFUNC:RB->upvalue[0].gcr - | cleartp L:CARG1 - |.endif - | lbu TMP0, L:CARG1->status - | ld TMP1, L:CARG1->cframe - | ld CARG2, L:CARG1->top - | ld TMP2, L:CARG1->base - | addiu AT, TMP0, -LUA_YIELD - | daddu CARG3, CARG2, TMP0 - | daddiu TMP3, CARG2, 8 - | bgtz AT, ->fff_fallback // st > LUA_YIELD? - |. movn CARG2, TMP3, AT - | xor TMP2, TMP2, CARG3 - | bnez TMP1, ->fff_fallback // cframe != 0? - |. or AT, TMP2, TMP0 - | ld TMP0, L:CARG1->maxstack - | beqz AT, ->fff_fallback // base == top && st == 0? - |. ld PC, FRAME_PC(BASE) - | daddu TMP2, CARG2, NARGS8:RC - | sltu AT, TMP0, TMP2 - | bnez AT, ->fff_fallback // Stack overflow? - |. sd PC, SAVE_PC - | sd BASE, L->base - |1: - |.if resume - | daddiu BASE, BASE, 8 // Keep resumed thread in stack for GC. - | daddiu NARGS8:RC, NARGS8:RC, -8 - | daddiu TMP2, TMP2, -8 - |.endif - | sd TMP2, L:CARG1->top - | daddu TMP1, BASE, NARGS8:RC - | move CARG3, CARG2 - | sd BASE, L->top - |2: // Move args to coroutine. - | ld CRET1, 0(BASE) - | sltu AT, BASE, TMP1 - | beqz AT, >3 - |. 
daddiu BASE, BASE, 8 - | sd CRET1, 0(CARG3) - | b <2 - |. daddiu CARG3, CARG3, 8 - |3: - | bal ->vm_resume // (lua_State *L, TValue *base, 0, 0) - |. move L:RA, L:CARG1 - | // Returns thread status. - |4: - | ld TMP2, L:RA->base - | sltiu AT, CRET1, LUA_YIELD+1 - | ld TMP3, L:RA->top - | li_vmstate INTERP - | ld BASE, L->base - | sd L, DISPATCH_GL(cur_L)(DISPATCH) - | st_vmstate - | beqz AT, >8 - |. dsubu RD, TMP3, TMP2 - | ld TMP0, L->maxstack - | beqz RD, >6 // No results? - |. daddu TMP1, BASE, RD - | sltu AT, TMP0, TMP1 - | bnez AT, >9 // Need to grow stack? - |. daddu TMP3, TMP2, RD - | sd TMP2, L:RA->top // Clear coroutine stack. - | move TMP1, BASE - |5: // Move results from coroutine. - | ld CRET1, 0(TMP2) - | daddiu TMP2, TMP2, 8 - | sltu AT, TMP2, TMP3 - | sd CRET1, 0(TMP1) - | bnez AT, <5 - |. daddiu TMP1, TMP1, 8 - |6: - | andi TMP0, PC, FRAME_TYPE - |.if resume - | mov_true TMP1 - | daddiu RA, BASE, -8 - | sd TMP1, -8(BASE) // Prepend true to results. - | daddiu RD, RD, 16 - |.else - | move RA, BASE - | daddiu RD, RD, 8 - |.endif - |7: - | sd PC, SAVE_PC - | beqz TMP0, ->BC_RET_Z - |. move MULTRES, RD - | b ->vm_return - |. nop - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | daddiu TMP3, TMP3, -8 - | mov_false TMP1 - | ld CRET1, 0(TMP3) - | sd TMP3, L:RA->top // Remove error from coroutine stack. - | li RD, (2+1)*8 - | sd TMP1, -8(BASE) // Prepend false to results. - | daddiu RA, BASE, -8 - | sd CRET1, 0(BASE) // Copy error message. - | b <7 - |. andi TMP0, PC, FRAME_TYPE - |.else - | load_got lj_ffh_coroutine_wrap_err - | move CARG2, L:RA - | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - |. move CARG1, L - |.endif - | - |9: // Handle stack expansion on return from yield. - | load_got lj_state_growstack - | srl CARG2, RD, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | b <4 - |. 
li CRET1, 0 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | ld TMP0, L->cframe - | daddu TMP1, BASE, NARGS8:RC - | sd BASE, L->base - | andi TMP0, TMP0, CFRAME_RESUME - | sd TMP1, L->top - | beqz TMP0, ->fff_fallback - |. li CRET1, LUA_YIELD - | sd r0, L->cframe - | b ->vm_leave_unw - |. sb CRET1, L->status - | - |//-- Math library ------------------------------------------------------- - | - |.ffunc_1 math_abs - | gettp CARG2, CARG1 - | daddiu AT, CARG2, -LJ_TISNUM - | bnez AT, >1 - |. sextw TMP1, CARG1 - | sra TMP0, TMP1, 31 // Extract sign. - | xor TMP1, TMP1, TMP0 - | dsubu CARG1, TMP1, TMP0 - | dsll TMP3, CARG1, 32 - | bgez TMP3, ->fff_restv - |. settp CARG1, TISNUM - | li CARG1, 0x41e0 // 2^31 as a double. - | b ->fff_restv - |. dsll CARG1, CARG1, 48 - |1: - | sltiu AT, CARG2, LJ_TISNUM - | beqz AT, ->fff_fallback - |. dextm CARG1, CARG1, 0, 30 - |// fallthrough - | - |->fff_restv: - | // CARG1 = TValue result. - | ld PC, FRAME_PC(BASE) - | daddiu RA, BASE, -16 - | sd CARG1, -16(BASE) - |->fff_res1: - | // RA = results, PC = return. - | li RD, (1+1)*8 - |->fff_res: - | // RA = results, RD = (nresults+1)*8, PC = return. - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->vm_return - |. move MULTRES, RD - | lw INS, -4(PC) - | decode_RB8a RB, INS - | decode_RB8b RB - |5: - | sltu AT, RD, RB - | bnez AT, >6 // More results expected? - |. decode_RA8a TMP0, INS - | decode_RA8b TMP0 - | ins_next1 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | dsubu BASE, RA, TMP0 - | ins_next2 - | - |6: // Fill up results with nil. - | daddu TMP1, RA, RD - | daddiu RD, RD, 8 - | b <5 - |. sd TISNIL, -8(TMP1) - | - |.macro math_extern, func - | .ffunc_n math_ .. func - | load_got func - | call_extern - |. nop - | b ->fff_resn - |. nop - |.endmacro - | - |.macro math_extern2, func - | .ffunc_nn math_ .. func - |. load_got func - | call_extern - |. 
nop - | b ->fff_resn - |. nop - |.endmacro - | - |// TODO: Return integer type if result is integer (own sf implementation). - |.macro math_round, func - |->ff_math_ .. func: - | ld CARG1, 0(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. gettp TMP0, CARG1 - | beq TMP0, TISNUM, ->fff_restv - |. sltu AT, TMP0, TISNUM - | beqz AT, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - | bal ->vm_ .. func - |. nop - |.else - |. load_got func - | call_extern - |. nop - |.endif - | b ->fff_resn - |. nop - |.endmacro - | - | math_round floor - | math_round ceil - | - |.ffunc math_log - | li AT, 8 - | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. - |. ld CARG1, 0(BASE) - | checknum CARG1, ->fff_fallback - |. load_got log - |.if FPU - | call_extern - |. ldc1 FARG1, 0(BASE) - |.else - | call_extern - |. nop - |.endif - | b ->fff_resn - |. nop - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.if FPU - |.ffunc_n math_sqrt - |. sqrt.d FRET1, FARG1 - |// fallthrough to ->fff_resn - |.else - | math_extern sqrt - |.endif - | - |->fff_resn: - | ld PC, FRAME_PC(BASE) - | daddiu RA, BASE, -16 - | b ->fff_res1 - |.if FPU - |. sdc1 FRET1, 0(RA) - |.else - |. sd CRET1, 0(RA) - |.endif - | - | - |.ffunc_2 math_ldexp - | checknum CARG1, ->fff_fallback - | checkint CARG2, ->fff_fallback - |. load_got ldexp - | .FPU ldc1 FARG1, 0(BASE) - | call_extern - |. lw CARG2, 8+LO(BASE) - | b ->fff_resn - |. nop - | - |.ffunc_n math_frexp - | load_got frexp - | ld PC, FRAME_PC(BASE) - | call_extern - |. 
daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) - | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) - | daddiu RA, BASE, -16 - |.if FPU - | mtc1 TMP1, FARG2 - | sdc1 FRET1, 0(RA) - | cvt.d.w FARG2, FARG2 - | sdc1 FARG2, 8(RA) - |.else - | sd CRET1, 0(RA) - | zextw TMP1, TMP1 - | settp TMP1, TISNUM - | sd TMP1, 8(RA) - |.endif - | b ->fff_res - |. li RD, (2+1)*8 - | - |.ffunc_n math_modf - | load_got modf - | ld PC, FRAME_PC(BASE) - | call_extern - |. daddiu CARG2, BASE, -16 - | daddiu RA, BASE, -16 - |.if FPU - | sdc1 FRET1, -8(BASE) - |.else - | sd CRET1, -8(BASE) - |.endif - | b ->fff_res - |. li RD, (2+1)*8 - | - |.macro math_minmax, name, intins, fpins - | .ffunc_1 name - | daddu TMP3, BASE, NARGS8:RC - | checkint CARG1, >5 - |. daddiu TMP2, BASE, 8 - |1: // Handle integers. - | beq TMP2, TMP3, ->fff_restv - |. ld CARG2, 0(TMP2) - | checkint CARG2, >3 - |. sextw CARG1, CARG1 - | lw CARG2, LO(TMP2) - |. slt AT, CARG1, CARG2 - | intins CARG1, CARG2, AT - | daddiu TMP2, TMP2, 8 - | zextw CARG1, CARG1 - | b <1 - |. settp CARG1, TISNUM - | - |3: // Convert intermediate result to number and continue with number loop. - | checknum CARG2, ->fff_fallback - |.if FPU - |. mtc1 CARG1, FRET1 - | cvt.d.w FRET1, FRET1 - | b >7 - |. ldc1 FARG1, 0(TMP2) - |.else - |. nop - | bal ->vm_sfi2d_1 - |. nop - | b >7 - |. nop - |.endif - | - |5: - | .FPU ldc1 FRET1, 0(BASE) - | checknum CARG1, ->fff_fallback - |6: // Handle numbers. - |. ld CARG2, 0(TMP2) - | beq TMP2, TMP3, ->fff_resn - |.if FPU - | ldc1 FARG1, 0(TMP2) - |.else - | move CRET1, CARG1 - |.endif - | checknum CARG2, >8 - |. nop - |7: - |.if FPU - | c.olt.d FRET1, FARG1 - | fpins FRET1, FARG1 - |.else - | bal ->vm_sfcmpolt - |. nop - | intins CARG1, CARG2, CRET1 - |.endif - | b <6 - |. daddiu TMP2, TMP2, 8 - | - |8: // Convert integer to number and continue with number loop. - | checkint CARG2, ->fff_fallback - |.if FPU - |. lwc1 FARG1, LO(TMP2) - | b <7 - |. cvt.d.w FARG1, FARG1 - |.else - |. lw CARG2, LO(TMP2) - | bal ->vm_sfi2d_2 - |. 
nop - | b <7 - |. nop - |.endif - | - |.endmacro - | - | math_minmax math_min, movz, movf.d - | math_minmax math_max, movn, movt.d - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | ld CARG1, 0(BASE) - | gettp TMP0, CARG1 - | xori AT, NARGS8:RC, 8 - | daddiu TMP0, TMP0, -LJ_TSTR - | or AT, AT, TMP0 - | bnez AT, ->fff_fallback // Need exactly 1 string argument. - |. cleartp STR:CARG1 - | lw TMP0, STR:CARG1->len - | daddiu RA, BASE, -16 - | ld PC, FRAME_PC(BASE) - | sltu RD, r0, TMP0 - | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). - | addiu RD, RD, 1 - | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 - | settp TMP1, TISNUM - | b ->fff_res - |. sd TMP1, 0(RA) - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - |. nop - | ld CARG1, 0(BASE) - | gettp TMP0, CARG1 - | xori AT, NARGS8:RC, 8 // Exactly 1 argument. - | daddiu TMP0, TMP0, -LJ_TISNUM // Integer. - | li TMP1, 255 - | sextw CARG1, CARG1 - | or AT, AT, TMP0 - | sltu TMP1, TMP1, CARG1 // !(255 < n). - | or AT, AT, TMP1 - | bnez AT, ->fff_fallback - |. li CARG3, 1 - | daddiu CARG2, sp, TMPD_OFS - | sb CARG1, TMPD - |->fff_newstr: - | load_got lj_str_new - | sd BASE, L->base - | sd PC, SAVE_PC - | call_intern lj_str_new // (lua_State *L, char *str, size_t l) - |. move CARG1, L - | // Returns GCstr *. - | ld BASE, L->base - |->fff_resstr: - | li AT, LJ_TSTR - | settp CRET1, AT - | b ->fff_restv - |. move CARG1, CRET1 - | - |.ffunc string_sub - | ffgccheck - |. nop - | addiu AT, NARGS8:RC, -16 - | ld TMP0, 0(BASE) - | bltz AT, ->fff_fallback - |. gettp TMP3, TMP0 - | cleartp STR:CARG1, TMP0 - | ld CARG2, 8(BASE) - | beqz AT, >1 - |. li CARG4, -1 - | ld CARG3, 16(BASE) - | checkint CARG3, ->fff_fallback - |. sextw CARG4, CARG3 - |1: - | checkint CARG2, ->fff_fallback - |. li AT, LJ_TSTR - | bne TMP3, AT, ->fff_fallback - |. 
sextw CARG3, CARG2 - | lw CARG2, STR:CARG1->len - | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end - | slt AT, CARG4, r0 - | addiu TMP0, CARG2, 1 - | addu TMP1, CARG4, TMP0 - | slt TMP3, CARG3, r0 - | movn CARG4, TMP1, AT // if (end < 0) end += len+1 - | addu TMP1, CARG3, TMP0 - | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 - | li TMP2, 1 - | slt AT, CARG4, r0 - | slt TMP3, r0, CARG3 - | movn CARG4, r0, AT // if (end < 0) end = 0 - | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 - | slt AT, CARG2, CARG4 - | movn CARG4, CARG2, AT // if (end > len) end = len - | daddu CARG2, STR:CARG1, CARG3 - | subu CARG3, CARG4, CARG3 // len = end - start - | daddiu CARG2, CARG2, sizeof(GCstr)-1 - | bgez CARG3, ->fff_newstr - |. addiu CARG3, CARG3, 1 // len++ - |->fff_emptystr: // Return empty string. - | li AT, LJ_TSTR - | daddiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) - | b ->fff_restv - |. settp CARG1, AT - | - |.macro ffstring_op, name - | .ffunc string_ .. name - | ffgccheck - |. nop - | beqz NARGS8:RC, ->fff_fallback - |. ld CARG2, 0(BASE) - | checkstr STR:CARG2, ->fff_fallback - | daddiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) - | load_got lj_buf_putstr_ .. name - | ld TMP0, SBUF:CARG1->b - | sd L, SBUF:CARG1->L - | sd BASE, L->base - | sd TMP0, SBUF:CARG1->p - | call_intern extern lj_buf_putstr_ .. name - |. sd PC, SAVE_PC - | load_got lj_buf_tostr - | call_intern lj_buf_tostr - |. move SBUF:CARG1, SBUF:CRET1 - | b ->fff_resstr - |. ld BASE, L->base - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |->vm_tobit_fb: - | beqz TMP1, ->fff_fallback - |.if FPU - |. ldc1 FARG1, 0(BASE) - | add.d FARG1, FARG1, TOBIT - | mfc1 CRET1, FARG1 - | jr ra - |. zextw CRET1, CRET1 - |.else - |// FP number to bit conversion for soft-float. 
- |->vm_tobit: - | dsll TMP0, CARG1, 1 - | li CARG3, 1076 - | dsrl AT, TMP0, 53 - | dsubu CARG3, CARG3, AT - | sltiu AT, CARG3, 54 - | beqz AT, >1 - |. dextm TMP0, TMP0, 0, 20 - | dinsu TMP0, AT, 21, 21 - | slt AT, CARG1, r0 - | dsrlv CRET1, TMP0, CARG3 - | dsubu TMP0, r0, CRET1 - | movn CRET1, TMP0, AT - | jr ra - |. zextw CRET1, CRET1 - |1: - | jr ra - |. move CRET1, r0 - |.endif - | - |.macro .ffunc_bit, name - | .ffunc_1 bit_..name - | gettp TMP0, CARG1 - | beq TMP0, TISNUM, >6 - |. zextw CRET1, CARG1 - | bal ->vm_tobit_fb - |. sltiu TMP1, TMP0, LJ_TISNUM - |6: - |.endmacro - | - |.macro .ffunc_bit_op, name, bins - | .ffunc_bit name - | daddiu TMP2, BASE, 8 - | daddu TMP3, BASE, NARGS8:RC - |1: - | beq TMP2, TMP3, ->fff_resi - |. ld CARG1, 0(TMP2) - | gettp TMP0, CARG1 - |.if FPU - | bne TMP0, TISNUM, >2 - |. daddiu TMP2, TMP2, 8 - | zextw CARG1, CARG1 - | b <1 - |. bins CRET1, CRET1, CARG1 - |2: - | ldc1 FARG1, -8(TMP2) - | sltiu AT, TMP0, LJ_TISNUM - | beqz AT, ->fff_fallback - |. add.d FARG1, FARG1, TOBIT - | mfc1 CARG1, FARG1 - | zextw CARG1, CARG1 - | b <1 - |. bins CRET1, CRET1, CARG1 - |.else - | beq TMP0, TISNUM, >2 - |. move CRET2, CRET1 - | bal ->vm_tobit_fb - |. sltiu TMP1, TMP0, LJ_TISNUM - | move CARG1, CRET2 - |2: - | zextw CARG1, CARG1 - | bins CRET1, CRET1, CARG1 - | b <1 - |. daddiu TMP2, TMP2, 8 - |.endif - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, or - |.ffunc_bit_op bxor, xor - | - |.ffunc_bit bswap - | dsrl TMP0, CRET1, 8 - | dsrl TMP1, CRET1, 24 - | andi TMP2, TMP0, 0xff00 - | dins TMP1, CRET1, 24, 31 - | dins TMP2, TMP0, 16, 23 - | b ->fff_resi - |. or CRET1, TMP1, TMP2 - | - |.ffunc_bit bnot - | not CRET1, CRET1 - | b ->fff_resi - |. zextw CRET1, CRET1 - | - |.macro .ffunc_bit_sh, name, shins, shmod - | .ffunc_2 bit_..name - | gettp TMP0, CARG1 - | beq TMP0, TISNUM, >1 - |. nop - | bal ->vm_tobit_fb - |. 
sltiu TMP1, TMP0, LJ_TISNUM - | move CARG1, CRET1 - |1: - | gettp TMP0, CARG2 - | bne TMP0, TISNUM, ->fff_fallback - |. zextw CARG2, CARG2 - | sextw CARG1, CARG1 - |.if shmod == 1 - | negu CARG2, CARG2 - |.endif - | shins CRET1, CARG1, CARG2 - | b ->fff_resi - |. zextw CRET1, CRET1 - |.endmacro - | - |.ffunc_bit_sh lshift, sllv, 0 - |.ffunc_bit_sh rshift, srlv, 0 - |.ffunc_bit_sh arshift, srav, 0 - |.ffunc_bit_sh rol, rotrv, 1 - |.ffunc_bit_sh ror, rotrv, 0 - | - |.ffunc_bit tobit - |->fff_resi: - | ld PC, FRAME_PC(BASE) - | daddiu RA, BASE, -16 - | settp CRET1, TISNUM - | b ->fff_res1 - |. sd CRET1, -16(BASE) - | - |//----------------------------------------------------------------------- - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RB = CFUNC, RC = nargs*8 - | ld TMP3, CFUNC:RB->f - | daddu TMP1, BASE, NARGS8:RC - | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC. - | daddiu TMP0, TMP1, 8*LUA_MINSTACK - | ld TMP2, L->maxstack - | sd PC, SAVE_PC // Redundant (but a defined value). - | sltu AT, TMP2, TMP0 - | sd BASE, L->base - | sd TMP1, L->top - | bnez AT, >5 // Need to grow stack. - |. move CFUNCADDR, TMP3 - | jalr TMP3 // (lua_State *L) - |. move CARG1, L - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | ld BASE, L->base - | sll RD, CRET1, 3 - | bgtz CRET1, ->fff_res // Returned nresults+1? - |. daddiu RA, BASE, -16 - |1: // Returned 0 or -1: retry fast path. - | ld LFUNC:RB, FRAME_FUNC(BASE) - | ld TMP0, L->top - | cleartp LFUNC:RB - | bnez CRET1, ->vm_call_tail // Returned -1? - |. dsubu NARGS8:RC, TMP0, BASE - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | andi TMP0, PC, FRAME_TYPE - | li AT, -4 - | bnez TMP0, >3 - |. and TMP1, PC, AT - | lbu TMP1, OFS_RA(PC) - | sll TMP1, TMP1, 3 - | addiu TMP1, TMP1, 16 - |3: - | b ->vm_call_dispatch // Resolve again for tailcall. - |. 
dsubu TMP2, BASE, TMP1 - | - |5: // Grow stack for fallback handler. - | load_got lj_state_growstack - | li CARG2, LUA_MINSTACK - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | ld BASE, L->base - | b <1 - |. li CRET1, 0 // Force retry. - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | move MULTRES, ra - | load_got lj_gc_step - | sd BASE, L->base - | daddu TMP0, BASE, NARGS8:RC - | sd PC, SAVE_PC // Redundant (but a defined value). - | sd TMP0, L->top - | call_intern lj_gc_step // (lua_State *L) - |. move CARG1, L - | ld BASE, L->base - | move ra, MULTRES - | ld TMP0, L->top - | ld CFUNC:RB, FRAME_FUNC(BASE) - | cleartp CFUNC:RB - | jr ra - |. dsubu NARGS8:RC, TMP0, BASE - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent. - | bnez AT, >5 - | // Decrement the hookcount for consistency, but always do the call. - |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE - | bnez AT, >1 - |. addiu TMP2, TMP2, -1 - | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT - | beqz AT, >1 - |. nop - | b >1 - |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE // Hook already active? - | beqz AT, >1 - |5: // Re-dispatch to static ins. - |. ld AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. - | jr AT - |. nop - | - |->vm_inshook: // Dispatch target for instr/line hooks. 
- | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, HOOK_ACTIVE // Hook already active? - | bnez AT, <5 - |. andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT - | beqz AT, <5 - |. addiu TMP2, TMP2, -1 - | beqz TMP2, >1 - |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi AT, TMP3, LUA_MASKLINE - | beqz AT, <5 - |1: - |. load_got lj_dispatch_ins - | sw MULTRES, SAVE_MULTRES - | move CARG2, PC - | sd BASE, L->base - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |. move CARG1, L - |3: - | ld BASE, L->base - |4: // Re-dispatch to static ins. - | lw INS, -4(PC) - | decode_OP8a TMP1, INS - | decode_OP8b TMP1 - | daddu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS - | ld AT, GG_DISP2STATIC(TMP0) - | decode_RA8a RA, INS - | decode_RD8b RD - | jr AT - | decode_RA8b RA - | - |->cont_hook: // Continue from hook yield. - | daddiu PC, PC, 4 - | b <4 - |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins. - | - |->vm_hotloop: // Hot loop counter underflow. - |.if JIT - | ld LFUNC:TMP1, FRAME_FUNC(BASE) - | daddiu CARG1, DISPATCH, GG_DISP2J - | cleartp LFUNC:TMP1 - | sd PC, SAVE_PC - | ld TMP1, LFUNC:TMP1->pc - | move CARG2, PC - | sd L, DISPATCH_J(L)(DISPATCH) - | lbu TMP1, PC2PROTO(framesize)(TMP1) - | load_got lj_trace_hot - | sd BASE, L->base - | dsll TMP1, TMP1, 3 - | daddu TMP1, BASE, TMP1 - | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc) - |. sd TMP1, L->top - | b <3 - |. nop - |.endif - | - | - |->vm_callhook: // Dispatch target for call hooks. - |.if JIT - | b >1 - |.endif - |. move CARG2, PC - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | ori CARG2, PC, 1 - |1: - |.endif - | load_got lj_dispatch_call - | daddu TMP0, BASE, RC - | sd PC, SAVE_PC - | sd BASE, L->base - | dsubu RA, RA, BASE - | sd TMP0, L->top - | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc) - |. 
move CARG1, L - | // Returns ASMFunction. - | ld BASE, L->base - | ld TMP0, L->top - | sd r0, SAVE_PC // Invalidate for subsequent line hook. - | dsubu NARGS8:RC, TMP0, BASE - | daddu RA, BASE, RA - | ld LFUNC:RB, FRAME_FUNC(BASE) - | cleartp LFUNC:RB - | jr CRET1 - |. lw INS, -4(PC) - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // RA = resultptr, RB = meta base - | lw INS, -4(PC) - | ld TRACE:TMP2, -40(RB) // Save previous trace. - | decode_RA8a RC, INS - | daddiu AT, MULTRES, -8 - | cleartp TRACE:TMP2 - | decode_RA8b RC - | beqz AT, >2 - |. daddu RC, BASE, RC // Call base. - |1: // Move results down. - | ld CARG1, 0(RA) - | daddiu AT, AT, -8 - | daddiu RA, RA, 8 - | sd CARG1, 0(RC) - | bnez AT, <1 - |. daddiu RC, RC, 8 - |2: - | decode_RA8a RA, INS - | decode_RB8a RB, INS - | decode_RA8b RA - | decode_RB8b RB - | daddu RA, RA, RB - | daddu RA, BASE, RA - |3: - | sltu AT, RC, RA - | bnez AT, >9 // More results wanted? - |. nop - | - | lhu TMP3, TRACE:TMP2->traceno - | lhu RD, TRACE:TMP2->link - | beq RD, TMP3, ->cont_nop // Blacklisted. - |. load_got lj_dispatch_stitch - | bnez RD, =>BC_JLOOP // Jump to stitched trace. - |. sll RD, RD, 3 - | - | // Stitch a new trace to the previous trace. - | sw TMP3, DISPATCH_J(exitno)(DISPATCH) - | sd L, DISPATCH_J(L)(DISPATCH) - | sd BASE, L->base - | daddiu CARG1, DISPATCH, GG_DISP2J - | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) - |. move CARG2, PC - | b ->cont_nop - |. ld BASE, L->base - | - |9: - | sd TISNIL, 0(RC) - | b <3 - |. daddiu RC, RC, 8 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | load_got lj_dispatch_profile - | sd MULTRES, SAVE_MULTRES - | move CARG2, PC - | sd BASE, L->base - | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - |. move CARG1, L - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. - | daddiu PC, PC, -4 - | b ->cont_nop - |. 
ld BASE, L->base -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro savex_, a, b - |.if FPU - | sdc1 f..a, a*8(sp) - | sdc1 f..b, b*8(sp) - | sd r..a, 32*8+a*8(sp) - | sd r..b, 32*8+b*8(sp) - |.else - | sd r..a, a*8(sp) - | sd r..b, b*8(sp) - |.endif - |.endmacro - | - |->vm_exit_handler: - |.if JIT - |.if FPU - | daddiu sp, sp, -(32*8+32*8) - |.else - | daddiu sp, sp, -(32*8) - |.endif - | savex_ 0, 1 - | savex_ 2, 3 - | savex_ 4, 5 - | savex_ 6, 7 - | savex_ 8, 9 - | savex_ 10, 11 - | savex_ 12, 13 - | savex_ 14, 15 - | savex_ 16, 17 - | savex_ 18, 19 - | savex_ 20, 21 - | savex_ 22, 23 - | savex_ 24, 25 - | savex_ 26, 27 - | savex_ 28, 30 - |.if FPU - | sdc1 f29, 29*8(sp) - | sdc1 f31, 31*8(sp) - | sd r0, 32*8+31*8(sp) // Clear RID_TMP. - | daddiu TMP2, sp, 32*8+32*8 // Recompute original value of sp. - | sd TMP2, 32*8+29*8(sp) // Store sp in RID_SP - |.else - | sd r0, 31*8(sp) // Clear RID_TMP. - | daddiu TMP2, sp, 32*8 // Recompute original value of sp. - | sd TMP2, 29*8(sp) // Store sp in RID_SP - |.endif - | li_vmstate EXIT - | daddiu DISPATCH, JGL, -GG_DISP2G-32768 - | lw TMP1, 0(TMP2) // Load exit number. - | st_vmstate - | ld L, DISPATCH_GL(cur_L)(DISPATCH) - | ld BASE, DISPATCH_GL(jit_base)(DISPATCH) - | load_got lj_trace_exit - | sd L, DISPATCH_J(L)(DISPATCH) - | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. - | sd BASE, L->base - | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. - | daddiu CARG1, DISPATCH, GG_DISP2J - | sd r0, DISPATCH_GL(jit_base)(DISPATCH) - | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) - |. move CARG2, sp - | // Returns MULTRES (unscaled) or negated error code. - | ld TMP1, L->cframe - | li AT, -4 - | ld BASE, L->base - | and sp, TMP1, AT - | ld PC, SAVE_PC // Get SAVE_PC. - | b >1 - |. 
sd L, SAVE_L // Set SAVE_L (on-trace resume/yield). - |.endif - |->vm_exit_interp: - |.if JIT - | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. - | ld L, SAVE_L - | daddiu DISPATCH, JGL, -GG_DISP2G-32768 - | sd BASE, L->base - |1: - | bltz CRET1, >9 // Check for error from exit. - |. ld LFUNC:RB, FRAME_FUNC(BASE) - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | dsll MULTRES, CRET1, 3 - | cleartp LFUNC:RB - | sd MULTRES, SAVE_MULTRES - | li TISNIL, LJ_TNIL - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | .FPU mtc1 TMP3, TOBIT - | ld TMP1, LFUNC:RB->pc - | sd r0, DISPATCH_GL(jit_base)(DISPATCH) - | ld KBASE, PC2PROTO(k)(TMP1) - | .FPU cvt.d.s TOBIT, TOBIT - | // Modified copy of ins_next which handles function header dispatch, too. - | lw INS, 0(PC) - | daddiu PC, PC, 4 - | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 - | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) - | decode_OP8a TMP1, INS - | decode_OP8b TMP1 - | sltiu TMP2, TMP1, BC_FUNCF*8 - | daddu TMP0, DISPATCH, TMP1 - | decode_RD8a RD, INS - | ld AT, 0(TMP0) - | decode_RA8a RA, INS - | beqz TMP2, >2 - |. decode_RA8b RA - | jr AT - |. decode_RD8b RD - |2: - | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? - | bnez TMP2, >3 - |. ld TMP1, FRAME_PC(BASE) - | // Check frame below fast function. - | andi TMP0, TMP1, FRAME_TYPE - | bnez TMP0, >3 // Trace stitching continuation? - |. nop - | // Otherwise set KBASE for Lua function below fast function. - | lw TMP2, -4(TMP1) - | decode_RA8a TMP0, TMP2 - | decode_RA8b TMP0 - | dsubu TMP1, BASE, TMP0 - | ld LFUNC:TMP2, -32(TMP1) - | cleartp LFUNC:TMP2 - | ld TMP1, LFUNC:TMP2->pc - | ld KBASE, PC2PROTO(k)(TMP1) - |3: - | daddiu RC, MULTRES, -8 - | jr AT - |. daddu RA, RA, BASE - | - |9: // Rethrow error from the right C frame. - | load_got lj_err_throw - | negu CARG2, CRET1 - | call_intern lj_err_throw // (lua_State *L, int errcode) - |. 
move CARG1, L - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Hard-float round to integer. - |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. - |.macro vm_round_hf, func - | lui TMP0, 0x4330 // Hiword of 2^52 (double). - | dsll TMP0, TMP0, 32 - | dmtc1 TMP0, f4 - | abs.d FRET2, FARG1 // |x| - | dmfc1 AT, FARG1 - | c.olt.d 0, FRET2, f4 - | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 - | bc1f 0, >1 // Truncate only if |x| < 2^52. - |. sub.d FRET1, FRET1, f4 - | slt AT, AT, r0 - |.if "func" == "ceil" - | lui TMP0, 0xbff0 // Hiword of -1 (double). Preserves -0. - |.else - | lui TMP0, 0x3ff0 // Hiword of +1 (double). - |.endif - |.if "func" == "trunc" - | dsll TMP0, TMP0, 32 - | dmtc1 TMP0, f4 - | c.olt.d 0, FRET2, FRET1 // |x| < result? - | sub.d FRET2, FRET1, f4 - | movt.d FRET1, FRET2, 0 // If yes, subtract +1. - | neg.d FRET2, FRET1 - | jr ra - |. movn.d FRET1, FRET2, AT // Merge sign bit back in. - |.else - | neg.d FRET2, FRET1 - | dsll TMP0, TMP0, 32 - | dmtc1 TMP0, f4 - | movn.d FRET1, FRET2, AT // Merge sign bit back in. - |.if "func" == "ceil" - | c.olt.d 0, FRET1, FARG1 // x > result? - |.else - | c.olt.d 0, FARG1, FRET1 // x < result? - |.endif - | sub.d FRET2, FRET1, f4 // If yes, subtract +-1. - | jr ra - |. movt.d FRET1, FRET2, 0 - |.endif - |1: - | jr ra - |. mov.d FRET1, FARG1 - |.endmacro - | - |.macro vm_round, func - |.if FPU - | vm_round_hf, func - |.endif - |.endmacro - | - |->vm_floor: - | vm_round floor - |->vm_ceil: - | vm_round ceil - |->vm_trunc: - |.if JIT - | vm_round trunc - |.endif - | - |// Soft-float integer to number conversion. - |.macro sfi2d, ARG - |.if not FPU - | beqz ARG, >9 // Handle zero first. - |. sra TMP0, ARG, 31 - | xor TMP1, ARG, TMP0 - | dsubu TMP1, TMP1, TMP0 // Absolute value in TMP1. 
- | dclz ARG, TMP1 - | addiu ARG, ARG, -11 - | li AT, 0x3ff+63-11-1 - | dsllv TMP1, TMP1, ARG // Align mantissa left with leading 1. - | subu ARG, AT, ARG // Exponent - 1. - | ins ARG, TMP0, 11, 11 // Sign | Exponent. - | dsll ARG, ARG, 52 // Align left. - | jr ra - |. daddu ARG, ARG, TMP1 // Add mantissa, increment exponent. - |9: - | jr ra - |. nop - |.endif - |.endmacro - | - |// Input CARG1. Output: CARG1. Temporaries: AT, TMP0, TMP1. - |->vm_sfi2d_1: - | sfi2d CARG1 - | - |// Input CARG2. Output: CARG2. Temporaries: AT, TMP0, TMP1. - |->vm_sfi2d_2: - | sfi2d CARG2 - | - |// Soft-float comparison. Equivalent to c.eq.d. - |// Input: CARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. - |->vm_sfcmpeq: - |.if not FPU - | dsll AT, CARG1, 1 - | dsll TMP0, CARG2, 1 - | or TMP1, AT, TMP0 - | beqz TMP1, >8 // Both args +-0: return 1. - |. lui TMP1, 0xffe0 - | dsll TMP1, TMP1, 32 - | sltu AT, TMP1, AT - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0; - |. xor AT, CARG1, CARG2 - | jr ra - |. sltiu CRET1, AT, 1 // Same values: return 1. - |8: - | jr ra - |. li CRET1, 1 - |9: - | jr ra - |. li CRET1, 0 - |.endif - | - |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. - |// Input: CARG1, CARG2. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. - |->vm_sfcmpult: - |.if not FPU - | b >1 - |. li CRET2, 1 - |.endif - | - |->vm_sfcmpolt: - |.if not FPU - | li CRET2, 0 - |1: - | dsll AT, CARG1, 1 - | dsll TMP0, CARG2, 1 - | or TMP1, AT, TMP0 - | beqz TMP1, >8 // Both args +-0: return 0. - |. lui TMP1, 0xffe0 - | dsll TMP1, TMP1, 32 - | sltu AT, TMP1, AT - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; - |. and AT, CARG1, CARG2 - | bltz AT, >5 // Both args negative? - |. nop - | jr ra - |. slt CRET1, CARG1, CARG2 - |5: // Swap conditions if both operands are negative. - | jr ra - |. slt CRET1, CARG2, CARG1 - |8: - | jr ra - |. nop - |9: - | jr ra - |. 
move CRET1, CRET2 - |.endif - | - |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. - |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. - |->vm_sfcmpolex: - |.if not FPU - | dsll AT, CARG1, 1 - | dsll TMP0, CARG2, 1 - | or TMP1, AT, TMP0 - | beqz TMP1, >8 // Both args +-0: return 1. - |. lui TMP1, 0xffe0 - | dsll TMP1, TMP1, 32 - | sltu AT, TMP1, AT - | sltu TMP0, TMP1, TMP0 - | or TMP1, AT, TMP0 - | bnez TMP1, >9 // Either arg is NaN: return 0; - |. and AT, CARG1, CARG2 - | xor AT, AT, TMP3 - | bltz AT, >5 // Both args negative? - |. nop - | jr ra - |. slt CRET1, CARG2, CARG1 - |5: // Swap conditions if both operands are negative. - | jr ra - |. slt CRET1, CARG1, CARG2 - |8: - | jr ra - |. li CRET1, 1 - |9: - | jr ra - |. li CRET1, 0 - |.endif - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. Callback slot number in r1, g in r2. 
- |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - | saveregs - | ld CTSTATE, GL:r2->ctype_state - | daddiu DISPATCH, r2, GG_G2DISP - | load_got lj_ccallback_enter - | sw r1, CTSTATE->cb.slot - | sd CARG1, CTSTATE->cb.gpr[0] - | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] - | sd CARG2, CTSTATE->cb.gpr[1] - | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] - | sd CARG3, CTSTATE->cb.gpr[2] - | .FPU sdc1 FARG3, CTSTATE->cb.fpr[2] - | sd CARG4, CTSTATE->cb.gpr[3] - | .FPU sdc1 FARG4, CTSTATE->cb.fpr[3] - | sd CARG5, CTSTATE->cb.gpr[4] - | .FPU sdc1 FARG5, CTSTATE->cb.fpr[4] - | sd CARG6, CTSTATE->cb.gpr[5] - | .FPU sdc1 FARG6, CTSTATE->cb.fpr[5] - | sd CARG7, CTSTATE->cb.gpr[6] - | .FPU sdc1 FARG7, CTSTATE->cb.fpr[6] - | sd CARG8, CTSTATE->cb.gpr[7] - | .FPU sdc1 FARG8, CTSTATE->cb.fpr[7] - | daddiu TMP0, sp, CFRAME_SPACE - | sd TMP0, CTSTATE->cb.stack - | sd r0, SAVE_PC // Any value outside of bytecode is ok. - | move CARG2, sp - | call_intern lj_ccallback_enter // (CTState *cts, void *cf) - |. move CARG1, CTSTATE - | // Returns lua_State *. - | ld BASE, L:CRET1->base - | ld RC, L:CRET1->top - | move L, CRET1 - | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | ld LFUNC:RB, FRAME_FUNC(BASE) - | .FPU mtc1 TMP3, TOBIT - | li TISNIL, LJ_TNIL - | li TISNUM, LJ_TISNUM - | li_vmstate INTERP - | subu RC, RC, BASE - | cleartp LFUNC:RB - | st_vmstate - | .FPU cvt.d.s TOBIT, TOBIT - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | load_got lj_ccallback_leave - | ld CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) - | sd BASE, L->base - | sd RB, L->top - | sd L, CTSTATE->L - | move CARG2, RA - | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) - |. move CARG1, CTSTATE - | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] - | ld CRET1, CTSTATE->cb.gpr[0] - | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] - | b ->vm_leave_unw - |. ld CRET2, CTSTATE->cb.gpr[1] - |.endif - | - |->vm_ffi_call: // Call C function via FFI. 
- | // Caveat: needs special frame unwinding, see below. - |.if FFI - | .type CCSTATE, CCallState, CARG1 - | lw TMP1, CCSTATE->spadj - | lbu CARG2, CCSTATE->nsp - | move TMP2, sp - | dsubu sp, sp, TMP1 - | sd ra, -8(TMP2) - | sll CARG2, CARG2, 3 - | sd r16, -16(TMP2) - | sd CCSTATE, -24(TMP2) - | move r16, TMP2 - | daddiu TMP1, CCSTATE, offsetof(CCallState, stack) - | move TMP2, sp - | beqz CARG2, >2 - |. daddu TMP3, TMP1, CARG2 - |1: - | ld TMP0, 0(TMP1) - | daddiu TMP1, TMP1, 8 - | sltu AT, TMP1, TMP3 - | sd TMP0, 0(TMP2) - | bnez AT, <1 - |. daddiu TMP2, TMP2, 8 - |2: - | ld CFUNCADDR, CCSTATE->func - | .FPU ldc1 FARG1, CCSTATE->gpr[0] - | ld CARG2, CCSTATE->gpr[1] - | .FPU ldc1 FARG2, CCSTATE->gpr[1] - | ld CARG3, CCSTATE->gpr[2] - | .FPU ldc1 FARG3, CCSTATE->gpr[2] - | ld CARG4, CCSTATE->gpr[3] - | .FPU ldc1 FARG4, CCSTATE->gpr[3] - | ld CARG5, CCSTATE->gpr[4] - | .FPU ldc1 FARG5, CCSTATE->gpr[4] - | ld CARG6, CCSTATE->gpr[5] - | .FPU ldc1 FARG6, CCSTATE->gpr[5] - | ld CARG7, CCSTATE->gpr[6] - | .FPU ldc1 FARG7, CCSTATE->gpr[6] - | ld CARG8, CCSTATE->gpr[7] - | .FPU ldc1 FARG8, CCSTATE->gpr[7] - | jalr CFUNCADDR - |. ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. - | ld CCSTATE:TMP1, -24(r16) - | ld TMP2, -16(r16) - | ld ra, -8(r16) - | sd CRET1, CCSTATE:TMP1->gpr[0] - | sd CRET2, CCSTATE:TMP1->gpr[1] - |.if FPU - | sdc1 FRET1, CCSTATE:TMP1->fpr[0] - | sdc1 FRET2, CCSTATE:TMP1->fpr[1] - |.else - | sd CARG1, CCSTATE:TMP1->gpr[2] // 2nd FP struct field for soft-float. - |.endif - | move sp, r16 - | jr ra - |. move r16, TMP2 - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. 
*/ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. */ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.macro bc_comp, FRA, FRD, ARGRA, ARGRD, movop, fmovop, fcomp, sfcomp - | daddu RA, BASE, RA - | daddu RD, BASE, RD - | ld ARGRA, 0(RA) - | ld ARGRD, 0(RD) - | lhu TMP2, OFS_RD(PC) - | gettp CARG3, ARGRA - | gettp CARG4, ARGRD - | bne CARG3, TISNUM, >2 - |. daddiu PC, PC, 4 - | bne CARG4, TISNUM, >5 - |. decode_RD4b TMP2 - | sextw ARGRA, ARGRA - | sextw ARGRD, ARGRD - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | slt AT, CARG1, CARG2 - | addu TMP2, TMP2, TMP3 - | movop TMP2, r0, AT - |1: - | daddu PC, PC, TMP2 - | ins_next - | - |2: // RA is not an integer. - | sltiu AT, CARG3, LJ_TISNUM - | beqz AT, ->vmeta_comp - |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sltiu AT, CARG4, LJ_TISNUM - | beqz AT, >4 - |. decode_RD4b TMP2 - |.if FPU - | ldc1 FRA, 0(RA) - | ldc1 FRD, 0(RD) - |.endif - |3: // RA and RD are both numbers. - |.if FPU - | fcomp f20, f22 - | addu TMP2, TMP2, TMP3 - | b <1 - |. fmovop TMP2, r0 - |.else - | bal sfcomp - |. addu TMP2, TMP2, TMP3 - | b <1 - |. movop TMP2, r0, CRET1 - |.endif - | - |4: // RA is a number, RD is not a number. - | bne CARG4, TISNUM, ->vmeta_comp - | // RA is a number, RD is an integer. Convert RD to a number. - |.if FPU - |. lwc1 FRD, LO(RD) - | ldc1 FRA, 0(RA) - | b <3 - |. cvt.d.w FRD, FRD - |.else - |.if "ARGRD" == "CARG1" - |. sextw CARG1, CARG1 - | bal ->vm_sfi2d_1 - |. nop - |.else - |. sextw CARG2, CARG2 - | bal ->vm_sfi2d_2 - |. nop - |.endif - | b <3 - |. nop - |.endif - | - |5: // RA is an integer, RD is not an integer - | sltiu AT, CARG4, LJ_TISNUM - | beqz AT, ->vmeta_comp - |. 
lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | // RA is an integer, RD is a number. Convert RA to a number. - |.if FPU - | lwc1 FRA, LO(RA) - | ldc1 FRD, 0(RD) - | b <3 - | cvt.d.w FRA, FRA - |.else - |.if "ARGRA" == "CARG1" - | bal ->vm_sfi2d_1 - |. sextw CARG1, CARG1 - |.else - | bal ->vm_sfi2d_2 - |. sextw CARG2, CARG2 - |.endif - | b <3 - |. nop - |.endif - |.endmacro - | - if (op == BC_ISLT) { - | bc_comp f20, f22, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt - } else if (op == BC_ISGE) { - | bc_comp f20, f22, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt - } else if (op == BC_ISLE) { - | bc_comp f22, f20, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult - } else { - | bc_comp f22, f20, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult - } - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1*8, RD = src2*8, JMP with RD = target - | daddu RA, BASE, RA - | daddiu PC, PC, 4 - | daddu RD, BASE, RD - | ld CARG1, 0(RA) - | lhu TMP2, -4+OFS_RD(PC) - | ld CARG2, 0(RD) - | gettp CARG3, CARG1 - | gettp CARG4, CARG2 - | sltu AT, TISNUM, CARG3 - | sltu TMP1, TISNUM, CARG4 - | or AT, AT, TMP1 - if (vk) { - | beqz AT, ->BC_ISEQN_Z - } else { - | beqz AT, ->BC_ISNEN_Z - } - | // Either or both types are not numbers. - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - |.if FFI - |. li AT, LJ_TCDATA - | beq CARG3, AT, ->vmeta_equal_cd - |.endif - | decode_RD4b TMP2 - |.if FFI - | beq CARG4, AT, ->vmeta_equal_cd - |. nop - |.endif - | bne CARG1, CARG2, >2 - |. addu TMP2, TMP2, TMP3 - | // Tag and value are equal. - if (vk) { - |->BC_ISEQV_Z: - | daddu PC, PC, TMP2 - } - |1: - | ins_next - | - |2: // Check if the tags are the same and it's a table or userdata. - | xor AT, CARG3, CARG4 // Same type? - | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? - | movn TMP0, r0, AT - if (vk) { - | beqz TMP0, <1 - } else { - | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. - } - | // Different tables or userdatas. 
Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - |. cleartp TAB:TMP1, CARG1 - | ld TAB:TMP3, TAB:TMP1->metatable - if (vk) { - | beqz TAB:TMP3, <1 // No metatable? - |. nop - | lbu TMP3, TAB:TMP3->nomm - | andi TMP3, TMP3, 1<1 // Or 'no __eq' flag set? - } else { - | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable? - |. nop - | lbu TMP3, TAB:TMP3->nomm - | andi TMP3, TMP3, 1<BC_ISEQV_Z // Or 'no __eq' flag set? - } - |. nop - | b ->vmeta_equal // Handle __eq metamethod. - |. li TMP0, 1-vk // ne = 0 or 1. - break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src*8, RD = str_const*8 (~), JMP with RD = target - | daddu RA, BASE, RA - | daddiu PC, PC, 4 - | ld CARG1, 0(RA) - | dsubu RD, KBASE, RD - | lhu TMP2, -4+OFS_RD(PC) - | ld CARG2, -8(RD) // KBASE-8-str_const*8 - |.if FFI - | gettp TMP0, CARG1 - | li AT, LJ_TCDATA - |.endif - | li TMP1, LJ_TSTR - | decode_RD4b TMP2 - |.if FFI - | beq TMP0, AT, ->vmeta_equal_cd - |.endif - |. settp CARG2, TMP1 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | xor TMP1, CARG1, CARG2 - | addu TMP2, TMP2, TMP3 - if (vk) { - | movn TMP2, r0, TMP1 - } else { - | movz TMP2, r0, TMP1 - } - | daddu PC, PC, TMP2 - | ins_next - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src*8, RD = num_const*8, JMP with RD = target - | daddu RA, BASE, RA - | daddu RD, KBASE, RD - | ld CARG1, 0(RA) - | ld CARG2, 0(RD) - | lhu TMP2, OFS_RD(PC) - | gettp CARG3, CARG1 - | gettp CARG4, CARG2 - | daddiu PC, PC, 4 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - if (vk) { - |->BC_ISEQN_Z: - } else { - |->BC_ISNEN_Z: - } - | bne CARG3, TISNUM, >3 - |. decode_RD4b TMP2 - | bne CARG4, TISNUM, >6 - |. addu TMP2, TMP2, TMP3 - | xor AT, CARG1, CARG2 - if (vk) { - | movn TMP2, r0, AT - |1: - | daddu PC, PC, TMP2 - |2: - } else { - | movz TMP2, r0, AT - |1: - |2: - | daddu PC, PC, TMP2 - } - | ins_next - | - |3: // RA is not an integer. 
- | sltu AT, CARG3, TISNUM - |.if FFI - | beqz AT, >8 - |.else - | beqz AT, <2 - |.endif - |. addu TMP2, TMP2, TMP3 - | sltu AT, CARG4, TISNUM - |.if FPU - | ldc1 f20, 0(RA) - | ldc1 f22, 0(RD) - |.endif - | beqz AT, >5 - |. nop - |4: // RA and RD are both numbers. - |.if FPU - | c.eq.d f20, f22 - | b <1 - if (vk) { - |. movf TMP2, r0 - } else { - |. movt TMP2, r0 - } - |.else - | bal ->vm_sfcmpeq - |. nop - | b <1 - if (vk) { - |. movz TMP2, r0, CRET1 - } else { - |. movn TMP2, r0, CRET1 - } - |.endif - | - |5: // RA is a number, RD is not a number. - |.if FFI - | bne CARG4, TISNUM, >9 - |.else - | bne CARG4, TISNUM, <2 - |.endif - | // RA is a number, RD is an integer. Convert RD to a number. - |.if FPU - |. lwc1 f22, LO(RD) - | b <4 - |. cvt.d.w f22, f22 - |.else - |. sextw CARG2, CARG2 - | bal ->vm_sfi2d_2 - |. nop - | b <4 - |. nop - |.endif - | - |6: // RA is an integer, RD is not an integer - | sltu AT, CARG4, TISNUM - |.if FFI - | beqz AT, >9 - |.else - | beqz AT, <2 - |.endif - | // RA is an integer, RD is a number. Convert RA to a number. - |.if FPU - |. lwc1 f20, LO(RA) - | ldc1 f22, 0(RD) - | b <4 - | cvt.d.w f20, f20 - |.else - |. sextw CARG1, CARG1 - | bal ->vm_sfi2d_1 - |. nop - | b <4 - |. nop - |.endif - | - |.if FFI - |8: - | li AT, LJ_TCDATA - | bne CARG3, AT, <2 - |. nop - | b ->vmeta_equal_cd - |. nop - |9: - | li AT, LJ_TCDATA - | bne CARG4, AT, <2 - |. nop - | b ->vmeta_equal_cd - |. nop - |.endif - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target - | daddu RA, BASE, RA - | srl TMP1, RD, 3 - | ld TMP0, 0(RA) - | lhu TMP2, OFS_RD(PC) - | not TMP1, TMP1 - | gettp TMP0, TMP0 - | daddiu PC, PC, 4 - |.if FFI - | li AT, LJ_TCDATA - | beq TMP0, AT, ->vmeta_equal_cd - |.endif - |. 
xor TMP0, TMP0, TMP1 - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - if (vk) { - | movn TMP2, r0, TMP0 - } else { - | movz TMP2, r0, TMP0 - } - | daddu PC, PC, TMP2 - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst*8 or unused, RD = src*8, JMP with RD = target - | daddu RD, BASE, RD - | lhu TMP2, OFS_RD(PC) - | ld TMP0, 0(RD) - | daddiu PC, PC, 4 - | gettp TMP0, TMP0 - | sltiu TMP0, TMP0, LJ_TISTRUECOND - if (op == BC_IST || op == BC_ISF) { - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - if (op == BC_IST) { - | movz TMP2, r0, TMP0 - } else { - | movn TMP2, r0, TMP0 - } - | daddu PC, PC, TMP2 - } else { - | ld CRET1, 0(RD) - if (op == BC_ISTC) { - | beqz TMP0, >1 - } else { - | bnez TMP0, >1 - } - |. daddu RA, BASE, RA - | decode_RD4b TMP2 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | addu TMP2, TMP2, TMP3 - | sd CRET1, 0(RA) - | daddu PC, PC, TMP2 - |1: - } - | ins_next - break; - - case BC_ISTYPE: - | // RA = src*8, RD = -type*8 - | daddu TMP2, BASE, RA - | srl TMP1, RD, 3 - | ld TMP0, 0(TMP2) - | ins_next1 - | gettp TMP0, TMP0 - | daddu AT, TMP0, TMP1 - | bnez AT, ->vmeta_istype - |. ins_next2 - break; - case BC_ISNUM: - | // RA = src*8, RD = -(TISNUM-1)*8 - | daddu TMP2, BASE, RA - | ld TMP0, 0(TMP2) - | ins_next1 - | checknum TMP0, ->vmeta_istype - |. 
ins_next2 - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA = dst*8, RD = src*8 - | daddu RD, BASE, RD - | daddu RA, BASE, RA - | ld CRET1, 0(RD) - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - break; - case BC_NOT: - | // RA = dst*8, RD = src*8 - | daddu RD, BASE, RD - | daddu RA, BASE, RA - | ld TMP0, 0(RD) - | li AT, LJ_TTRUE - | gettp TMP0, TMP0 - | sltu TMP0, AT, TMP0 - | addiu TMP0, TMP0, 1 - | dsll TMP0, TMP0, 47 - | not TMP0, TMP0 - | ins_next1 - | sd TMP0, 0(RA) - | ins_next2 - break; - case BC_UNM: - | // RA = dst*8, RD = src*8 - | daddu RB, BASE, RD - | ld CARG1, 0(RB) - | daddu RA, BASE, RA - | gettp CARG3, CARG1 - | bne CARG3, TISNUM, >2 - |. lui TMP1, 0x8000 - | sextw CARG1, CARG1 - | beq CARG1, TMP1, ->vmeta_unm // Meta handler deals with -2^31. - |. negu CARG1, CARG1 - | zextw CARG1, CARG1 - | settp CARG1, TISNUM - |1: - | ins_next1 - | sd CARG1, 0(RA) - | ins_next2 - |2: - | sltiu AT, CARG3, LJ_TISNUM - | beqz AT, ->vmeta_unm - |. dsll TMP1, TMP1, 32 - | b <1 - |. xor CARG1, CARG1, TMP1 - break; - case BC_LEN: - | // RA = dst*8, RD = src*8 - | daddu CARG2, BASE, RD - | daddu RA, BASE, RA - | ld TMP0, 0(CARG2) - | gettp TMP1, TMP0 - | daddiu AT, TMP1, -LJ_TSTR - | bnez AT, >2 - |. cleartp STR:CARG1, TMP0 - | lw CRET1, STR:CARG1->len - |1: - | settp CRET1, TISNUM - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - |2: - | daddiu AT, TMP1, -LJ_TTAB - | bnez AT, ->vmeta_len - |. nop -#if LJ_52 - | ld TAB:TMP2, TAB:CARG1->metatable - | bnez TAB:TMP2, >9 - |. nop - |3: -#endif - |->BC_LEN_Z: - | load_got lj_tab_len - | call_intern lj_tab_len // (GCtab *t) - |. nop - | // Returns uint32_t (but less than 2^31). - | b <1 - |. nop -#if LJ_52 - |9: - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_len - |. nop -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro fpmod, a, b, c - | bal ->vm_floor // floor(b/c) - |. 
div.d FARG1, b, c - | mul.d a, FRET1, c - | sub.d a, b, a // b - floor(b/c)*c - |.endmacro - - |.macro sfpmod - | daddiu sp, sp, -16 - | - | load_got __divdf3 - | sd CARG1, 0(sp) - | call_extern - |. sd CARG2, 8(sp) - | - | load_got floor - | call_extern - |. move CARG1, CRET1 - | - | load_got __muldf3 - | move CARG1, CRET1 - | call_extern - |. ld CARG2, 8(sp) - | - | load_got __subdf3 - | ld CARG1, 0(sp) - | call_extern - |. move CARG2, CRET1 - | - | daddiu sp, sp, 16 - |.endmacro - - |.macro ins_arithpre, label - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||switch (vk) { - ||case 0: - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | // RA = dst*8, RB = src1*8, RC = num_const*8 - | daddu RB, BASE, RB - |.if "label" ~= "none" - | b label - |.endif - |. daddu RC, KBASE, RC - || break; - ||case 1: - | decode_RB8a RC, INS - | decode_RB8b RC - | decode_RDtoRC8 RB, RD - | // RA = dst*8, RB = num_const*8, RC = src1*8 - | daddu RC, BASE, RC - |.if "label" ~= "none" - | b label - |.endif - |. daddu RB, KBASE, RB - || break; - ||default: - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | // RA = dst*8, RB = src1*8, RC = src2*8 - | daddu RB, BASE, RB - |.if "label" ~= "none" - | b label - |.endif - |. daddu RC, BASE, RC - || break; - ||} - |.endmacro - | - |.macro ins_arith, intins, fpins, fpcall, label - | ins_arithpre none - | - |.if "label" ~= "none" - |label: - |.endif - | - |// Used in 5. - | ld CARG1, 0(RB) - | ld CARG2, 0(RC) - | gettp TMP0, CARG1 - | gettp TMP1, CARG2 - | - |.if "intins" ~= "div" - | - | // Check for two integers. - | sextw CARG3, CARG1 - | bne TMP0, TISNUM, >5 - |. sextw CARG4, CARG2 - | bne TMP1, TISNUM, >5 - | - |.if "intins" == "addu" - |. intins CRET1, CARG3, CARG4 - | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. - | xor TMP2, CRET1, CARG4 - | and TMP1, TMP1, TMP2 - | bltz TMP1, ->vmeta_arith - |. 
daddu RA, BASE, RA - |.elif "intins" == "subu" - |. intins CRET1, CARG3, CARG4 - | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. - | xor TMP2, CARG3, CARG4 - | and TMP1, TMP1, TMP2 - | bltz TMP1, ->vmeta_arith - |. daddu RA, BASE, RA - |.elif "intins" == "mult" - |. intins CARG3, CARG4 - | mflo CRET1 - | mfhi TMP2 - | sra TMP1, CRET1, 31 - | bne TMP1, TMP2, ->vmeta_arith - |. daddu RA, BASE, RA - |.else - |. load_got lj_vm_modi - | beqz CARG4, ->vmeta_arith - |. daddu RA, BASE, RA - | move CARG1, CARG3 - | call_extern - |. move CARG2, CARG4 - |.endif - | - | zextw CRET1, CRET1 - | settp CRET1, TISNUM - | ins_next1 - | sd CRET1, 0(RA) - |3: - | ins_next2 - | - |.endif - | - |5: // Check for two numbers. - | .FPU ldc1 f20, 0(RB) - | sltu AT, TMP0, TISNUM - | sltu TMP0, TMP1, TISNUM - | .FPU ldc1 f22, 0(RC) - | and AT, AT, TMP0 - | beqz AT, ->vmeta_arith - |. daddu RA, BASE, RA - | - |.if FPU - | fpins FRET1, f20, f22 - |.elif "fpcall" == "sfpmod" - | sfpmod - |.else - | load_got fpcall - | call_extern - |. nop - |.endif - | - | ins_next1 - |.if "intins" ~= "div" - | b <3 - |.endif - |.if FPU - |. sdc1 FRET1, 0(RA) - |.else - |. 
sd CRET1, 0(RA) - |.endif - |.if "intins" == "div" - | ins_next2 - |.endif - | - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arith addu, add.d, __adddf3, none - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arith subu, sub.d, __subdf3, none - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith mult, mul.d, __muldf3, none - break; - case BC_DIVVN: - | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z - break; - case BC_DIVNV: case BC_DIVVV: - | ins_arithpre ->BC_DIVVN_Z - break; - case BC_MODVN: - | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z - break; - case BC_MODNV: case BC_MODVV: - | ins_arithpre ->BC_MODVN_Z - break; - case BC_POW: - | ins_arithpre none - | ld CARG1, 0(RB) - | ld CARG2, 0(RC) - | gettp TMP0, CARG1 - | gettp TMP1, CARG2 - | sltiu TMP0, TMP0, LJ_TISNUM - | sltiu TMP1, TMP1, LJ_TISNUM - | and AT, TMP0, TMP1 - | load_got pow - | beqz AT, ->vmeta_arith - |. daddu RA, BASE, RA - |.if FPU - | ldc1 FARG1, 0(RB) - | ldc1 FARG2, 0(RC) - |.endif - | call_extern - |. nop - | ins_next1 - |.if FPU - | sdc1 FRET1, 0(RA) - |.else - | sd CRET1, 0(RA) - |.endif - | ins_next2 - break; - - case BC_CAT: - | // RA = dst*8, RB = src_start*8, RC = src_end*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | dsubu CARG3, RC, RB - | sd BASE, L->base - | daddu CARG2, BASE, RC - | move MULTRES, RB - |->BC_CAT_Z: - | load_got lj_meta_cat - | srl CARG3, CARG3, 3 - | sd PC, SAVE_PC - | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left) - |. move CARG1, L - | // Returns NULL (finished) or TValue * (metamethod). - | bnez CRET1, ->vmeta_binop - |. 
ld BASE, L->base - | daddu RB, BASE, MULTRES - | ld CRET1, 0(RB) - | daddu RA, BASE, RA - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst*8, RD = str_const*8 (~) - | dsubu TMP1, KBASE, RD - | ins_next1 - | li TMP2, LJ_TSTR - | ld TMP0, -8(TMP1) // KBASE-8-str_const*8 - | daddu RA, BASE, RA - | settp TMP0, TMP2 - | sd TMP0, 0(RA) - | ins_next2 - break; - case BC_KCDATA: - |.if FFI - | // RA = dst*8, RD = cdata_const*8 (~) - | dsubu TMP1, KBASE, RD - | ins_next1 - | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8 - | li TMP2, LJ_TCDATA - | daddu RA, BASE, RA - | settp TMP0, TMP2 - | sd TMP0, 0(RA) - | ins_next2 - |.endif - break; - case BC_KSHORT: - | // RA = dst*8, RD = int16_literal*8 - | sra RD, INS, 16 - | daddu RA, BASE, RA - | zextw RD, RD - | ins_next1 - | settp RD, TISNUM - | sd RD, 0(RA) - | ins_next2 - break; - case BC_KNUM: - | // RA = dst*8, RD = num_const*8 - | daddu RD, KBASE, RD - | daddu RA, BASE, RA - | ld CRET1, 0(RD) - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - break; - case BC_KPRI: - | // RA = dst*8, RD = primitive_type*8 (~) - | daddu RA, BASE, RA - | dsll TMP0, RD, 44 - | not TMP0, TMP0 - | ins_next1 - | sd TMP0, 0(RA) - | ins_next2 - break; - case BC_KNIL: - | // RA = base*8, RD = end*8 - | daddu RA, BASE, RA - | sd TISNIL, 0(RA) - | daddiu RA, RA, 8 - | daddu RD, BASE, RD - |1: - | sd TISNIL, 0(RA) - | slt AT, RA, RD - | bnez AT, <1 - |. 
daddiu RA, RA, 8 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst*8, RD = uvnum*8 - | ld LFUNC:RB, FRAME_FUNC(BASE) - | daddu RA, BASE, RA - | cleartp LFUNC:RB - | daddu RD, RD, LFUNC:RB - | ld UPVAL:RB, LFUNC:RD->uvptr - | ins_next1 - | ld TMP1, UPVAL:RB->v - | ld CRET1, 0(TMP1) - | sd CRET1, 0(RA) - | ins_next2 - break; - case BC_USETV: - | // RA = uvnum*8, RD = src*8 - | ld LFUNC:RB, FRAME_FUNC(BASE) - | daddu RD, BASE, RD - | cleartp LFUNC:RB - | daddu RA, RA, LFUNC:RB - | ld UPVAL:RB, LFUNC:RA->uvptr - | ld CRET1, 0(RD) - | lbu TMP3, UPVAL:RB->marked - | ld CARG2, UPVAL:RB->v - | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbu TMP0, UPVAL:RB->closed - | gettp TMP2, RD - | sd CRET1, 0(CARG2) - | li AT, LJ_GC_BLACK|1 - | or TMP3, TMP3, TMP0 - | beq TMP3, AT, >2 // Upvalue is closed and black? - |. daddiu TMP2, TMP2, -(LJ_TNUMX+1) - |1: - | ins_next - | - |2: // Check if new value is collectable. - | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) - | beqz AT, <1 // tvisgcv(v) - |. cleartp GCOBJ:TMP1, RB - | lbu TMP3, GCOBJ:TMP1->gch.marked - | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) - | beqz TMP3, <1 - |. load_got lj_gc_barrieruv - | // Crossed a write barrier. Move the barrier forward. - | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) - |. daddiu CARG1, DISPATCH, GG_DISP2G - | b <1 - |. nop - break; - case BC_USETS: - | // RA = uvnum*8, RD = str_const*8 (~) - | ld LFUNC:RB, FRAME_FUNC(BASE) - | dsubu TMP1, KBASE, RD - | cleartp LFUNC:RB - | daddu RA, RA, LFUNC:RB - | ld UPVAL:RB, LFUNC:RA->uvptr - | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 - | lbu TMP2, UPVAL:RB->marked - | ld CARG2, UPVAL:RB->v - | lbu TMP3, STR:TMP1->marked - | andi AT, TMP2, LJ_GC_BLACK // isblack(uv) - | lbu TMP2, UPVAL:RB->closed - | li TMP0, LJ_TSTR - | settp TMP1, TMP0 - | bnez AT, >2 - |. 
sd TMP1, 0(CARG2) - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | beqz TMP2, <1 - |. andi AT, TMP3, LJ_GC_WHITES // iswhite(str) - | beqz AT, <1 - |. load_got lj_gc_barrieruv - | // Crossed a write barrier. Move the barrier forward. - | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) - |. daddiu CARG1, DISPATCH, GG_DISP2G - | b <1 - |. nop - break; - case BC_USETN: - | // RA = uvnum*8, RD = num_const*8 - | ld LFUNC:RB, FRAME_FUNC(BASE) - | daddu RD, KBASE, RD - | cleartp LFUNC:RB - | daddu RA, RA, LFUNC:RB - | ld UPVAL:RB, LFUNC:RA->uvptr - | ld CRET1, 0(RD) - | ld TMP1, UPVAL:RB->v - | ins_next1 - | sd CRET1, 0(TMP1) - | ins_next2 - break; - case BC_USETP: - | // RA = uvnum*8, RD = primitive_type*8 (~) - | ld LFUNC:RB, FRAME_FUNC(BASE) - | dsll TMP0, RD, 44 - | cleartp LFUNC:RB - | daddu RA, RA, LFUNC:RB - | not TMP0, TMP0 - | ld UPVAL:RB, LFUNC:RA->uvptr - | ins_next1 - | ld TMP1, UPVAL:RB->v - | sd TMP0, 0(TMP1) - | ins_next2 - break; - - case BC_UCLO: - | // RA = level*8, RD = target - | ld TMP2, L->openupval - | branch_RD // Do this first since RD is not saved. - | load_got lj_func_closeuv - | sd BASE, L->base - | beqz TMP2, >1 - |. move CARG1, L - | call_intern lj_func_closeuv // (lua_State *L, TValue *level) - |. daddu CARG2, BASE, RA - | ld BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) - | load_got lj_func_newL_gc - | dsubu TMP1, KBASE, RD - | ld CARG3, FRAME_FUNC(BASE) - | ld CARG2, -8(TMP1) // KBASE-8-tab_const*8 - | sd BASE, L->base - | sd PC, SAVE_PC - | cleartp CARG3 - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | call_intern lj_func_newL_gc - |. move CARG1, L - | // Returns GCfuncL *. 
- | li TMP0, LJ_TFUNC - | ld BASE, L->base - | ins_next1 - | settp CRET1, TMP0 - | daddu RA, BASE, RA - | sd CRET1, 0(RA) - | ins_next2 - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) - | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | sd BASE, L->base - | sd PC, SAVE_PC - | sltu AT, TMP0, TMP1 - | beqz AT, >5 - |1: - if (op == BC_TNEW) { - | load_got lj_tab_new - | srl CARG2, RD, 3 - | andi CARG2, CARG2, 0x7ff - | li TMP0, 0x801 - | addiu AT, CARG2, -0x7ff - | srl CARG3, RD, 14 - | movz CARG2, TMP0, AT - | // (lua_State *L, int32_t asize, uint32_t hbits) - | call_intern lj_tab_new - |. move CARG1, L - | // Returns Table *. - } else { - | load_got lj_tab_dup - | dsubu TMP1, KBASE, RD - | move CARG1, L - | call_intern lj_tab_dup // (lua_State *L, Table *kt) - |. ld CARG2, -8(TMP1) // KBASE-8-str_const*8 - | // Returns Table *. - } - | li TMP0, LJ_TTAB - | ld BASE, L->base - | ins_next1 - | daddu RA, BASE, RA - | settp CRET1, TMP0 - | sd CRET1, 0(RA) - | ins_next2 - |5: - | load_got lj_gc_step_fixtop - | move MULTRES, RD - | call_intern lj_gc_step_fixtop // (lua_State *L) - |. move CARG1, L - | b <1 - |. move RD, MULTRES - break; - - case BC_GGET: - | // RA = dst*8, RD = str_const*8 (~) - case BC_GSET: - | // RA = src*8, RD = str_const*8 (~) - | ld LFUNC:TMP2, FRAME_FUNC(BASE) - | dsubu TMP1, KBASE, RD - | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8 - | cleartp LFUNC:TMP2 - | ld TAB:RB, LFUNC:TMP2->env - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - |. 
daddu RA, BASE, RA - break; - - case BC_TGETV: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | daddu CARG2, BASE, RB - | daddu CARG3, BASE, RC - | ld TAB:RB, 0(CARG2) - | ld TMP2, 0(CARG3) - | daddu RA, BASE, RA - | checktab TAB:RB, ->vmeta_tgetv - | gettp TMP3, TMP2 - | bne TMP3, TISNUM, >5 // Integer key? - |. lw TMP0, TAB:RB->asize - | sextw TMP2, TMP2 - | ld TMP1, TAB:RB->array - | sltu AT, TMP2, TMP0 - | sll TMP2, TMP2, 3 - | beqz AT, ->vmeta_tgetv // Integer key and in array part? - |. daddu TMP2, TMP1, TMP2 - | ld AT, 0(TMP2) - | beq AT, TISNIL, >2 - |. ld CRET1, 0(TMP2) - |1: - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - | - |2: // Check for __index if table value is nil. - | ld TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_tgetv - |. nop - | - |5: - | li AT, LJ_TSTR - | bne TMP3, AT, ->vmeta_tgetv - |. cleartp RC, TMP2 - | b ->BC_TGETS_Z // String key? - |. nop - break; - case BC_TGETS: - | // RA = dst*8, RB = table*8, RC = str_const*8 (~) - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RC8a RC, INS - | daddu CARG2, BASE, RB - | decode_RC8b RC - | ld TAB:RB, 0(CARG2) - | dsubu CARG3, KBASE, RC - | daddu RA, BASE, RA - | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8 - | checktab TAB:RB, ->vmeta_tgets1 - |->BC_TGETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 - | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash - | ld NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | sll TMP0, TMP1, 5 - | sll TMP1, TMP1, 3 - | subu TMP1, TMP0, TMP1 - | li TMP3, LJ_TSTR - | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - | settp STR:RC, TMP3 // Tagged key to look for. - |1: - | ld CARG1, NODE:TMP2->key - | ld CRET1, NODE:TMP2->val - | ld NODE:TMP1, NODE:TMP2->next - | bne CARG1, RC, >4 - |. 
ld TAB:TMP3, TAB:RB->metatable - | beq CRET1, TISNIL, >5 // Key found, but nil value? - |. nop - |3: - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - | - |4: // Follow hash chain. - | bnez NODE:TMP1, <1 - |. move NODE:TMP2, NODE:TMP1 - | // End of hash chain: key not found, nil result. - | - |5: // Check for __index if table value is nil. - | beqz TAB:TMP3, <3 // No metatable: done. - |. move CRET1, TISNIL - | lbu TMP0, TAB:TMP3->nomm - | andi TMP0, TMP0, 1<vmeta_tgets - |. nop - break; - case BC_TGETB: - | // RA = dst*8, RB = table*8, RC = index*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | daddu CARG2, BASE, RB - | decode_RDtoRC8 RC, RD - | ld TAB:RB, 0(CARG2) - | daddu RA, BASE, RA - | srl TMP0, RC, 3 - | checktab TAB:RB, ->vmeta_tgetb - | lw TMP1, TAB:RB->asize - | ld TMP2, TAB:RB->array - | sltu AT, TMP0, TMP1 - | beqz AT, ->vmeta_tgetb - |. daddu RC, TMP2, RC - | ld AT, 0(RC) - | beq AT, TISNIL, >5 - |. ld CRET1, 0(RC) - |1: - | ins_next1 - | sd CRET1, 0(RA) - | ins_next2 - | - |5: // Check for __index if table value is nil. - | ld TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP1, TAB:TMP2->nomm - | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! - |. nop - break; - case BC_TGETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | daddu RB, BASE, RB - | daddu RC, BASE, RC - | ld TAB:CARG1, 0(RB) - | lw CARG2, LO(RC) - | daddu RA, BASE, RA - | cleartp TAB:CARG1 - | lw TMP0, TAB:CARG1->asize - | ld TMP1, TAB:CARG1->array - | sltu AT, CARG2, TMP0 - | sll TMP2, CARG2, 3 - | beqz AT, ->vmeta_tgetr // In array part? - |. 
daddu CRET1, TMP1, TMP2 - | ld CARG2, 0(CRET1) - |->BC_TGETR_Z: - | ins_next1 - | sd CARG2, 0(RA) - | ins_next2 - break; - - case BC_TSETV: - | // RA = src*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | daddu CARG2, BASE, RB - | daddu CARG3, BASE, RC - | ld RB, 0(CARG2) - | ld TMP2, 0(CARG3) - | daddu RA, BASE, RA - | checktab RB, ->vmeta_tsetv - | checkint TMP2, >5 - |. sextw RC, TMP2 - | lw TMP0, TAB:RB->asize - | ld TMP1, TAB:RB->array - | sltu AT, RC, TMP0 - | sll TMP2, RC, 3 - | beqz AT, ->vmeta_tsetv // Integer key and in array part? - |. daddu TMP1, TMP1, TMP2 - | ld TMP0, 0(TMP1) - | lbu TMP3, TAB:RB->marked - | beq TMP0, TISNIL, >3 - |. ld CRET1, 0(RA) - |1: - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |. sd CRET1, 0(TMP1) - |2: - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | ld TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP2, TAB:TMP2->nomm - | andi TMP2, TMP2, 1<vmeta_tsetv - |. nop - | - |5: - | gettp AT, TMP2 - | daddiu AT, AT, -LJ_TSTR - | bnez AT, ->vmeta_tsetv - |. nop - | b ->BC_TSETS_Z // String key? - |. cleartp STR:RC, TMP2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0, <2 - break; - case BC_TSETS: - | // RA = src*8, RB = table*8, RC = str_const*8 (~) - | decode_RB8a RB, INS - | decode_RB8b RB - | daddu CARG2, BASE, RB - | decode_RC8a RC, INS - | ld TAB:RB, 0(CARG2) - | decode_RC8b RC - | dsubu CARG3, KBASE, RC - | ld RC, -8(CARG3) // KBASE-8-str_const*8 - | daddu RA, BASE, RA - | cleartp STR:RC - | checktab TAB:RB, ->vmeta_tsets1 - |->BC_TSETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 - | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash - | ld NODE:TMP2, TAB:RB->node - | sb r0, TAB:RB->nomm // Clear metamethod cache. 
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | sll TMP0, TMP1, 5 - | sll TMP1, TMP1, 3 - | subu TMP1, TMP0, TMP1 - | li TMP3, LJ_TSTR - | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - | settp STR:RC, TMP3 // Tagged key to look for. - |.if FPU - | ldc1 f20, 0(RA) - |.else - | ld CRET1, 0(RA) - |.endif - |1: - | ld TMP0, NODE:TMP2->key - | ld CARG2, NODE:TMP2->val - | ld NODE:TMP1, NODE:TMP2->next - | bne TMP0, RC, >5 - |. lbu TMP3, TAB:RB->marked - | beq CARG2, TISNIL, >4 // Key found, but nil value? - |. ld TAB:TMP0, TAB:RB->metatable - |2: - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |.if FPU - |. sdc1 f20, NODE:TMP2->val - |.else - |. sd CRET1, NODE:TMP2->val - |.endif - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | beqz TAB:TMP0, <2 // No metatable: done. - |. nop - | lbu TMP0, TAB:TMP0->nomm - | andi TMP0, TMP0, 1<vmeta_tsets - |. nop - | - |5: // Follow hash chain. - | bnez NODE:TMP1, <1 - |. move NODE:TMP2, NODE:TMP1 - | // End of hash chain: key not found, add a new one - | - | // But check for __newindex first. - | ld TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, >6 // No metatable: continue. - |. daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | lbu TMP0, TAB:TMP2->nomm - | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. - |6: - | load_got lj_tab_newkey - | sd RC, 0(CARG3) - | sd BASE, L->base - | move CARG2, TAB:RB - | sd PC, SAVE_PC - | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k - |. move CARG1, L - | // Returns TValue *. - | ld BASE, L->base - |.if FPU - | b <3 // No 2nd write barrier needed. - |. sdc1 f20, 0(CRET1) - |.else - | ld CARG1, 0(RA) - | b <3 // No 2nd write barrier needed. - |. sd CARG1, 0(CRET1) - |.endif - | - |7: // Possible table write barrier for the value. Skip valiswhite check. 
- | barrierback TAB:RB, TMP3, TMP0, <3 - break; - case BC_TSETB: - | // RA = src*8, RB = table*8, RC = index*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | daddu CARG2, BASE, RB - | decode_RDtoRC8 RC, RD - | ld TAB:RB, 0(CARG2) - | daddu RA, BASE, RA - | srl TMP0, RC, 3 - | checktab RB, ->vmeta_tsetb - | lw TMP1, TAB:RB->asize - | ld TMP2, TAB:RB->array - | sltu AT, TMP0, TMP1 - | beqz AT, ->vmeta_tsetb - |. daddu RC, TMP2, RC - | ld TMP1, 0(RC) - | lbu TMP3, TAB:RB->marked - | beq TMP1, TISNIL, >5 - |1: - |. ld CRET1, 0(RA) - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |. sd CRET1, 0(RC) - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | ld TAB:TMP2, TAB:RB->metatable - | beqz TAB:TMP2, <1 // No metatable: done. - |. nop - | lbu TMP1, TAB:TMP2->nomm - | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! - |. nop - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0, <2 - break; - case BC_TSETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | daddu CARG1, BASE, RB - | daddu CARG3, BASE, RC - | ld TAB:CARG2, 0(CARG1) - | lw CARG3, LO(CARG3) - | cleartp TAB:CARG2 - | lbu TMP3, TAB:CARG2->marked - | lw TMP0, TAB:CARG2->asize - | ld TMP1, TAB:CARG2->array - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |. daddu RA, BASE, RA - |2: - | sltu AT, CARG3, TMP0 - | sll TMP2, CARG3, 3 - | beqz AT, ->vmeta_tsetr // In array part? - |. daddu CRET1, TMP1, TMP2 - |->BC_TSETR_Z: - | ld CARG1, 0(RA) - | ins_next1 - | sd CARG1, 0(CRET1) - | ins_next2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. 
- | barrierback TAB:CARG2, TMP3, TMP0, <2 - break; - - case BC_TSETM: - | // RA = base*8 (table at base-1), RD = num_const*8 (start index) - | daddu RA, BASE, RA - |1: - | daddu TMP3, KBASE, RD - | ld TAB:CARG2, -8(RA) // Guaranteed to be a table. - | addiu TMP0, MULTRES, -8 - | lw TMP3, LO(TMP3) // Integer constant is in lo-word. - | beqz TMP0, >4 // Nothing to copy? - |. srl CARG3, TMP0, 3 - | cleartp CARG2 - | addu CARG3, CARG3, TMP3 - | lw TMP2, TAB:CARG2->asize - | sll TMP1, TMP3, 3 - | lbu TMP3, TAB:CARG2->marked - | ld CARG1, TAB:CARG2->array - | sltu AT, TMP2, CARG3 - | bnez AT, >5 - |. daddu TMP2, RA, TMP0 - | daddu TMP1, TMP1, CARG1 - | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) - |3: // Copy result slots to table. - | ld CRET1, 0(RA) - | daddiu RA, RA, 8 - | sltu AT, RA, TMP2 - | sd CRET1, 0(TMP1) - | bnez AT, <3 - |. daddiu TMP1, TMP1, 8 - | bnez TMP0, >7 - |. nop - |4: - | ins_next - | - |5: // Need to resize array part. - | load_got lj_tab_reasize - | sd BASE, L->base - | sd PC, SAVE_PC - | move BASE, RD - | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - |. move CARG1, L - | // Must not reallocate the stack. - | move RD, BASE - | b <1 - |. ld BASE, L->base // Reload BASE for lack of a saved register. - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0, <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 - | decode_RDtoRC8 NARGS8:RC, RD - | b ->BC_CALL_Z - |. 
addu NARGS8:RC, NARGS8:RC, MULTRES - break; - case BC_CALL: - | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 - | decode_RDtoRC8 NARGS8:RC, RD - |->BC_CALL_Z: - | move TMP2, BASE - | daddu BASE, BASE, RA - | ld LFUNC:RB, 0(BASE) - | daddiu BASE, BASE, 16 - | addiu NARGS8:RC, NARGS8:RC, -8 - | checkfunc RB, ->vmeta_call - | ins_call - break; - - case BC_CALLMT: - | // RA = base*8, (RB = 0,) RC = extra_nargs*8 - | addu NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD. - | // Fall through. Assumes BC_CALLT follows. - break; - case BC_CALLT: - | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 - | daddu RA, BASE, RA - | ld RB, 0(RA) - | move NARGS8:RC, RD - | ld TMP1, FRAME_PC(BASE) - | daddiu RA, RA, 16 - | addiu NARGS8:RC, NARGS8:RC, -8 - | checktp CARG3, RB, -LJ_TFUNC, ->vmeta_callt - |->BC_CALLT_Z: - | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. - | lbu TMP3, LFUNC:CARG3->ffid - | bnez TMP0, >7 - |. xori TMP2, TMP1, FRAME_VARG - |1: - | sd RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. - | sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function? - | move TMP2, BASE - | move RB, CARG3 - | beqz NARGS8:RC, >3 - |. move TMP3, NARGS8:RC - |2: - | ld CRET1, 0(RA) - | daddiu RA, RA, 8 - | addiu TMP3, TMP3, -8 - | sd CRET1, 0(TMP2) - | bnez TMP3, <2 - |. daddiu TMP2, TMP2, 8 - |3: - | or TMP0, TMP0, AT - | beqz TMP0, >5 - |. nop - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | lw INS, -4(TMP1) - | decode_RA8a RA, INS - | decode_RA8b RA - | dsubu TMP1, BASE, RA - | ld TMP1, -32(TMP1) - | cleartp LFUNC:TMP1 - | ld TMP1, LFUNC:TMP1->pc - | b <4 - |. ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. - | - |7: // Tailcall from a vararg function. - | andi AT, TMP2, FRAME_TYPEP - | bnez AT, <1 // Vararg frame below? - |. dsubu TMP2, BASE, TMP2 // Relocate BASE down. - | move BASE, TMP2 - | ld TMP1, FRAME_PC(TMP2) - | b <1 - |. 
andi TMP0, TMP1, FRAME_TYPE - break; - - case BC_ITERC: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) - | move TMP2, BASE // Save old BASE fir vmeta_call. - | daddu BASE, BASE, RA - | ld RB, -24(BASE) - | ld CARG1, -16(BASE) - | ld CARG2, -8(BASE) - | li NARGS8:RC, 16 // Iterators get 2 arguments. - | sd RB, 0(BASE) // Copy callable. - | sd CARG1, 16(BASE) // Copy state. - | sd CARG2, 24(BASE) // Copy control var. - | daddiu BASE, BASE, 16 - | checkfunc RB, ->vmeta_call - | ins_call - break; - - case BC_ITERN: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | daddu RA, BASE, RA - | ld TAB:RB, -16(RA) - | lw RC, -8+LO(RA) // Get index from control var. - | cleartp TAB:RB - | daddiu PC, PC, 4 - | lw TMP0, TAB:RB->asize - | ld TMP1, TAB:RB->array - | dsll CARG3, TISNUM, 47 - |1: // Traverse array part. - | sltu AT, RC, TMP0 - | beqz AT, >5 // Index points after array part? - |. sll TMP3, RC, 3 - | daddu TMP3, TMP1, TMP3 - | ld CARG1, 0(TMP3) - | lhu RD, -4+OFS_RD(PC) - | or TMP2, RC, CARG3 - | beq CARG1, TISNIL, <1 // Skip holes in array part. - |. addiu RC, RC, 1 - | sd TMP2, 0(RA) - | sd CARG1, 8(RA) - | or TMP0, RC, CARG3 - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | decode_RD4b RD - | daddu RD, RD, TMP3 - | sw TMP0, -8+LO(RA) // Update control var. - | daddu PC, PC, RD - |3: - | ins_next - | - |5: // Traverse hash part. - | lw TMP1, TAB:RB->hmask - | subu RC, RC, TMP0 - | ld TMP2, TAB:RB->node - |6: - | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1. - | bnez AT, <3 - |. sll TMP3, RC, 5 - | sll RB, RC, 3 - | subu TMP3, TMP3, RB - | daddu NODE:TMP3, TMP3, TMP2 - | ld CARG1, 0(NODE:TMP3) - | lhu RD, -4+OFS_RD(PC) - | beq CARG1, TISNIL, <6 // Skip holes in hash part. - |. 
addiu RC, RC, 1 - | ld CARG2, NODE:TMP3->key - | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sd CARG1, 8(RA) - | addu RC, RC, TMP0 - | decode_RD4b RD - | addu RD, RD, TMP3 - | sd CARG2, 0(RA) - | daddu PC, PC, RD - | b <3 - |. sw RC, -8+LO(RA) // Update control var. - break; - - case BC_ISNEXT: - | // RA = base*8, RD = target (points to ITERN) - | daddu RA, BASE, RA - | srl TMP0, RD, 1 - | ld CFUNC:CARG1, -24(RA) - | daddu TMP0, PC, TMP0 - | ld CARG2, -16(RA) - | ld CARG3, -8(RA) - | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) - | checkfunc CFUNC:CARG1, >5 - | gettp CARG2, CARG2 - | daddiu CARG2, CARG2, -LJ_TTAB - | lbu TMP1, CFUNC:CARG1->ffid - | daddiu CARG3, CARG3, -LJ_TNIL - | or AT, CARG2, CARG3 - | daddiu TMP1, TMP1, -FF_next_N - | or AT, AT, TMP1 - | bnez AT, >5 - |. lui TMP1, 0xfffe - | daddu PC, TMP0, TMP2 - | ori TMP1, TMP1, 0x7fff - | dsll TMP1, TMP1, 32 - | sd TMP1, -8(RA) - |1: - | ins_next - |5: // Despecialize bytecode if any of the checks fail. - | li TMP3, BC_JMP - | li TMP1, BC_ITERC - | sb TMP3, -4+OFS_OP(PC) - | daddu PC, TMP0, TMP2 - | b <1 - |. sb TMP1, OFS_OP(PC) - break; - - case BC_VARG: - | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 - | ld TMP0, FRAME_PC(BASE) - | decode_RDtoRC8 RC, RD - | decode_RB8a RB, INS - | daddu RC, BASE, RC - | decode_RB8b RB - | daddu RA, BASE, RA - | daddiu RC, RC, FRAME_VARG - | daddu TMP2, RA, RB - | daddiu TMP3, BASE, -16 // TMP3 = vtop - | dsubu RC, RC, TMP0 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | beqz RB, >5 // Copy all varargs? - |. dsubu TMP1, TMP3, RC - | daddiu TMP2, TMP2, -16 - |1: // Copy vararg slots to destination slots. - | ld CARG1, 0(RC) - | sltu AT, RC, TMP3 - | daddiu RC, RC, 8 - | movz CARG1, TISNIL, AT - | sd CARG1, 0(RA) - | sltu AT, RA, TMP2 - | bnez AT, <1 - |. daddiu RA, RA, 8 - |3: - | ins_next - | - |5: // Copy all varargs. - | ld TMP0, L->maxstack - | blez TMP1, <3 // No vararg slots? - |. 
li MULTRES, 8 // MULTRES = (0+1)*8 - | daddu TMP2, RA, TMP1 - | sltu AT, TMP0, TMP2 - | bnez AT, >7 - |. daddiu MULTRES, TMP1, 8 - |6: - | ld CRET1, 0(RC) - | daddiu RC, RC, 8 - | sd CRET1, 0(RA) - | sltu AT, RC, TMP3 - | bnez AT, <6 // More vararg slots? - |. daddiu RA, RA, 8 - | b <3 - |. nop - | - |7: // Grow stack for varargs. - | load_got lj_state_growstack - | sd RA, L->top - | dsubu RA, RA, BASE - | sd BASE, L->base - | dsubu BASE, RC, BASE // Need delta, because BASE may change. - | sd PC, SAVE_PC - | srl CARG2, TMP1, 3 - | call_intern lj_state_growstack // (lua_State *L, int n) - |. move CARG1, L - | move RC, BASE - | ld BASE, L->base - | daddu RA, BASE, RA - | daddu RC, BASE, RC - | b <6 - |. daddiu TMP3, BASE, -16 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results*8, RD = extra_nresults*8 - | addu RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. - | // Fall through. Assumes BC_RET follows. - break; - - case BC_RET: - | // RA = results*8, RD = (nresults+1)*8 - | ld PC, FRAME_PC(BASE) - | daddu RA, BASE, RA - | move MULTRES, RD - |1: - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->BC_RETV_Z - |. xori TMP1, PC, FRAME_VARG - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return - | lw INS, -4(PC) - | daddiu TMP2, BASE, -16 - | daddiu RC, RD, -8 - | decode_RA8a TMP0, INS - | decode_RB8a RB, INS - | decode_RA8b TMP0 - | decode_RB8b RB - | daddu TMP3, TMP2, RB - | beqz RC, >3 - |. dsubu BASE, TMP2, TMP0 - |2: - | ld CRET1, 0(RA) - | daddiu RA, RA, 8 - | daddiu RC, RC, -8 - | sd CRET1, 0(TMP2) - | bnez RC, <2 - |. daddiu TMP2, TMP2, 8 - |3: - | daddiu TMP3, TMP3, -8 - |5: - | sltu AT, TMP2, TMP3 - | bnez AT, >6 - |. ld LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | cleartp LFUNC:TMP1 - | ld TMP1, LFUNC:TMP1->pc - | ld KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | sd TISNIL, 0(TMP2) - | b <5 - |. 
daddiu TMP2, TMP2, 8 - | - |->BC_RETV_Z: // Non-standard return case. - | andi TMP2, TMP1, FRAME_TYPEP - | bnez TMP2, ->vm_return - |. nop - | // Return from vararg function: relocate BASE down. - | dsubu BASE, BASE, TMP1 - | b <1 - |. ld PC, FRAME_PC(BASE) - break; - - case BC_RET0: case BC_RET1: - | // RA = results*8, RD = (nresults+1)*8 - | ld PC, FRAME_PC(BASE) - | daddu RA, BASE, RA - | move MULTRES, RD - | andi TMP0, PC, FRAME_TYPE - | bnez TMP0, ->BC_RETV_Z - |. xori TMP1, PC, FRAME_VARG - | lw INS, -4(PC) - | daddiu TMP2, BASE, -16 - if (op == BC_RET1) { - | ld CRET1, 0(RA) - } - | decode_RB8a RB, INS - | decode_RA8a RA, INS - | decode_RB8b RB - | decode_RA8b RA - | dsubu BASE, TMP2, RA - if (op == BC_RET1) { - | sd CRET1, 0(TMP2) - } - |5: - | sltu AT, RD, RB - | bnez AT, >6 - |. ld TMP1, FRAME_FUNC(BASE) - | ins_next1 - | cleartp LFUNC:TMP1 - | ld TMP1, LFUNC:TMP1->pc - | ld KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | daddiu TMP2, TMP2, 8 - | daddiu RD, RD, 8 - | b <5 - if (op == BC_RET1) { - |. sd TISNIL, 0(TMP2) - } else { - |. sd TISNIL, -8(TMP2) - } - break; - - /* -- Loops and branches ------------------------------------------------ */ - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. - break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base*8, RD = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - | daddu RA, BASE, RA - | ld CARG1, FORL_IDX*8(RA) // IDX CARG1 - CARG3 type - | gettp CARG3, CARG1 - if (op != BC_JFORL) { - | srl RD, RD, 1 - | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) - | daddu TMP2, RD, TMP2 - } - if (!vk) { - | ld CARG2, FORL_STOP*8(RA) // STOP CARG2 - CARG4 type - | ld CRET1, FORL_STEP*8(RA) // STEP CRET1 - CRET2 type - | gettp CARG4, CARG2 - | bne CARG3, TISNUM, >5 - |. gettp CRET2, CRET1 - | bne CARG4, TISNUM, ->vmeta_for - |. 
sextw CARG3, CARG1 - | bne CRET2, TISNUM, ->vmeta_for - |. sextw CARG2, CARG2 - | dext AT, CRET1, 31, 0 - | slt CRET1, CARG2, CARG3 - | slt TMP1, CARG3, CARG2 - | movn CRET1, TMP1, AT - } else { - | bne CARG3, TISNUM, >5 - |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type - | ld CRET1, FORL_STOP*8(RA) // STOP CRET1 - CRET2 type - | sextw TMP3, CARG1 - | sextw CARG2, CARG2 - | sextw CRET1, CRET1 - | addu CARG1, TMP3, CARG2 - | xor TMP0, CARG1, TMP3 - | xor TMP1, CARG1, CARG2 - | and TMP0, TMP0, TMP1 - | slt TMP1, CARG1, CRET1 - | slt CRET1, CRET1, CARG1 - | slt AT, CARG2, r0 - | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. - | movn CRET1, TMP1, AT - | or CRET1, CRET1, TMP0 - | zextw CARG1, CARG1 - | settp CARG1, TISNUM - } - |1: - if (op == BC_FORI) { - | movz TMP2, r0, CRET1 - | daddu PC, PC, TMP2 - } else if (op == BC_JFORI) { - | daddu PC, PC, TMP2 - | lhu RD, -4+OFS_RD(PC) - } else if (op == BC_IFORL) { - | movn TMP2, r0, CRET1 - | daddu PC, PC, TMP2 - } - if (vk) { - | sd CARG1, FORL_IDX*8(RA) - } - | ins_next1 - | sd CARG1, FORL_EXT*8(RA) - |2: - if (op == BC_JFORI) { - | beqz CRET1, =>BC_JLOOP - |. decode_RD8b RD - } else if (op == BC_JFORL) { - | beqz CRET1, =>BC_JLOOP - } - | ins_next2 - | - |5: // FP loop. - |.if FPU - if (!vk) { - | ldc1 f0, FORL_IDX*8(RA) - | ldc1 f2, FORL_STOP*8(RA) - | sltiu TMP0, CARG3, LJ_TISNUM - | sltiu TMP1, CARG4, LJ_TISNUM - | sltiu AT, CRET2, LJ_TISNUM - | ld TMP3, FORL_STEP*8(RA) - | and TMP0, TMP0, TMP1 - | and AT, AT, TMP0 - | beqz AT, ->vmeta_for - |. slt TMP3, TMP3, r0 - | c.ole.d 0, f0, f2 - | c.ole.d 1, f2, f0 - | li CRET1, 1 - | movt CRET1, r0, 0 - | movt AT, r0, 1 - | b <1 - |. 
movn CRET1, AT, TMP3 - } else { - | ldc1 f0, FORL_IDX*8(RA) - | ldc1 f4, FORL_STEP*8(RA) - | ldc1 f2, FORL_STOP*8(RA) - | ld TMP3, FORL_STEP*8(RA) - | add.d f0, f0, f4 - | c.ole.d 0, f0, f2 - | c.ole.d 1, f2, f0 - | slt TMP3, TMP3, r0 - | li CRET1, 1 - | li AT, 1 - | movt CRET1, r0, 0 - | movt AT, r0, 1 - | movn CRET1, AT, TMP3 - if (op == BC_IFORL) { - | movn TMP2, r0, CRET1 - | daddu PC, PC, TMP2 - } - | sdc1 f0, FORL_IDX*8(RA) - | ins_next1 - | b <2 - |. sdc1 f0, FORL_EXT*8(RA) - } - |.else - if (!vk) { - | sltiu TMP0, CARG3, LJ_TISNUM - | sltiu TMP1, CARG4, LJ_TISNUM - | sltiu AT, CRET2, LJ_TISNUM - | and TMP0, TMP0, TMP1 - | and AT, AT, TMP0 - | beqz AT, ->vmeta_for - |. nop - | bal ->vm_sfcmpolex - |. lw TMP3, FORL_STEP*8+HI(RA) - | b <1 - |. nop - } else { - | load_got __adddf3 - | call_extern - |. sw TMP2, TMPD - | ld CARG2, FORL_STOP*8(RA) - | move CARG1, CRET1 - if ( op == BC_JFORL ) { - | lhu RD, -4+OFS_RD(PC) - | decode_RD8b RD - } - | bal ->vm_sfcmpolex - |. lw TMP3, FORL_STEP*8+HI(RA) - | b <1 - |. lw TMP2, TMPD - } - |.endif - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base*8, RD = target - | daddu RA, BASE, RA - | ld TMP1, 0(RA) - | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. - |. nop - if (op == BC_JITERL) { - | b =>BC_JLOOP - |. sd TMP1, -8(RA) - } else { - | branch_RD // Otherwise save control var + branch. - | sd TMP1, -8(RA) - } - |1: - | ins_next - break; - - case BC_LOOP: - | // RA = base*8, RD = target (loop extent) - | // Note: RA/RD is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. 
- break; - - case BC_ILOOP: - | // RA = base*8, RD = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | // RA = base*8 (ignored), RD = traceno*8 - | ld TMP1, DISPATCH_J(trace)(DISPATCH) - | li AT, 0 - | daddu TMP1, TMP1, RD - | // Traces on MIPS don't store the trace number, so use 0. - | sd AT, DISPATCH_GL(vmstate)(DISPATCH) - | ld TRACE:TMP2, 0(TMP1) - | sd BASE, DISPATCH_GL(jit_base)(DISPATCH) - | ld TMP2, TRACE:TMP2->mcode - | sd L, DISPATCH_GL(tmpbuf.L)(DISPATCH) - | jr TMP2 - |. daddiu JGL, DISPATCH, GG_DISP2G+32768 - |.endif - break; - - case BC_JMP: - | // RA = base*8 (only used by trace recorder), RD = target - | branch_RD - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | ld TMP2, L->maxstack - | lbu TMP1, -4+PC2PROTO(numparams)(PC) - | ld KBASE, -4+PC2PROTO(k)(PC) - | sltu AT, TMP2, RA - | bnez AT, ->vm_growstack_l - |. sll TMP1, TMP1, 3 - if (op != BC_JFUNCF) { - | ins_next1 - } - |2: - | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters. - | bnez AT, >3 - |. daddu AT, BASE, NARGS8:RC - if (op == BC_JFUNCF) { - | decode_RD8a RD, INS - | b =>BC_JLOOP - |. decode_RD8b RD - } else { - | ins_next2 - } - | - |3: // Clear missing parameters. - | sd TISNIL, 0(AT) - | b <2 - |. addiu NARGS8:RC, NARGS8:RC, 8 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. 
*/ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | li TMP0, LJ_TFUNC - | daddu TMP1, BASE, RC - | ld TMP2, L->maxstack - | settp LFUNC:RB, TMP0 - | daddu TMP0, RA, RC - | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. - | daddiu TMP3, RC, 16+FRAME_VARG - | sltu AT, TMP0, TMP2 - | ld KBASE, -4+PC2PROTO(k)(PC) - | beqz AT, ->vm_growstack_l - |. sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. - | lbu TMP2, -4+PC2PROTO(numparams)(PC) - | move RA, BASE - | move RC, TMP1 - | ins_next1 - | beqz TMP2, >3 - |. daddiu BASE, TMP1, 16 - |1: - | ld TMP0, 0(RA) - | sltu AT, RA, RC // Less args than parameters? - | move CARG1, TMP0 - | movz TMP0, TISNIL, AT // Clear missing parameters. - | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). - | addiu TMP2, TMP2, -1 - | sd TMP0, 16(TMP1) - | daddiu TMP1, TMP1, 8 - | sd CARG1, 0(RA) - | bnez TMP2, <1 - |. daddiu RA, RA, 8 - |3: - | ins_next2 - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | ld CFUNCADDR, CFUNC:RB->f - } else { - | ld CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH) - } - | daddu TMP1, RA, NARGS8:RC - | ld TMP2, L->maxstack - | daddu RC, BASE, NARGS8:RC - | sd BASE, L->base - | sltu AT, TMP2, TMP1 - | sd RC, L->top - | li_vmstate C - if (op == BC_FUNCCW) { - | ld CARG2, CFUNC:RB->f - } - | bnez AT, ->vm_growstack_c // Need to grow stack. - |. move CARG1, L - | jalr CFUNCADDR // (lua_State *L [, lua_CFunction f]) - |. st_vmstate - | // Returns nresults. - | ld BASE, L->base - | sll RD, CRET1, 3 - | ld TMP1, L->top - | li_vmstate INTERP - | ld PC, FRAME_PC(BASE) // Fetch PC of caller. - | dsubu RA, TMP1, RD // RA = L->top - nresults*8 - | sd L, DISPATCH_GL(cur_L)(DISPATCH) - | b ->vm_returnc - |. 
st_vmstate - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. */ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i; - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.4byte .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.4byte 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 31\n" - "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.4byte .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.4byte .Lframe0\n" - "\t.8byte .Lbegin\n" - "\t.8byte %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x9f\n\t.sleb128 2*5\n" - "\t.byte 0x9e\n\t.sleb128 2*6\n", - fcofs, CFRAME_SIZE); - for (i = 23; i >= 16; i--) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i)); -#if !LJ_SOFTFP - for (i = 31; i >= 24; i--) - fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i)); -#endif - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE0:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.4byte .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.4byte .Lframe0\n" - "\t.4byte lj_vm_ffi_call\n" - "\t.4byte %d\n" - "\t.byte 0x9f\n\t.uleb128 2*1\n" - "\t.byte 0x90\n\t.uleb128 2*2\n" - "\t.byte 0xd\n\t.uleb128 0x10\n" - "\t.align 2\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif -#if !LJ_NO_UNWIND - /* NYI */ -#endif - break; - default: - break; - } -} - diff --git 
a/src/vm_ppc.dasc b/src/vm_ppc.dasc deleted file mode 100644 index b4260ebc10..0000000000 --- a/src/vm_ppc.dasc +++ /dev/null @@ -1,5248 +0,0 @@ -|// Low-level VM code for PowerPC 32 bit or 32on64 bit mode. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -| -|.arch ppc -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. -|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// DynASM defines used by the PPC port: -|// -|// P64 64 bit pointers (only for GPR64 testing). -|// Note: see vm_ppc64.dasc for a full PPC64 _LP64 port. -|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). -|// Affects reg saves, stack layout, carry/overflow/dot flags etc. -|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). -|// TOC Need table of contents (64 bit or 32 bit variant, e.g. PS3). -|// Function pointers are really a struct: code, TOC, env (optional). -|// TOCENV Function pointers have an environment pointer, too (not on PS3). -|// PPE Power Processor Element of Cell (PS3) or Xenon (Xbox 360). -|// Must avoid (slow) micro-coded instructions. -| -|.if P64 -|.define TOC, 1 -|.define TOCENV, 1 -|.macro lpx, a, b, c; ldx a, b, c; .endmacro -|.macro lp, a, b; ld a, b; .endmacro -|.macro stp, a, b; std a, b; .endmacro -|.define decode_OPP, decode_OP8 -|.if FFI -|// Missing: Calling conventions, 64 bit regs, TOC. -|.error lib_ffi not yet implemented for PPC64 -|.endif -|.else -|.macro lpx, a, b, c; lwzx a, b, c; .endmacro -|.macro lp, a, b; lwz a, b; .endmacro -|.macro stp, a, b; stw a, b; .endmacro -|.define decode_OPP, decode_OP4 -|.endif -| -|// Convenience macros for TOC handling. 
-|.if TOC -|// Linker needs a TOC patch area for every external call relocation. -|.macro blex, target; bl extern target@plt; nop; .endmacro -|.macro .toc, a, b; a, b; .endmacro -|.if P64 -|.define TOC_OFS, 8 -|.define ENV_OFS, 16 -|.else -|.define TOC_OFS, 4 -|.define ENV_OFS, 8 -|.endif -|.else // No TOC. -|.macro blex, target; bl extern target@plt; .endmacro -|.macro .toc, a, b; .endmacro -|.endif -|.macro .tocenv, a, b; .if TOCENV; a, b; .endif; .endmacro -| -|.macro .gpr64, a, b; .if GPR64; a, b; .endif; .endmacro -| -|.macro andix., y, a, i -|.if PPE -| rlwinm y, a, 0, 31-lj_fls(i), 31-lj_ffs(i) -| cmpwi y, 0 -|.else -| andi. y, a, i -|.endif -|.endmacro -| -|.macro clrso, reg -|.if PPE -| li reg, 0 -| mtxer reg -|.else -| mcrxr cr0 -|.endif -|.endmacro -| -|.macro checkov, reg, noov -|.if PPE -| mfxer reg -| add reg, reg, reg -| cmpwi reg, 0 -| li reg, 0 -| mtxer reg -| bgey noov -|.else -| mcrxr cr0 -| bley noov -|.endif -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Fixed register assignments for the interpreter. -|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) -| -|// The following must be C callee-save (but BASE is often refetched). -|.define BASE, r14 // Base of current Lua stack frame. -|.define KBASE, r15 // Constants of current Lua function. -|.define PC, r16 // Next PC. -|.define DISPATCH, r17 // Opcode dispatch table. -|.define LREG, r18 // Register holding lua_State (also in SAVE_L). -|.define MULTRES, r19 // Size of multi-result: (nresults+1)*8. -|.define JGL, r31 // On-trace: global_State + 32768. -| -|// Constants for type-comparisons, stores and conversions. C callee-save. -|.define TISNUM, r22 -|.define TISNIL, r23 -|.define ZERO, r24 -|.define TOBIT, f30 // 2^52 + 2^51. -|.define TONUM, f31 // 2^52 + 2^51 + 2^31. -| -|// The following temporaries are not saved across C calls, except for RA. -|.define RA, r20 // Callee-save. 
-|.define RB, r10 -|.define RC, r11 -|.define RD, r12 -|.define INS, r7 // Overlaps CARG5. -| -|.define TMP0, r0 -|.define TMP1, r8 -|.define TMP2, r9 -|.define TMP3, r6 // Overlaps CARG4. -| -|// Saved temporaries. -|.define SAVE0, r21 -| -|// Calling conventions. -|.define CARG1, r3 -|.define CARG2, r4 -|.define CARG3, r5 -|.define CARG4, r6 // Overlaps TMP3. -|.define CARG5, r7 // Overlaps INS. -| -|.define FARG1, f1 -|.define FARG2, f2 -| -|.define CRET1, r3 -|.define CRET2, r4 -| -|.define TOCREG, r2 // TOC register (only used by C code). -|.define ENVREG, r11 // Environment pointer (nested C functions). -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|.if GPR64 -|.if FRAME32 -| -|// 456(sp) // \ 32/64 bit C frame info -|.define TONUM_LO, 452(sp) // | -|.define TONUM_HI, 448(sp) // | -|.define TMPD_LO, 444(sp) // | -|.define TMPD_HI, 440(sp) // | -|.define SAVE_CR, 432(sp) // | 64 bit CR save. -|.define SAVE_ERRF, 424(sp) // > Parameter save area. -|.define SAVE_NRES, 420(sp) // | -|.define SAVE_L, 416(sp) // | -|.define SAVE_PC, 412(sp) // | -|.define SAVE_MULTRES, 408(sp) // | -|.define SAVE_CFRAME, 400(sp) // / 64 bit C frame chain. -|// 392(sp) // Reserved. -|.define CFRAME_SPACE, 384 // Delta for sp. -|// Back chain for sp: 384(sp) <-- sp entering interpreter -|.define SAVE_LR, 376(sp) // 32 bit LR stored in hi-part. -|.define SAVE_GPR_, 232 // .. 232+18*8: 64 bit GPR saves. -|.define SAVE_FPR_, 88 // .. 88+18*8: 64 bit FPR saves. -|// 80(sp) // Needed for 16 byte stack frame alignment. -|// 16(sp) // Callee parameter save area (ABI mandated). -|// 8(sp) // Reserved -|// Back chain for sp: 0(sp) <-- sp while in interpreter -|// 32 bit sp stored in hi-part of 0(sp). -| -|.define TMPD_BLO, 447(sp) -|.define TMPD, TMPD_HI -|.define TONUM_D, TONUM_HI -| -|.else -| -|// 508(sp) // \ 32 bit C frame info. -|.define SAVE_ERRF, 472(sp) // | -|.define SAVE_NRES, 468(sp) // | -|.define SAVE_L, 464(sp) // > Parameter save area. 
-|.define SAVE_PC, 460(sp) // | -|.define SAVE_MULTRES, 456(sp) // | -|.define SAVE_CFRAME, 448(sp) // / 64 bit C frame chain. -|.define SAVE_LR, 416(sp) -|.define CFRAME_SPACE, 400 // Delta for sp. -|// Back chain for sp: 400(sp) <-- sp entering interpreter -|.define SAVE_FPR_, 256 // .. 256+18*8: 64 bit FPR saves. -|.define SAVE_GPR_, 112 // .. 112+18*8: 64 bit GPR saves. -|// 48(sp) // Callee parameter save area (ABI mandated). -|.define SAVE_TOC, 40(sp) // TOC save area. -|.define TMPD_LO, 36(sp) // \ Link editor temp (ABI mandated). -|.define TMPD_HI, 32(sp) // / -|.define TONUM_LO, 28(sp) // \ Compiler temp (ABI mandated). -|.define TONUM_HI, 24(sp) // / -|// Next frame lr: 16(sp) -|.define SAVE_CR, 8(sp) // 64 bit CR save. -|// Back chain for sp: 0(sp) <-- sp while in interpreter -| -|.define TMPD_BLO, 39(sp) -|.define TMPD, TMPD_HI -|.define TONUM_D, TONUM_HI -| -|.endif -|.else -| -|.define SAVE_LR, 276(sp) -|.define CFRAME_SPACE, 272 // Delta for sp. -|// Back chain for sp: 272(sp) <-- sp entering interpreter -|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. -|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. -|.define SAVE_CR, 52(sp) // 32 bit CR save. -|.define SAVE_ERRF, 48(sp) // 32 bit C frame info. 
-|.define SAVE_NRES, 44(sp) -|.define SAVE_CFRAME, 40(sp) -|.define SAVE_L, 36(sp) -|.define SAVE_PC, 32(sp) -|.define SAVE_MULTRES, 28(sp) -|.define UNUSED1, 24(sp) -|.define TMPD_LO, 20(sp) -|.define TMPD_HI, 16(sp) -|.define TONUM_LO, 12(sp) -|.define TONUM_HI, 8(sp) -|// Next frame lr: 4(sp) -|// Back chain for sp: 0(sp) <-- sp while in interpreter -| -|.define TMPD_BLO, 23(sp) -|.define TMPD, TMPD_HI -|.define TONUM_D, TONUM_HI -| -|.endif -| -|.macro save_, reg -|.if GPR64 -| std r..reg, SAVE_GPR_+(reg-14)*8(sp) -|.else -| stw r..reg, SAVE_GPR_+(reg-14)*4(sp) -|.endif -| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) -|.endmacro -|.macro rest_, reg -|.if GPR64 -| ld r..reg, SAVE_GPR_+(reg-14)*8(sp) -|.else -| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) -|.endif -| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) -|.endmacro -| -|.macro saveregs -|.if GPR64 and not FRAME32 -| stdu sp, -CFRAME_SPACE(sp) -|.else -| stwu sp, -CFRAME_SPACE(sp) -|.endif -| save_ 14; save_ 15; save_ 16 -| mflr r0 -| save_ 17; save_ 18; save_ 19; save_ 20; save_ 21; save_ 22 -|.if GPR64 and not FRAME32 -| std r0, SAVE_LR -|.else -| stw r0, SAVE_LR -|.endif -| save_ 23; save_ 24; save_ 25 -| mfcr r0 -| save_ 26; save_ 27; save_ 28; save_ 29; save_ 30; save_ 31 -|.if GPR64 -| std r0, SAVE_CR -|.else -| stw r0, SAVE_CR -|.endif -| .toc std TOCREG, SAVE_TOC -|.endmacro -| -|.macro restoreregs -|.if GPR64 and not FRAME32 -| ld r0, SAVE_LR -|.else -| lwz r0, SAVE_LR -|.endif -|.if GPR64 -| ld r12, SAVE_CR -|.else -| lwz r12, SAVE_CR -|.endif -| rest_ 14; rest_ 15; rest_ 16; rest_ 17; rest_ 18; rest_ 19 -| mtlr r0; -|.if PPE; mtocrf 0x20, r12; .else; mtcrf 0x38, r12; .endif -| rest_ 20; rest_ 21; rest_ 22; rest_ 23; rest_ 24; rest_ 25 -|.if PPE; mtocrf 0x10, r12; .endif -| rest_ 26; rest_ 27; rest_ 28; rest_ 29; rest_ 30; rest_ 31 -|.if PPE; mtocrf 0x08, r12; .endif -| addi sp, sp, CFRAME_SPACE -|.endmacro -| -|// Type definitions. Some of these are only used for documentation. 
-|.type L, lua_State, LREG -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|//----------------------------------------------------------------------- -| -|// Trap for not-yet-implemented parts. -|.macro NYI; tw 4, sp, sp; .endmacro -| -|// int/FP conversions. -|.macro tonum_i, freg, reg -| xoris reg, reg, 0x8000 -| stw reg, TONUM_LO -| lfd freg, TONUM_D -| fsub freg, freg, TONUM -|.endmacro -| -|.macro tonum_u, freg, reg -| stw reg, TONUM_LO -| lfd freg, TONUM_D -| fsub freg, freg, TOBIT -|.endmacro -| -|.macro toint, reg, freg, tmpfreg -| fctiwz tmpfreg, freg -| stfd tmpfreg, TMPD -| lwz reg, TMPD_LO -|.endmacro -| -|.macro toint, reg, freg -| toint reg, freg, freg -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Access to frame relative to BASE. -|.define FRAME_PC, -8 -|.define FRAME_FUNC, -4 -| -|// Instruction decode. -|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro -|.macro decode_OP8, dst, ins; rlwinm dst, ins, 3, 21, 28; .endmacro -|.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro -|.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro -|.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro -|.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro -| -|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro -|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro -| -|// Instruction fetch. -|.macro ins_NEXT1 -| lwz INS, 0(PC) -| addi PC, PC, 4 -|.endmacro -|// Instruction decode+dispatch. Note: optimized for e300! 
-|.macro ins_NEXT2 -| decode_OPP TMP1, INS -| lpx TMP0, DISPATCH, TMP1 -| mtctr TMP0 -| decode_RB8 RB, INS -| decode_RD8 RD, INS -| decode_RA8 RA, INS -| decode_RC8 RC, INS -| bctr -|.endmacro -|.macro ins_NEXT -| ins_NEXT1 -| ins_NEXT2 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -| .define ins_next1, ins_NEXT1 -| .define ins_next2, ins_NEXT2 -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next1 -| .endmacro -| .macro ins_next2 -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| lwz PC, LFUNC:RB->pc -| lwz INS, 0(PC) -| addi PC, PC, 4 -| decode_OPP TMP1, INS -| decode_RA8 RA, INS -| lpx TMP0, DISPATCH, TMP1 -| add RA, RA, BASE -| mtctr TMP0 -| bctr -|.endmacro -| -|.macro ins_call -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| stw PC, FRAME_PC(BASE) -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Macros to test operand types. -|.macro checknum, reg; cmplw reg, TISNUM; .endmacro -|.macro checknum, cr, reg; cmplw cr, reg, TISNUM; .endmacro -|.macro checkstr, reg; cmpwi reg, LJ_TSTR; .endmacro -|.macro checktab, reg; cmpwi reg, LJ_TTAB; .endmacro -|.macro checkfunc, reg; cmpwi reg, LJ_TFUNC; .endmacro -|.macro checknil, reg; cmpwi reg, LJ_TNIL; .endmacro -| -|.macro branch_RD -| srwi TMP0, RD, 1 -| addis PC, PC, -(BCBIAS_J*4 >> 16) -| add PC, PC, TMP0 -|.endmacro -| -|// Assumes DISPATCH is relative to GL. 
-#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|.macro hotcheck, delta, target -| rlwinm TMP1, PC, 31, 25, 30 -| addi TMP1, TMP1, GG_DISP2HOT -| lhzx TMP2, DISPATCH, TMP1 -| addic. TMP2, TMP2, -delta -| sthx TMP2, DISPATCH, TMP1 -| blt target -|.endmacro -| -|.macro hotloop -| hotcheck HOTCOUNT_LOOP, ->vm_hotloop -|.endmacro -| -|.macro hotcall -| hotcheck HOTCOUNT_CALL, ->vm_hotcall -|.endmacro -| -|// Set current VM state. Uses TMP0. -|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro -|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp. -|.macro barrierback, tab, mark, tmp -| lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) -| // Assumes LJ_GC_BLACK is 0x04. -| rlwinm mark, mark, 0, 30, 28 // black2gray(tab) -| stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH) -| stb mark, tab->marked -| stw tmp, tab->gclist -|.endmacro -| -|//----------------------------------------------------------------------- - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: TMP2 = previous base. - | andix. TMP0, PC, FRAME_P - | li TMP1, LJ_TTRUE - | beq ->cont_dispatch - | - | // Return from pcall or xpcall fast func. - | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame. - | mr BASE, TMP2 // Restore caller base. - | // Prepending may overwrite the pcall frame, so do it at the end. 
- | stwu TMP1, FRAME_PC(RA) // Prepend true to results. - | - |->vm_returnc: - | addi RD, RD, 8 // RD = (nresults+1)*8. - | andix. TMP0, PC, FRAME_TYPE - | cmpwi cr1, RD, 0 - | li CRET1, LUA_YIELD - | beq cr1, ->vm_unwind_c_eh - | mr MULTRES, RD - | beq ->BC_RET_Z // Handle regular return to Lua. - | - |->vm_return: - | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return - | // TMP0 = PC & FRAME_TYPE - | cmpwi TMP0, FRAME_C - | rlwinm TMP2, PC, 0, 0, 28 - | li_vmstate C - | sub TMP2, BASE, TMP2 // TMP2 = previous base. - | bney ->vm_returnp - | - | addic. TMP1, RD, -8 - | stp TMP2, L->base - | lwz TMP2, SAVE_NRES - | subi BASE, BASE, 8 - | st_vmstate - | slwi TMP2, TMP2, 3 - | beq >2 - |1: - | addic. TMP1, TMP1, -8 - | lfd f0, 0(RA) - | addi RA, RA, 8 - | stfd f0, 0(BASE) - | addi BASE, BASE, 8 - | bney <1 - | - |2: - | cmpw TMP2, RD // More/less results wanted? - | bne >6 - |3: - | stp BASE, L->top // Store new top. - | - |->vm_leave_cp: - | lp TMP0, SAVE_CFRAME // Restore previous C frame. - | li CRET1, 0 // Ok return status for vm_pcall. - | stp TMP0, L->cframe - | - |->vm_leave_unw: - | restoreregs - | blr - | - |6: - | ble >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. - | lwz TMP1, L->maxstack - | cmplw BASE, TMP1 - | bge >8 - | stw TISNIL, 0(BASE) - | addi RD, RD, 8 - | addi BASE, BASE, 8 - | b <2 - | - |7: // Less results wanted. - | subfic TMP3, TMP2, 0 // LUA_MULTRET+1 case? - | sub TMP0, RD, TMP2 - | subfe TMP1, TMP1, TMP1 // TMP1 = TMP2 == 0 ? 0 : -1 - | and TMP0, TMP0, TMP1 - | sub BASE, BASE, TMP0 // Either keep top or shrink it. - | b <3 - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | stp BASE, L->top // Save current top held in BASE (yes). 
- | mr SAVE0, RD - | srwi CARG2, TMP2, 3 - | mr CARG1, L - | bl extern lj_state_growstack // (lua_State *L, int n) - | lwz TMP2, SAVE_NRES - | mr RD, SAVE0 - | slwi TMP2, TMP2, 3 - | lp BASE, L->top // Need the (realloced) L->top in BASE. - | b <2 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | mr sp, CARG1 - | mr CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | lwz L, SAVE_L - | .toc ld TOCREG, SAVE_TOC - | li TMP0, ~LJ_VMST_C - | lwz GL:TMP1, L->glref - | stw TMP0, GL:TMP1->vmstate - | b ->vm_leave_unw - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - |.if GPR64 - | rldicr sp, CARG1, 0, 61 - |.else - | rlwinm sp, CARG1, 0, 0, 29 - |.endif - |->vm_unwind_ff_eh: // Landing pad for external unwinder. - | lwz L, SAVE_L - | .toc ld TOCREG, SAVE_TOC - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp BASE, L->base - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | li ZERO, 0 - | stw TMP3, TMPD - | li TMP1, LJ_TFALSE - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | li TISNIL, LJ_TNIL - | li_vmstate INTERP - | lfs TOBIT, TMPD - | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. - | la RA, -8(BASE) // Results start at BASE-8. - | stw TMP3, TMPD - | addi DISPATCH, DISPATCH, GG_G2DISP - | stw TMP1, 0(RA) // Prepend false to error message. - | li RD, 16 // 2 results: false + error message. - | st_vmstate - | lfs TONUM, TMPD - | b ->vm_returnc - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | li CARG2, LUA_MINSTACK - | b >2 - | - |->vm_growstack_l: // Grow stack for Lua function. 
- | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | add RC, BASE, RC - | sub RA, RA, BASE - | stp BASE, L->base - | addi PC, PC, 4 // Must point after first instruction. - | stp RC, L->top - | srwi CARG2, RA, 3 - |2: - | // L->base = new base, L->top = top - | stw PC, SAVE_PC - | mr CARG1, L - | bl extern lj_state_growstack // (lua_State *L, int n) - | lp BASE, L->base - | lp RC, L->top - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | sub RC, RC, BASE - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | mr L, CARG1 - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | mr BASE, CARG2 - | lbz TMP1, L->status - | stw L, SAVE_L - | li PC, FRAME_CP - | addi TMP0, sp, CFRAME_RESUME - | addi DISPATCH, DISPATCH, GG_G2DISP - | stw CARG3, SAVE_NRES - | cmplwi TMP1, 0 - | stw CARG3, SAVE_ERRF - | stp CARG3, SAVE_CFRAME - | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | stp TMP0, L->cframe - | beq >3 - | - | // Resume after yield (like a return). - | stw L, DISPATCH_GL(cur_L)(DISPATCH) - | mr RA, BASE - | lp BASE, L->base - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp TMP1, L->top - | lwz PC, FRAME_PC(BASE) - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | stb CARG3, L->status - | stw TMP3, TMPD - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 
- | lfs TOBIT, TMPD - | sub RD, TMP1, BASE - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | addi RD, RD, 8 - | stw TMP0, TONUM_HI - | li_vmstate INTERP - | li ZERO, 0 - | st_vmstate - | andix. TMP0, PC, FRAME_TYPE - | mr MULTRES, RD - | lfs TONUM, TMPD - | li TISNIL, LJ_TNIL - | beq ->BC_RET_Z - | b ->vm_return - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | li PC, FRAME_CP - | stw CARG4, SAVE_ERRF - | b >1 - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | li PC, FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | lp TMP1, L:CARG1->cframe - | mr L, CARG1 - | stw CARG3, SAVE_NRES - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | stw CARG1, SAVE_L - | mr BASE, CARG2 - | addi DISPATCH, DISPATCH, GG_G2DISP - | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | stp TMP1, SAVE_CFRAME - | stp sp, L->cframe // Add our C frame to cframe chain. - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | stw L, DISPATCH_GL(cur_L)(DISPATCH) - | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp TMP1, L->top - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | add PC, PC, BASE - | stw TMP3, TMPD - | li ZERO, 0 - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 
- | lfs TOBIT, TMPD - | sub PC, PC, TMP2 // PC = frame delta + frame type - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | sub NARGS8:RC, TMP1, BASE - | stw TMP0, TONUM_HI - | li_vmstate INTERP - | lfs TONUM, TMPD - | li TISNIL, LJ_TNIL - | st_vmstate - | - |->vm_call_dispatch: - | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC - | lwz TMP0, FRAME_PC(BASE) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | checkfunc TMP0; bne ->vmeta_call - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | mr L, CARG1 - | lwz TMP0, L:CARG1->stack - | stw CARG1, SAVE_L - | lp TMP1, L->top - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). - | lp TMP1, L->cframe - | addi DISPATCH, DISPATCH, GG_G2DISP - | .toc lp CARG4, 0(CARG4) - | li TMP2, 0 - | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. - | stw TMP2, SAVE_ERRF // No error function. - | stp TMP1, SAVE_CFRAME - | stp sp, L->cframe // Add our C frame to cframe chain. - | stw L, DISPATCH_GL(cur_L)(DISPATCH) - | mtctr CARG4 - | bctrl // (lua_State *L, lua_CFunction func, void *ud) - |.if PPE - | mr BASE, CRET1 - | cmpwi CRET1, 0 - |.else - | mr. BASE, CRET1 - |.endif - | li PC, FRAME_CP - | bne <3 // Else continue with the call. - | b ->vm_leave_cp // No base? Just remove C frame. - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the - |// stack, so BASE doesn't need to be reloaded across these calls. 
- | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 - | lwz TMP0, -12(BASE) // Continuation. - | mr RB, BASE - | mr BASE, TMP2 // Restore caller BASE. - | lwz LFUNC:TMP1, FRAME_FUNC(TMP2) - |.if FFI - | cmplwi TMP0, 1 - |.endif - | lwz PC, -16(RB) // Restore PC from [cont|PC]. - | subi TMP2, RD, 8 - | lwz TMP1, LFUNC:TMP1->pc - | stwx TISNIL, RA, TMP2 // Ensure one valid arg. - |.if FFI - | ble >1 - |.endif - | lwz KBASE, PC2PROTO(k)(TMP1) - | // BASE = base, RA = resultptr, RB = meta base - | mtctr TMP0 - | bctr // Jump to continuation. - | - |.if FFI - |1: - | beq ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: tailcall from C function. - | subi TMP1, RB, 16 - | sub RC, TMP1, BASE - | b ->vm_call_tail - |.endif - | - |->cont_cat: // RA = resultptr, RB = meta base - | lwz INS, -4(PC) - | subi CARG2, RB, 16 - | decode_RB8 SAVE0, INS - | lfd f0, 0(RA) - | add TMP1, BASE, SAVE0 - | stp BASE, L->base - | cmplw TMP1, CARG2 - | sub CARG3, CARG2, TMP1 - | decode_RA8 RA, INS - | stfd f0, 0(CARG2) - | bney ->BC_CAT_Z - | stfdx f0, BASE, RA - | b ->cont_nop - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | li TMP0, LJ_TSTR - | decode_RB8 RB, INS - | stw STR:RC, 4(CARG3) - | add CARG2, BASE, RB - | stw TMP0, 0(CARG3) - | b >1 - | - |->vmeta_tgets: - | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) - | li TMP0, LJ_TTAB - | stw TAB:RB, 4(CARG2) - | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) - | stw TMP0, 0(CARG2) - | li TMP1, LJ_TSTR - | stw STR:RC, 4(CARG3) - | stw TMP1, 0(CARG3) - | b >1 - | - |->vmeta_tgetb: // TMP0 = index - |.if not DUALNUM - | tonum_u f0, TMP0 - |.endif - | decode_RB8 RB, INS - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | add CARG2, BASE, RB - |.if DUALNUM - | stw TISNUM, 0(CARG3) - | stw TMP0, 4(CARG3) - |.else - | stfd f0, 
0(CARG3) - |.endif - | b >1 - | - |->vmeta_tgetv: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG2, BASE, RB - | add CARG3, BASE, RC - |1: - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | cmplwi CRET1, 0 - | beq >3 - | lfd f0, 0(CRET1) - | ins_next1 - | stfdx f0, BASE, RA - | ins_next2 - | - |3: // Call __index metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k - | subfic TMP1, BASE, FRAME_CONT - | lp BASE, L->top - | stw PC, -16(BASE) // [cont|PC] - | add PC, TMP1, BASE - | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | li NARGS8:RC, 16 // 2 args for func(t, k). - | b ->vm_call_dispatch_f - | - |->vmeta_tgetr: - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. - | cmplwi CRET1, 0 - | beq >1 - | lfd f14, 0(CRET1) - | b ->BC_TGETR_Z - |1: - | stwx TISNIL, BASE, RA - | b ->cont_nop - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | li TMP0, LJ_TSTR - | decode_RB8 RB, INS - | stw STR:RC, 4(CARG3) - | add CARG2, BASE, RB - | stw TMP0, 0(CARG3) - | b >1 - | - |->vmeta_tsets: - | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) - | li TMP0, LJ_TTAB - | stw TAB:RB, 4(CARG2) - | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) - | stw TMP0, 0(CARG2) - | li TMP1, LJ_TSTR - | stw STR:RC, 4(CARG3) - | stw TMP1, 0(CARG3) - | b >1 - | - |->vmeta_tsetb: // TMP0 = index - |.if not DUALNUM - | tonum_u f0, TMP0 - |.endif - | decode_RB8 RB, INS - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | add CARG2, BASE, RB - |.if DUALNUM - | stw TISNUM, 0(CARG3) - | stw TMP0, 4(CARG3) - |.else - | stfd f0, 0(CARG3) - |.endif - | b >1 - | - |->vmeta_tsetv: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG2, BASE, RB - | add CARG3, BASE, RC - |1: - | stp BASE, L->base - | mr 
CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | cmplwi CRET1, 0 - | lfdx f0, BASE, RA - | beq >3 - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | ins_next1 - | stfd f0, 0(CRET1) - | ins_next2 - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | subfic TMP1, BASE, FRAME_CONT - | lp BASE, L->top - | stw PC, -16(BASE) // [cont|PC] - | add PC, TMP1, BASE - | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | li NARGS8:RC, 24 // 3 args for func(t, k, v) - | stfd f0, 16(BASE) // Copy value to third argument. - | b ->vm_call_dispatch_f - | - |->vmeta_tsetr: - | stp BASE, L->base - | stw PC, SAVE_PC - | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - | // Returns TValue *. - | stfd f14, 0(CRET1) - | b ->cont_nop - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | mr CARG1, L - | subi PC, PC, 4 - |.if DUALNUM - | mr CARG2, RA - |.else - | add CARG2, BASE, RA - |.endif - | stw PC, SAVE_PC - |.if DUALNUM - | mr CARG3, RD - |.else - | add CARG3, BASE, RD - |.endif - | stp BASE, L->base - | decode_OP1 CARG4, INS - | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - | // Returns 0/1 or TValue * (metamethod). - |3: - | cmplwi CRET1, 1 - | bgt ->vmeta_binop - | subfic CRET1, CRET1, 0 - |4: - | lwz INS, 0(PC) - | addi PC, PC, 4 - | decode_RD4 TMP2, INS - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | and TMP2, TMP2, CRET1 - | add PC, PC, TMP2 - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | lwz INS, -4(PC) - | lfd f0, 0(RA) - | decode_RA8 TMP1, INS - | stfdx f0, BASE, TMP1 - | b ->cont_nop - | - |->cont_condt: // RA = resultptr - | lwz TMP0, 0(RA) - | .gpr64 extsw TMP0, TMP0 - | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is true. 
- | subfe CRET1, CRET1, CRET1 - | not CRET1, CRET1 - | b <4 - | - |->cont_condf: // RA = resultptr - | lwz TMP0, 0(RA) - | .gpr64 extsw TMP0, TMP0 - | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is false. - | subfe CRET1, CRET1, CRET1 - | b <4 - | - |->vmeta_equal: - | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. - | subi PC, PC, 4 - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - | // Returns 0/1 or TValue * (metamethod). - | b <3 - | - |->vmeta_equal_cd: - |.if FFI - | mr CARG2, INS - | subi PC, PC, 4 - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) - | // Returns 0/1 or TValue * (metamethod). - | b <3 - |.endif - | - |->vmeta_istype: - | subi PC, PC, 4 - | stp BASE, L->base - | srwi CARG2, RA, 3 - | mr CARG1, L - | srwi CARG3, RD, 3 - | stw PC, SAVE_PC - | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - | b ->cont_nop - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_arith_nv: - | add CARG3, KBASE, RC - | add CARG4, BASE, RB - | b >1 - |->vmeta_arith_nv2: - |.if DUALNUM - | mr CARG3, RC - | mr CARG4, RB - | b >1 - |.endif - | - |->vmeta_unm: - | mr CARG3, RD - | mr CARG4, RD - | b >1 - | - |->vmeta_arith_vn: - | add CARG3, BASE, RB - | add CARG4, KBASE, RC - | b >1 - | - |->vmeta_arith_vv: - | add CARG3, BASE, RB - | add CARG4, BASE, RC - |.if DUALNUM - | b >1 - |.endif - |->vmeta_arith_vn2: - |->vmeta_arith_vv2: - |.if DUALNUM - | mr CARG3, RB - | mr CARG4, RC - |.endif - |1: - | add CARG2, BASE, RA - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | decode_OP1 CARG5, INS // Caveat: CARG5 overlaps INS. - | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - | // Returns NULL (finished) or TValue * (metamethod). - | cmplwi CRET1, 0 - | beq ->cont_nop - | - | // Call metamethod for binary op. 
- |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | sub TMP1, CRET1, BASE - | stw PC, -16(CRET1) // [cont|PC] - | mr TMP2, BASE - | addi PC, TMP1, FRAME_CONT - | mr BASE, CRET1 - | li NARGS8:RC, 16 // 2 args for func(o1, o2). - | b ->vm_call_dispatch - | - |->vmeta_len: -#if LJ_52 - | mr SAVE0, CARG1 -#endif - | mr CARG2, RD - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_len // (lua_State *L, TValue *o) - | // Returns NULL (retry) or TValue * (metamethod base). -#if LJ_52 - | cmplwi CRET1, 0 - | bne ->vmeta_binop // Binop call for compatibility. - | mr CARG1, SAVE0 - | b ->BC_LEN_Z -#else - | b ->vmeta_binop // Binop call for compatibility. -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. - | // TMP2 = old base, BASE = new base, RC = nargs*8 - | mr CARG1, L - | stp TMP2, L->base // This is the callers base! - | subi CARG2, BASE, 8 - | stw PC, SAVE_PC - | add CARG3, BASE, RC - | mr SAVE0, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | addi NARGS8:RC, SAVE0, 8 // Got one more argument now. - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | mr CARG1, L - | stp BASE, L->base - | subi CARG2, RA, 8 - | stw PC, SAVE_PC - | add CARG3, RA, RC - | mr SAVE0, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | lwz TMP1, FRAME_PC(BASE) - | addi NARGS8:RC, SAVE0, 8 // Got one more argument now. - | lwz LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here. 
- | b ->BC_CALLT_Z - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | mr CARG1, L - | stp BASE, L->base - | mr CARG2, RA - | stw PC, SAVE_PC - | mr SAVE0, INS - | bl extern lj_meta_for // (lua_State *L, TValue *base) - |.if JIT - | decode_OP1 TMP0, SAVE0 - |.endif - | decode_RA8 RA, SAVE0 - |.if JIT - | cmpwi TMP0, BC_JFORI - |.endif - | decode_RD8 RD, SAVE0 - |.if JIT - | beqy =>BC_JFORI - |.endif - | b =>BC_FORI - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz CARG1, 4(BASE) - | blt ->fff_fallback - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lwz CARG4, 8(BASE) - | lwz CARG1, 4(BASE) - | lwz CARG2, 12(BASE) - | blt ->fff_fallback - |.endmacro - | - |.macro .ffunc_n, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lfd FARG1, 0(BASE) - | blt ->fff_fallback - | checknum CARG3; bge ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lfd FARG1, 0(BASE) - | lwz CARG4, 8(BASE) - | lfd FARG2, 8(BASE) - | blt ->fff_fallback - | checknum CARG3; bge ->fff_fallback - | checknum CARG4; bge ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. 
- |.macro ffgccheck - | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | cmplw TMP0, TMP1 - | bgel ->fff_gcstep - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc_1 assert - | li TMP1, LJ_TFALSE - | la RA, -8(BASE) - | cmplw cr1, CARG3, TMP1 - | lwz PC, FRAME_PC(BASE) - | bge cr1, ->fff_fallback - | stw CARG3, 0(RA) - | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. - | stw CARG1, 4(RA) - | beq ->fff_res // Done if exactly 1 argument. - | li TMP1, 8 - | subi RC, RC, 8 - |1: - | cmplw TMP1, RC - | lfdx f0, BASE, TMP1 - | stfdx f0, RA, TMP1 - | addi TMP1, TMP1, 8 - | bney <1 - | b ->fff_res - | - |.ffunc type - | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) - | blt ->fff_fallback - | .gpr64 extsw CARG1, CARG1 - | subfc TMP0, TISNUM, CARG1 - | subfe TMP2, CARG1, CARG1 - | orc TMP1, TMP2, TMP0 - | addi TMP1, TMP1, ~LJ_TISNUM+1 - | slwi TMP1, TMP1, 3 - | la TMP2, CFUNC:RB->upvalue - | lfdx FARG1, TMP2, TMP1 - | b ->fff_resn - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | checktab CARG3; bne >6 - |1: // Field metatable must be at same offset for GCtab and GCudata! - | lwz TAB:CARG1, TAB:CARG1->metatable - |2: - | li CARG3, LJ_TNIL - | cmplwi TAB:CARG1, 0 - | lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) - | beq ->fff_restv - | lwz TMP0, TAB:CARG1->hmask - | li CARG3, LJ_TTAB // Use metatable as default result. - | lwz TMP1, STR:RC->hash - | lwz NODE:TMP2, TAB:CARG1->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | slwi TMP0, TMP1, 5 - | slwi TMP1, TMP1, 3 - | sub TMP1, TMP0, TMP1 - | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |3: // Rearranged logic, because we expect _not_ to find the key. 
- | lwz CARG4, NODE:TMP2->key - | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) - | lwz CARG2, NODE:TMP2->val - | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) - | checkstr CARG4; bne >4 - | cmpw TMP0, STR:RC; beq >5 - |4: - | lwz NODE:TMP2, NODE:TMP2->next - | cmplwi NODE:TMP2, 0 - | beq ->fff_restv // Not found, keep default result. - | b <3 - |5: - | checknil CARG2 - | beq ->fff_restv // Ditto for nil value. - | mr CARG3, CARG2 // Return value of mt.__metatable. - | mr CARG1, TMP1 - | b ->fff_restv - | - |6: - | cmpwi CARG3, LJ_TUDATA; beq <1 - | .gpr64 extsw CARG3, CARG3 - | subfc TMP0, TISNUM, CARG3 - | subfe TMP2, CARG3, CARG3 - | orc TMP1, TMP2, TMP0 - | addi TMP1, TMP1, ~LJ_TISNUM+1 - | slwi TMP1, TMP1, 2 - | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH) - | lwzx TAB:CARG1, TMP2, TMP1 - | b <2 - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. - | checktab CARG3; bne ->fff_fallback - | lwz TAB:TMP1, TAB:CARG1->metatable - | checktab CARG4; bne ->fff_fallback - | cmplwi TAB:TMP1, 0 - | lbz TMP3, TAB:CARG1->marked - | bne ->fff_fallback - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - | stw TAB:CARG2, TAB:CARG1->metatable - | beq ->fff_restv - | barrierback TAB:CARG1, TMP3, TMP0 - | b ->fff_restv - | - |.ffunc rawget - | cmplwi NARGS8:RC, 16 - | lwz CARG4, 0(BASE) - | lwz TAB:CARG2, 4(BASE) - | blt ->fff_fallback - | checktab CARG4; bne ->fff_fallback - | la CARG3, 8(BASE) - | mr CARG1, L - | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - | // Returns cTValue *. - | lfd FARG1, 0(CRET1) - | b ->fff_resn - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) - | lfd FARG1, 0(BASE) - | bne ->fff_fallback // Exactly one argument. 
- | checknum CARG1; bgt ->fff_fallback - | b ->fff_resn - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | checkstr CARG3 - | // A __tostring method in the string base metatable is ignored. - | beq ->fff_restv // String key? - | // Handle numbers inline, unless a number base metatable is present. - | lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) - | checknum CARG3 - | cmplwi cr1, TMP0, 0 - | stp BASE, L->base // Add frame since C call can throw. - | crorc 4*cr0+eq, 4*cr0+gt, 4*cr1+eq - | stw PC, SAVE_PC // Redundant (but a defined value). - | beq ->fff_fallback - | ffgccheck - | mr CARG1, L - | mr CARG2, BASE - |.if DUALNUM - | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) - |.else - | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np) - |.endif - | // Returns GCstr *. - | li CARG3, LJ_TSTR - | b ->fff_restv - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc next - | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) - | lwz TAB:CARG2, 4(BASE) - | blt ->fff_fallback - | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. - | checktab CARG1 - | lwz PC, FRAME_PC(BASE) - | bne ->fff_fallback - | stp BASE, L->base // Add frame since C call can throw. - | mr CARG1, L - | stp BASE, L->top // Dummy frame length is ok. - | la CARG3, 8(BASE) - | stw PC, SAVE_PC - | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Returns 0 at end of traversal. - | cmplwi CRET1, 0 - | li CARG3, LJ_TNIL - | beq ->fff_restv // End of traversal: return nil. - | lfd f0, 8(BASE) // Copy key and value to results. 
- | la RA, -8(BASE) - | lfd f1, 16(BASE) - | stfd f0, 0(RA) - | li RD, (2+1)*8 - | stfd f1, 8(RA) - | b ->fff_res - | - |.ffunc_1 pairs - | checktab CARG3 - | lwz PC, FRAME_PC(BASE) - | bne ->fff_fallback -#if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable - | lfd f0, CFUNC:RB->upvalue[0] - | cmplwi TAB:TMP2, 0 - | la RA, -8(BASE) - | bne ->fff_fallback -#else - | lfd f0, CFUNC:RB->upvalue[0] - | la RA, -8(BASE) -#endif - | stw TISNIL, 8(BASE) - | li RD, (3+1)*8 - | stfd f0, 0(RA) - | b ->fff_res - | - |.ffunc ipairs_aux - | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lwz TAB:CARG1, 4(BASE) - | lwz CARG4, 8(BASE) - |.if DUALNUM - | lwz TMP2, 12(BASE) - |.else - | lfd FARG2, 8(BASE) - |.endif - | blt ->fff_fallback - | checktab CARG3 - | checknum cr1, CARG4 - | lwz PC, FRAME_PC(BASE) - |.if DUALNUM - | bne ->fff_fallback - | bne cr1, ->fff_fallback - |.else - | lus TMP0, 0x3ff0 - | stw ZERO, TMPD_LO - | bne ->fff_fallback - | stw TMP0, TMPD_HI - | bge cr1, ->fff_fallback - | lfd FARG1, TMPD - | toint TMP2, FARG2, f0 - |.endif - | lwz TMP0, TAB:CARG1->asize - | lwz TMP1, TAB:CARG1->array - |.if not DUALNUM - | fadd FARG2, FARG2, FARG1 - |.endif - | addi TMP2, TMP2, 1 - | la RA, -8(BASE) - | cmplw TMP0, TMP2 - |.if DUALNUM - | stw TISNUM, 0(RA) - | slwi TMP3, TMP2, 3 - | stw TMP2, 4(RA) - |.else - | slwi TMP3, TMP2, 3 - | stfd FARG2, 0(RA) - |.endif - | ble >2 // Not in array part? - | lwzx TMP2, TMP1, TMP3 - | lfdx f0, TMP1, TMP3 - |1: - | checknil TMP2 - | li RD, (0+1)*8 - | beq ->fff_res // End of iteration, return 0 results. - | li RD, (2+1)*8 - | stfd f0, 8(RA) - | b ->fff_res - |2: // Check for empty hash part first. Otherwise call C function. - | lwz TMP0, TAB:CARG1->hmask - | cmplwi TMP0, 0 - | li RD, (0+1)*8 - | beq ->fff_res - | mr CARG2, TMP2 - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. 
- | cmplwi CRET1, 0 - | li RD, (0+1)*8 - | beq ->fff_res - | lwz TMP2, 0(CRET1) - | lfd f0, 0(CRET1) - | b <1 - | - |.ffunc_1 ipairs - | checktab CARG3 - | lwz PC, FRAME_PC(BASE) - | bne ->fff_fallback -#if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable - | lfd f0, CFUNC:RB->upvalue[0] - | cmplwi TAB:TMP2, 0 - | la RA, -8(BASE) - | bne ->fff_fallback -#else - | lfd f0, CFUNC:RB->upvalue[0] - | la RA, -8(BASE) -#endif - |.if DUALNUM - | stw TISNUM, 8(BASE) - |.else - | stw ZERO, 8(BASE) - |.endif - | stw ZERO, 12(BASE) - | li RD, (3+1)*8 - | stfd f0, 0(RA) - | b ->fff_res - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | cmplwi NARGS8:RC, 8 - | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | blt ->fff_fallback - | mr TMP2, BASE - | la BASE, 8(BASE) - | // Remember active hook before pcall. - | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 - | subi NARGS8:RC, NARGS8:RC, 8 - | addi PC, TMP3, 8+FRAME_PCALL - | b ->vm_call_dispatch - | - |.ffunc xpcall - | cmplwi NARGS8:RC, 16 - | lwz CARG4, 8(BASE) - | lfd FARG2, 8(BASE) - | lfd FARG1, 0(BASE) - | blt ->fff_fallback - | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) - | mr TMP2, BASE - | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. - | la BASE, 16(BASE) - | // Remember active hook before pcall. - | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 - | stfd FARG2, 0(TMP2) // Swap function and traceback. 
- | subi NARGS8:RC, NARGS8:RC, 16 - | stfd FARG1, 8(TMP2) - | addi PC, TMP1, 16+FRAME_PCALL - | b ->vm_call_dispatch - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | cmpwi CARG3, LJ_TTHREAD; bne ->fff_fallback - |.else - |.ffunc coroutine_wrap_aux - | lwz L:CARG1, CFUNC:RB->upvalue[0].gcr - |.endif - | lbz TMP0, L:CARG1->status - | lp TMP1, L:CARG1->cframe - | lp CARG2, L:CARG1->top - | cmplwi cr0, TMP0, LUA_YIELD - | lp TMP2, L:CARG1->base - | cmplwi cr1, TMP1, 0 - | lwz TMP0, L:CARG1->maxstack - | cmplw cr7, CARG2, TMP2 - | lwz PC, FRAME_PC(BASE) - | crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq // st>LUA_YIELD || cframe!=0 - | add TMP2, CARG2, NARGS8:RC - | crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq // base==top && st!=LUA_YIELD - | cmplw cr1, TMP2, TMP0 - | cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt - | stw PC, SAVE_PC - | cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt // cond1 || cond2 || stackov - | stp BASE, L->base - | blt cr6, ->fff_fallback - |1: - |.if resume - | addi BASE, BASE, 8 // Keep resumed thread in stack for GC. - | subi NARGS8:RC, NARGS8:RC, 8 - | subi TMP2, TMP2, 8 - |.endif - | stp TMP2, L:CARG1->top - | li TMP1, 0 - | stp BASE, L->top - |2: // Move args to coroutine. - | cmpw TMP1, NARGS8:RC - | lfdx f0, BASE, TMP1 - | beq >3 - | stfdx f0, CARG2, TMP1 - | addi TMP1, TMP1, 8 - | b <2 - |3: - | li CARG3, 0 - | mr L:SAVE0, L:CARG1 - | li CARG4, 0 - | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) - | // Returns thread status. - |4: - | lp TMP2, L:SAVE0->base - | cmplwi CRET1, LUA_YIELD - | lp TMP3, L:SAVE0->top - | li_vmstate INTERP - | lp BASE, L->base - | stw L, DISPATCH_GL(cur_L)(DISPATCH) - | st_vmstate - | bgt >8 - | sub RD, TMP3, TMP2 - | lwz TMP0, L->maxstack - | cmplwi RD, 0 - | add TMP1, BASE, RD - | beq >6 // No results? - | cmplw TMP1, TMP0 - | li TMP1, 0 - | bgt >9 // Need to grow stack? 
- | - | subi TMP3, RD, 8 - | stp TMP2, L:SAVE0->top // Clear coroutine stack. - |5: // Move results from coroutine. - | cmplw TMP1, TMP3 - | lfdx f0, TMP2, TMP1 - | stfdx f0, BASE, TMP1 - | addi TMP1, TMP1, 8 - | bne <5 - |6: - | andix. TMP0, PC, FRAME_TYPE - |.if resume - | li TMP1, LJ_TTRUE - | la RA, -8(BASE) - | stw TMP1, -8(BASE) // Prepend true to results. - | addi RD, RD, 16 - |.else - | mr RA, BASE - | addi RD, RD, 8 - |.endif - |7: - | stw PC, SAVE_PC - | mr MULTRES, RD - | beq ->BC_RET_Z - | b ->vm_return - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | andix. TMP0, PC, FRAME_TYPE - | la TMP3, -8(TMP3) - | li TMP1, LJ_TFALSE - | lfd f0, 0(TMP3) - | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. - | li RD, (2+1)*8 - | stw TMP1, -8(BASE) // Prepend false to results. - | la RA, -8(BASE) - | stfd f0, 0(BASE) // Copy error message. - | b <7 - |.else - | mr CARG1, L - | mr CARG2, L:SAVE0 - | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - |.endif - | - |9: // Handle stack expansion on return from yield. - | mr CARG1, L - | srwi CARG2, RD, 3 - | bl extern lj_state_growstack // (lua_State *L, int n) - | li CRET1, 0 - | b <4 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | lp TMP0, L->cframe - | add TMP1, BASE, NARGS8:RC - | stp BASE, L->base - | andix. TMP0, TMP0, CFRAME_RESUME - | stp TMP1, L->top - | li CRET1, LUA_YIELD - | beq ->fff_fallback - | stp ZERO, L->cframe - | stb CRET1, L->status - | b ->vm_leave_unw - | - |//-- Math library ------------------------------------------------------- - | - |.ffunc_1 math_abs - | checknum CARG3 - |.if DUALNUM - | bne >2 - | srawi TMP1, CARG1, 31 - | xor TMP2, TMP1, CARG1 - |.if GPR64 - | lus TMP0, 0x8000 - | sub CARG1, TMP2, TMP1 - | cmplw CARG1, TMP0 - | beq >1 - |.else - | sub. 
CARG1, TMP2, TMP1 - | blt >1 - |.endif - |->fff_resi: - | lwz PC, FRAME_PC(BASE) - | la RA, -8(BASE) - | stw TISNUM, -8(BASE) - | stw CRET1, -4(BASE) - | b ->fff_res1 - |1: - | lus CARG3, 0x41e0 // 2^31. - | li CARG1, 0 - | b ->fff_restv - |2: - |.endif - | bge ->fff_fallback - | rlwinm CARG3, CARG3, 0, 1, 31 - | // Fallthrough. - | - |->fff_restv: - | // CARG3/CARG1 = TValue result. - | lwz PC, FRAME_PC(BASE) - | stw CARG3, -8(BASE) - | la RA, -8(BASE) - | stw CARG1, -4(BASE) - |->fff_res1: - | // RA = results, PC = return. - | li RD, (1+1)*8 - |->fff_res: - | // RA = results, RD = (nresults+1)*8, PC = return. - | andix. TMP0, PC, FRAME_TYPE - | mr MULTRES, RD - | bney ->vm_return - | lwz INS, -4(PC) - | decode_RB8 RB, INS - |5: - | cmplw RB, RD // More results expected? - | decode_RA8 TMP0, INS - | bgt >6 - | ins_next1 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | sub BASE, RA, TMP0 - | ins_next2 - | - |6: // Fill up results with nil. - | subi TMP1, RD, 8 - | addi RD, RD, 8 - | stwx TISNIL, RA, TMP1 - | b <5 - | - |.macro math_extern, func - | .ffunc_n math_ .. func - | blex func - | b ->fff_resn - |.endmacro - | - |.macro math_extern2, func - | .ffunc_nn math_ .. func - | blex func - | b ->fff_resn - |.endmacro - | - |.macro math_round, func - | .ffunc_1 math_ .. func - | checknum CARG3; beqy ->fff_restv - | rlwinm TMP2, CARG3, 12, 21, 31 - | bge ->fff_fallback - | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023 - | cmplwi cr1, TMP2, 31 // 0 <= exp < 31? 
- | subfic TMP0, TMP2, 31 - | blt >3 - | slwi TMP1, CARG3, 11 - | srwi TMP3, CARG1, 21 - | oris TMP1, TMP1, 0x8000 - | addi TMP2, TMP2, 1 - | or TMP1, TMP1, TMP3 - | slwi CARG2, CARG1, 11 - | bge cr1, >4 - | slw TMP3, TMP1, TMP2 - | srw RD, TMP1, TMP0 - | or TMP3, TMP3, CARG2 - | srawi TMP2, CARG3, 31 - |.if "func" == "floor" - | and TMP1, TMP3, TMP2 - | addic TMP0, TMP1, -1 - | subfe TMP1, TMP0, TMP1 - | add CARG1, RD, TMP1 - | xor CARG1, CARG1, TMP2 - | sub CARG1, CARG1, TMP2 - | b ->fff_resi - |.else - | andc TMP1, TMP3, TMP2 - | addic TMP0, TMP1, -1 - | subfe TMP1, TMP0, TMP1 - | add CARG1, RD, TMP1 - | cmpw CARG1, RD - | xor CARG1, CARG1, TMP2 - | sub CARG1, CARG1, TMP2 - | bge ->fff_resi - | // Overflow to 2^31. - | lus CARG3, 0x41e0 // 2^31. - | li CARG1, 0 - | b ->fff_restv - |.endif - |3: // |x| < 1 - | slwi TMP2, CARG3, 1 - | srawi TMP1, CARG3, 31 - | or TMP2, CARG1, TMP2 // ztest = (hi+hi) | lo - |.if "func" == "floor" - | and TMP1, TMP2, TMP1 // (ztest & sign) == 0 ? 0 : -1 - | subfic TMP2, TMP1, 0 - | subfe CARG1, CARG1, CARG1 - |.else - | andc TMP1, TMP2, TMP1 // (ztest & ~sign) == 0 ? 0 : 1 - | addic TMP2, TMP1, -1 - | subfe CARG1, TMP2, TMP1 - |.endif - | b ->fff_resi - |4: // exp >= 31. Check for -(2^31). - | xoris TMP1, TMP1, 0x8000 - | srawi TMP2, CARG3, 31 - |.if "func" == "floor" - | or TMP1, TMP1, CARG2 - |.endif - |.if PPE - | orc TMP1, TMP1, TMP2 - | cmpwi TMP1, 0 - |.else - | orc. TMP1, TMP1, TMP2 - |.endif - | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq - | lus CARG1, 0x8000 // -(2^31). - | beqy ->fff_resi - |5: - | lfd FARG1, 0(BASE) - | blex func - | b ->fff_resn - |.endmacro - | - |.if DUALNUM - | math_round floor - | math_round ceil - |.else - | // NYI: use internal implementation. 
- | math_extern floor - | math_extern ceil - |.endif - | - |.if SQRT - |.ffunc_n math_sqrt - | fsqrt FARG1, FARG1 - | b ->fff_resn - |.else - | math_extern sqrt - |.endif - | - |.ffunc math_log - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lfd FARG1, 0(BASE) - | bne ->fff_fallback // Need exactly 1 argument. - | checknum CARG3; bge ->fff_fallback - | blex log - | b ->fff_resn - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.if DUALNUM - |.ffunc math_ldexp - | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) - | lfd FARG1, 0(BASE) - | lwz CARG4, 8(BASE) - |.if GPR64 - | lwz CARG2, 12(BASE) - |.else - | lwz CARG1, 12(BASE) - |.endif - | blt ->fff_fallback - | checknum CARG3; bge ->fff_fallback - | checknum CARG4; bne ->fff_fallback - |.else - |.ffunc_nn math_ldexp - |.if GPR64 - | toint CARG2, FARG2 - |.else - | toint CARG1, FARG2 - |.endif - |.endif - | blex ldexp - | b ->fff_resn - | - |.ffunc_n math_frexp - |.if GPR64 - | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) - |.else - | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) - |.endif - | lwz PC, FRAME_PC(BASE) - | blex frexp - | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) - | la RA, -8(BASE) - |.if not DUALNUM - | tonum_i FARG2, TMP1 - |.endif - | stfd FARG1, 0(RA) - | li RD, (2+1)*8 - |.if DUALNUM - | stw TISNUM, 8(RA) - | stw TMP1, 12(RA) - |.else - | stfd FARG2, 8(RA) - |.endif - | b ->fff_res - | - |.ffunc_n math_modf - |.if GPR64 - | la CARG2, -8(BASE) - |.else - | la CARG1, -8(BASE) - |.endif - | lwz PC, FRAME_PC(BASE) - | blex modf - | la RA, -8(BASE) - | stfd FARG1, 0(BASE) - | li RD, (2+1)*8 - | b ->fff_res - | - |.macro math_minmax, name, ismax - |.if DUALNUM - | .ffunc_1 name - | checknum CARG3 - | addi TMP1, BASE, 8 - | add TMP2, BASE, NARGS8:RC - | bne >4 - |1: // 
Handle integers. - | lwz CARG4, 0(TMP1) - | cmplw cr1, TMP1, TMP2 - | lwz CARG2, 4(TMP1) - | bge cr1, ->fff_resi - | checknum CARG4 - | xoris TMP0, CARG1, 0x8000 - | xoris TMP3, CARG2, 0x8000 - | bne >3 - | subfc TMP3, TMP3, TMP0 - | subfe TMP0, TMP0, TMP0 - |.if ismax - | andc TMP3, TMP3, TMP0 - |.else - | and TMP3, TMP3, TMP0 - |.endif - | add CARG1, TMP3, CARG2 - |.if GPR64 - | rldicl CARG1, CARG1, 0, 32 - |.endif - | addi TMP1, TMP1, 8 - | b <1 - |3: - | bge ->fff_fallback - | // Convert intermediate result to number and continue below. - | tonum_i FARG1, CARG1 - | lfd FARG2, 0(TMP1) - | b >6 - |4: - | lfd FARG1, 0(BASE) - | bge ->fff_fallback - |5: // Handle numbers. - | lwz CARG4, 0(TMP1) - | cmplw cr1, TMP1, TMP2 - | lfd FARG2, 0(TMP1) - | bge cr1, ->fff_resn - | checknum CARG4; bge >7 - |6: - | fsub f0, FARG1, FARG2 - | addi TMP1, TMP1, 8 - |.if ismax - | fsel FARG1, f0, FARG1, FARG2 - |.else - | fsel FARG1, f0, FARG2, FARG1 - |.endif - | b <5 - |7: // Convert integer to number and continue above. - | lwz CARG2, 4(TMP1) - | bne ->fff_fallback - | tonum_i FARG2, CARG2 - | b <6 - |.else - | .ffunc_n name - | li TMP1, 8 - |1: - | lwzx CARG2, BASE, TMP1 - | lfdx FARG2, BASE, TMP1 - | cmplw cr1, TMP1, NARGS8:RC - | checknum CARG2 - | bge cr1, ->fff_resn - | bge ->fff_fallback - | fsub f0, FARG1, FARG2 - | addi TMP1, TMP1, 8 - |.if ismax - | fsel FARG1, f0, FARG1, FARG2 - |.else - | fsel FARG1, f0, FARG2, FARG1 - |.endif - | b <1 - |.endif - |.endmacro - | - | math_minmax math_min, 0 - | math_minmax math_max, 1 - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz STR:CARG1, 4(BASE) - | bne ->fff_fallback // Need exactly 1 argument. - | checkstr CARG3 - | bne ->fff_fallback - | lwz TMP0, STR:CARG1->len - |.if DUALNUM - | lbz CARG1, STR:CARG1[1] // Access is always ok (NUL at end). 
- | li RD, (0+1)*8 - | lwz PC, FRAME_PC(BASE) - | cmplwi TMP0, 0 - | la RA, -8(BASE) - | beqy ->fff_res - | b ->fff_resi - |.else - | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end). - | addic TMP3, TMP0, -1 // RD = ((str->len != 0)+1)*8 - | subfe RD, TMP3, TMP0 - | stw TMP1, TONUM_LO // Inlined tonum_u f0, TMP1. - | addi RD, RD, 1 - | lfd f0, TONUM_D - | la RA, -8(BASE) - | lwz PC, FRAME_PC(BASE) - | fsub f0, f0, TOBIT - | slwi RD, RD, 3 - | stfd f0, 0(RA) - | b ->fff_res - |.endif - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - |.if DUALNUM - | lwz TMP0, 4(BASE) - | bne ->fff_fallback // Exactly 1 argument. - | checknum CARG3; bne ->fff_fallback - | la CARG2, 7(BASE) - |.else - | lfd FARG1, 0(BASE) - | bne ->fff_fallback // Exactly 1 argument. - | checknum CARG3; bge ->fff_fallback - | toint TMP0, FARG1 - | la CARG2, TMPD_BLO - |.endif - | li CARG3, 1 - | cmplwi TMP0, 255; bgt ->fff_fallback - |->fff_newstr: - | mr CARG1, L - | stp BASE, L->base - | stw PC, SAVE_PC - | bl extern lj_str_new // (lua_State *L, char *str, size_t l) - |->fff_resstr: - | // Returns GCstr *. - | lp BASE, L->base - | li CARG3, LJ_TSTR - | b ->fff_restv - | - |.ffunc string_sub - | ffgccheck - | cmplwi NARGS8:RC, 16 - | lwz CARG3, 16(BASE) - |.if not DUALNUM - | lfd f0, 16(BASE) - |.endif - | lwz TMP0, 0(BASE) - | lwz STR:CARG1, 4(BASE) - | blt ->fff_fallback - | lwz CARG2, 8(BASE) - |.if DUALNUM - | lwz TMP1, 12(BASE) - |.else - | lfd f1, 8(BASE) - |.endif - | li TMP2, -1 - | beq >1 - |.if DUALNUM - | checknum CARG3 - | lwz TMP2, 20(BASE) - | bne ->fff_fallback - |1: - | checknum CARG2; bne ->fff_fallback - |.else - | checknum CARG3; bge ->fff_fallback - | toint TMP2, f0 - |1: - | checknum CARG2; bge ->fff_fallback - |.endif - | checkstr TMP0; bne ->fff_fallback - |.if not DUALNUM - | toint TMP1, f1 - |.endif - | lwz TMP0, STR:CARG1->len - | cmplw TMP0, TMP2 // len < end? 
(unsigned compare) - | addi TMP3, TMP2, 1 - | blt >5 - |2: - | cmpwi TMP1, 0 // start <= 0? - | add TMP3, TMP1, TMP0 - | ble >7 - |3: - | sub CARG3, TMP2, TMP1 - | addi CARG2, STR:CARG1, #STR-1 - | srawi TMP0, CARG3, 31 - | addi CARG3, CARG3, 1 - | add CARG2, CARG2, TMP1 - | andc CARG3, CARG3, TMP0 - |.if GPR64 - | rldicl CARG2, CARG2, 0, 32 - | rldicl CARG3, CARG3, 0, 32 - |.endif - | b ->fff_newstr - | - |5: // Negative end or overflow. - | cmpw TMP0, TMP2 // len >= end? (signed compare) - | add TMP2, TMP0, TMP3 // Negative end: end = end+len+1. - | bge <2 - | mr TMP2, TMP0 // Overflow: end = len. - | b <2 - | - |7: // Negative start or underflow. - | .gpr64 extsw TMP1, TMP1 - | addic CARG3, TMP1, -1 - | subfe CARG3, CARG3, CARG3 - | srawi CARG2, TMP3, 31 // Note: modifies carry. - | andc TMP3, TMP3, CARG3 - | andc TMP1, TMP3, CARG2 - | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) - | b <3 - | - |.macro ffstring_op, name - | .ffunc string_ .. name - | ffgccheck - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz STR:CARG2, 4(BASE) - | blt ->fff_fallback - | checkstr CARG3 - | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH) - | bne ->fff_fallback - | lwz TMP0, SBUF:CARG1->b - | stw L, SBUF:CARG1->L - | stp BASE, L->base - | stw PC, SAVE_PC - | stw TMP0, SBUF:CARG1->p - | bl extern lj_buf_putstr_ .. 
name - | bl extern lj_buf_tostr - | b ->fff_resstr - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |.macro .ffunc_bit, name - |.if DUALNUM - | .ffunc_1 bit_..name - | checknum CARG3; bnel ->fff_tobit_fb - |.else - | .ffunc_n bit_..name - | fadd FARG1, FARG1, TOBIT - | stfd FARG1, TMPD - | lwz CARG1, TMPD_LO - |.endif - |.endmacro - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name - | addi TMP1, BASE, 8 - | add TMP2, BASE, NARGS8:RC - |1: - | lwz CARG4, 0(TMP1) - | cmplw cr1, TMP1, TMP2 - |.if DUALNUM - | lwz CARG2, 4(TMP1) - |.else - | lfd FARG1, 0(TMP1) - |.endif - | bgey cr1, ->fff_resi - | checknum CARG4 - |.if DUALNUM - | bnel ->fff_bitop_fb - |.else - | fadd FARG1, FARG1, TOBIT - | bge ->fff_fallback - | stfd FARG1, TMPD - | lwz CARG2, TMPD_LO - |.endif - | ins CARG1, CARG1, CARG2 - | addi TMP1, TMP1, 8 - | b <1 - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, or - |.ffunc_bit_op bxor, xor - | - |.ffunc_bit bswap - | rotlwi TMP0, CARG1, 8 - | rlwimi TMP0, CARG1, 24, 0, 7 - | rlwimi TMP0, CARG1, 24, 16, 23 - | mr CRET1, TMP0 - | b ->fff_resi - | - |.ffunc_bit bnot - | not CRET1, CARG1 - | b ->fff_resi - | - |.macro .ffunc_bit_sh, name, ins, shmod - |.if DUALNUM - | .ffunc_2 bit_..name - | checknum CARG3; bnel ->fff_tobit_fb - | // Note: no inline conversion from number for 2nd argument! 
- | checknum CARG4; bne ->fff_fallback - |.else - | .ffunc_nn bit_..name - | fadd FARG1, FARG1, TOBIT - | fadd FARG2, FARG2, TOBIT - | stfd FARG1, TMPD - | lwz CARG1, TMPD_LO - | stfd FARG2, TMPD - | lwz CARG2, TMPD_LO - |.endif - |.if shmod == 1 - | rlwinm CARG2, CARG2, 0, 27, 31 - |.elif shmod == 2 - | neg CARG2, CARG2 - |.endif - | ins CRET1, CARG1, CARG2 - | b ->fff_resi - |.endmacro - | - |.ffunc_bit_sh lshift, slw, 1 - |.ffunc_bit_sh rshift, srw, 1 - |.ffunc_bit_sh arshift, sraw, 1 - |.ffunc_bit_sh rol, rotlw, 0 - |.ffunc_bit_sh ror, rotlw, 2 - | - |.ffunc_bit tobit - |.if DUALNUM - | b ->fff_resi - |.else - |->fff_resi: - | tonum_i FARG1, CRET1 - |.endif - |->fff_resn: - | lwz PC, FRAME_PC(BASE) - | la RA, -8(BASE) - | stfd FARG1, -8(BASE) - | b ->fff_res1 - | - |// Fallback FP number to bit conversion. - |->fff_tobit_fb: - |.if DUALNUM - | lfd FARG1, 0(BASE) - | bgt ->fff_fallback - | fadd FARG1, FARG1, TOBIT - | stfd FARG1, TMPD - | lwz CARG1, TMPD_LO - | blr - |.endif - |->fff_bitop_fb: - |.if DUALNUM - | lfd FARG1, 0(TMP1) - | bgt ->fff_fallback - | fadd FARG1, FARG1, TOBIT - | stfd FARG1, TMPD - | lwz CARG2, TMPD_LO - | blr - |.endif - | - |//----------------------------------------------------------------------- - | - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RB = CFUNC, RC = nargs*8 - | lp TMP3, CFUNC:RB->f - | add TMP1, BASE, NARGS8:RC - | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC. - | addi TMP0, TMP1, 8*LUA_MINSTACK - | lwz TMP2, L->maxstack - | stw PC, SAVE_PC // Redundant (but a defined value). - | .toc lp TMP3, 0(TMP3) - | cmplw TMP0, TMP2 - | stp BASE, L->base - | stp TMP1, L->top - | mr CARG1, L - | bgt >5 // Need to grow stack. - | mtctr TMP3 - | bctrl // (lua_State *L) - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | lp BASE, L->base - | cmpwi CRET1, 0 - | slwi RD, CRET1, 3 - | la RA, -8(BASE) - | bgt ->fff_res // Returned nresults+1? 
- |1: // Returned 0 or -1: retry fast path. - | lp TMP0, L->top - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | sub NARGS8:RC, TMP0, BASE - | bne ->vm_call_tail // Returned -1? - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | andix. TMP0, PC, FRAME_TYPE - | rlwinm TMP1, PC, 0, 0, 28 - | bne >3 - | lwz INS, -4(PC) - | decode_RA8 TMP1, INS - | addi TMP1, TMP1, 8 - |3: - | sub TMP2, BASE, TMP1 - | b ->vm_call_dispatch // Resolve again for tailcall. - | - |5: // Grow stack for fallback handler. - | li CARG2, LUA_MINSTACK - | bl extern lj_state_growstack // (lua_State *L, int n) - | lp BASE, L->base - | cmpw TMP0, TMP0 // Set 4*cr0+eq to force retry. - | b <1 - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | mflr SAVE0 - | stp BASE, L->base - | add TMP0, BASE, NARGS8:RC - | stw PC, SAVE_PC // Redundant (but a defined value). - | stp TMP0, L->top - | mr CARG1, L - | bl extern lj_gc_step // (lua_State *L) - | lp BASE, L->base - | mtlr SAVE0 - | lp TMP0, L->top - | sub NARGS8:RC, TMP0, BASE - | lwz CFUNC:RB, FRAME_FUNC(BASE) - | blr - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andix. TMP0, TMP3, HOOK_VMEVENT // No recording while in vmevent. - | bne >5 - | // Decrement the hookcount for consistency, but always do the call. - | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andix. TMP0, TMP3, HOOK_ACTIVE - | bne >1 - | subi TMP2, TMP2, 1 - | andi. TMP0, TMP3, LUA_MASKLINE|LUA_MASKCOUNT - | beqy >1 - | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | b >1 - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. 
- | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andix. TMP0, TMP3, HOOK_ACTIVE // Hook already active? - | beq >1 - |5: // Re-dispatch to static ins. - | addi TMP1, TMP1, GG_DISP2STATIC // Assumes decode_OPP TMP1, INS. - | lpx TMP0, DISPATCH, TMP1 - | mtctr TMP0 - | bctr - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andix. TMP0, TMP3, HOOK_ACTIVE // Hook already active? - | rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0 - | bne <5 - | - | cmpwi cr1, TMP0, 0 - | addic. TMP2, TMP2, -1 - | beq cr1, <5 - | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | beq >1 - | bge cr1, <5 - |1: - | mr CARG1, L - | stw MULTRES, SAVE_MULTRES - | mr CARG2, PC - | stp BASE, L->base - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |3: - | lp BASE, L->base - |4: // Re-dispatch to static ins. - | lwz INS, -4(PC) - | decode_OPP TMP1, INS - | decode_RB8 RB, INS - | addi TMP1, TMP1, GG_DISP2STATIC - | decode_RD8 RD, INS - | lpx TMP0, DISPATCH, TMP1 - | decode_RA8 RA, INS - | decode_RC8 RC, INS - | mtctr TMP0 - | bctr - | - |->cont_hook: // Continue from hook yield. - | addi PC, PC, 4 - | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins. - | b <4 - | - |->vm_hotloop: // Hot loop counter underflow. - |.if JIT - | lwz LFUNC:TMP1, FRAME_FUNC(BASE) - | addi CARG1, DISPATCH, GG_DISP2J - | stw PC, SAVE_PC - | lwz TMP1, LFUNC:TMP1->pc - | mr CARG2, PC - | stw L, DISPATCH_J(L)(DISPATCH) - | lbz TMP1, PC2PROTO(framesize)(TMP1) - | stp BASE, L->base - | slwi TMP1, TMP1, 3 - | add TMP1, BASE, TMP1 - | stp TMP1, L->top - | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) - | b <3 - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - | mr CARG2, PC - |.if JIT - | b >1 - |.endif - | - |->vm_hotcall: // Hot call counter underflow. 
- |.if JIT - | ori CARG2, PC, 1 - |1: - |.endif - | add TMP0, BASE, RC - | stw PC, SAVE_PC - | mr CARG1, L - | stp BASE, L->base - | sub RA, RA, BASE - | stp TMP0, L->top - | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) - | // Returns ASMFunction. - | lp BASE, L->base - | lp TMP0, L->top - | stw ZERO, SAVE_PC // Invalidate for subsequent line hook. - | sub NARGS8:RC, TMP0, BASE - | add RA, BASE, RA - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | lwz INS, -4(PC) - | mtctr CRET1 - | bctr - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // RA = resultptr, RB = meta base - | lwz INS, -4(PC) - | lwz TRACE:TMP2, -20(RB) // Save previous trace. - | addic. TMP1, MULTRES, -8 - | decode_RA8 RC, INS // Call base. - | beq >2 - |1: // Move results down. - | lfd f0, 0(RA) - | addic. TMP1, TMP1, -8 - | addi RA, RA, 8 - | stfdx f0, BASE, RC - | addi RC, RC, 8 - | bne <1 - |2: - | decode_RA8 RA, INS - | decode_RB8 RB, INS - | add RA, RA, RB - |3: - | cmplw RA, RC - | bgt >9 // More results wanted? - | - | lhz TMP3, TRACE:TMP2->traceno - | lhz RD, TRACE:TMP2->link - | cmpw RD, TMP3 - | cmpwi cr1, RD, 0 - | beq ->cont_nop // Blacklisted. - | slwi RD, RD, 3 - | bne cr1, =>BC_JLOOP // Jump to stitched trace. - | - | // Stitch a new trace to the previous trace. - | stw TMP3, DISPATCH_J(exitno)(DISPATCH) - | stp L, DISPATCH_J(L)(DISPATCH) - | stp BASE, L->base - | addi CARG1, DISPATCH, GG_DISP2J - | mr CARG2, PC - | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) - | lp BASE, L->base - | b ->cont_nop - | - |9: - | stwx TISNIL, BASE, RC - | addi RC, RC, 8 - | b <3 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | mr CARG1, L - | stw MULTRES, SAVE_MULTRES - | mr CARG2, PC - | stp BASE, L->base - | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. 
- | lp BASE, L->base - | subi PC, PC, 4 - | b ->cont_nop -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro savex_, a, b, c, d - | stfd f..a, 16+a*8(sp) - | stfd f..b, 16+b*8(sp) - | stfd f..c, 16+c*8(sp) - | stfd f..d, 16+d*8(sp) - |.endmacro - | - |->vm_exit_handler: - |.if JIT - | addi sp, sp, -(16+32*8+32*4) - | stmw r2, 16+32*8+2*4(sp) - | addi DISPATCH, JGL, -GG_DISP2G-32768 - | li CARG2, ~LJ_VMST_EXIT - | lwz CARG1, 16+32*8+32*4(sp) // Get stack chain. - | stw CARG2, DISPATCH_GL(vmstate)(DISPATCH) - | savex_ 0,1,2,3 - | stw CARG1, 0(sp) // Store extended stack chain. - | clrso TMP1 - | savex_ 4,5,6,7 - | addi CARG2, sp, 16+32*8+32*4 // Recompute original value of sp. - | savex_ 8,9,10,11 - | stw CARG2, 16+32*8+1*4(sp) // Store sp in RID_SP. - | savex_ 12,13,14,15 - | mflr CARG3 - | li TMP1, 0 - | savex_ 16,17,18,19 - | stw TMP1, 16+32*8+0*4(sp) // Clear RID_TMP. - | savex_ 20,21,22,23 - | lhz CARG4, 2(CARG3) // Load trace number. - | savex_ 24,25,26,27 - | lwz L, DISPATCH_GL(cur_L)(DISPATCH) - | savex_ 28,29,30,31 - | sub CARG3, TMP0, CARG3 // Compute exit number. - | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) - | srwi CARG3, CARG3, 2 - | stp L, DISPATCH_J(L)(DISPATCH) - | subi CARG3, CARG3, 2 - | stp BASE, L->base - | stw CARG4, DISPATCH_J(parent)(DISPATCH) - | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH) - | addi CARG1, DISPATCH, GG_DISP2J - | stw CARG3, DISPATCH_J(exitno)(DISPATCH) - | addi CARG2, sp, 16 - | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) - | // Returns MULTRES (unscaled) or negated error code. - | lp TMP1, L->cframe - | lwz TMP2, 0(sp) - | lp BASE, L->base - |.if GPR64 - | rldicr sp, TMP1, 0, 61 - |.else - | rlwinm sp, TMP1, 0, 0, 29 - |.endif - | lwz PC, SAVE_PC // Get SAVE_PC. 
- | stw TMP2, 0(sp) - | stw L, SAVE_L // Set SAVE_L (on-trace resume/yield). - | b >1 - |.endif - |->vm_exit_interp: - |.if JIT - | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. - | lwz L, SAVE_L - | addi DISPATCH, JGL, -GG_DISP2G-32768 - | stp BASE, L->base - |1: - | cmpwi CARG1, 0 - | blt >9 // Check for error from exit. - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | slwi MULTRES, CARG1, 3 - | li TMP2, 0 - | stw MULTRES, SAVE_MULTRES - | lwz TMP1, LFUNC:RB->pc - | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH) - | lwz KBASE, PC2PROTO(k)(TMP1) - | // Setup type comparison constants. - | li TISNUM, LJ_TISNUM - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | stw TMP3, TMPD - | li ZERO, 0 - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | lfs TOBIT, TMPD - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | li TISNIL, LJ_TNIL - | stw TMP0, TONUM_HI - | lfs TONUM, TMPD - | // Modified copy of ins_next which handles function header dispatch, too. - | lwz INS, 0(PC) - | addi PC, PC, 4 - | // Assumes TISNIL == ~LJ_VMST_INTERP == -1. - | stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) - | decode_OPP TMP1, INS - | decode_RA8 RA, INS - | lpx TMP0, DISPATCH, TMP1 - | mtctr TMP0 - | cmplwi TMP1, BC_FUNCF*4 // Function header? - | bge >2 - | decode_RB8 RB, INS - | decode_RD8 RD, INS - | decode_RC8 RC, INS - | bctr - |2: - | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function? - | blt >3 - | // Check frame below fast function. - | lwz TMP1, FRAME_PC(BASE) - | andix. TMP0, TMP1, FRAME_TYPE - | bney >3 // Trace stitching continuation? - | // Otherwise set KBASE for Lua function below fast function. - | lwz TMP2, -4(TMP1) - | decode_RA8 TMP0, TMP2 - | sub TMP1, BASE, TMP0 - | lwz LFUNC:TMP2, -12(TMP1) - | lwz TMP1, LFUNC:TMP2->pc - | lwz KBASE, PC2PROTO(k)(TMP1) - |3: - | subi RC, MULTRES, 8 - | add RA, RA, BASE - | bctr - | - |9: // Rethrow error from the right C frame. 
- | neg CARG2, CARG1 - | mr CARG1, L - | bl extern lj_err_throw // (lua_State *L, int errcode) - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// NYI: Use internal implementations of floor, ceil, trunc. - | - |->vm_modi: - | divwo. TMP0, CARG1, CARG2 - | bso >1 - |.if GPR64 - | xor CARG3, CARG1, CARG2 - | cmpwi CARG3, 0 - |.else - | xor. CARG3, CARG1, CARG2 - |.endif - | mullw TMP0, TMP0, CARG2 - | sub CARG1, CARG1, TMP0 - | bgelr - | cmpwi CARG1, 0; beqlr - | add CARG1, CARG1, CARG2 - | blr - |1: - | cmpwi CARG2, 0 - | li CARG1, 0 - | beqlr - | clrso TMP0 // Clear SO for -2147483648 % -1 and return 0. - | blr - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |// void lj_vm_cachesync(void *start, void *end) - |// Flush D-Cache and invalidate I-Cache. Assumes 32 byte cache line size. - |// This is a good lower bound, except for very ancient PPC models. - |->vm_cachesync: - |.if JIT or FFI - | // Compute start of first cache line and number of cache lines. - | rlwinm CARG1, CARG1, 0, 0, 26 - | sub CARG2, CARG2, CARG1 - | addi CARG2, CARG2, 31 - | rlwinm. CARG2, CARG2, 27, 5, 31 - | beqlr - | mtctr CARG2 - | mr CARG3, CARG1 - |1: // Flush D-Cache. - | dcbst r0, CARG1 - | addi CARG1, CARG1, 32 - | bdnz <1 - | sync - | mtctr CARG2 - |1: // Invalidate I-Cache. 
- | icbi r0, CARG3 - | addi CARG3, CARG3, 32 - | bdnz <1 - | isync - | blr - |.endif - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. Callback slot number in r11, g in r12. - |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - | saveregs - | lwz CTSTATE, GL:r12->ctype_state - | addi DISPATCH, r12, GG_G2DISP - | stw r11, CTSTATE->cb.slot - | stw r3, CTSTATE->cb.gpr[0] - | stfd f1, CTSTATE->cb.fpr[0] - | stw r4, CTSTATE->cb.gpr[1] - | stfd f2, CTSTATE->cb.fpr[1] - | stw r5, CTSTATE->cb.gpr[2] - | stfd f3, CTSTATE->cb.fpr[2] - | stw r6, CTSTATE->cb.gpr[3] - | stfd f4, CTSTATE->cb.fpr[3] - | stw r7, CTSTATE->cb.gpr[4] - | stfd f5, CTSTATE->cb.fpr[4] - | stw r8, CTSTATE->cb.gpr[5] - | stfd f6, CTSTATE->cb.fpr[5] - | stw r9, CTSTATE->cb.gpr[6] - | stfd f7, CTSTATE->cb.fpr[6] - | stw r10, CTSTATE->cb.gpr[7] - | stfd f8, CTSTATE->cb.fpr[7] - | addi TMP0, sp, CFRAME_SPACE+8 - | stw TMP0, CTSTATE->cb.stack - | mr CARG1, CTSTATE - | stw CTSTATE, SAVE_PC // Any value outside of bytecode is ok. - | mr CARG2, sp - | bl extern lj_ccallback_enter // (CTState *cts, void *cf) - | // Returns lua_State *. - | lp BASE, L:CRET1->base - | li TISNUM, LJ_TISNUM // Setup type comparison constants. - | lp RC, L:CRET1->top - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | li ZERO, 0 - | mr L, CRET1 - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | stw TMP0, TONUM_HI - | li TISNIL, LJ_TNIL - | li_vmstate INTERP - | lfs TOBIT, TMPD - | stw TMP3, TMPD - | sub RC, RC, BASE - | st_vmstate - | lfs TONUM, TMPD - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. 
- |.if FFI - | lwz CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) - | stp BASE, L->base - | stp RB, L->top - | stp L, CTSTATE->L - | mr CARG1, CTSTATE - | mr CARG2, RA - | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) - | lwz CRET1, CTSTATE->cb.gpr[0] - | lfd FARG1, CTSTATE->cb.fpr[0] - | lwz CRET2, CTSTATE->cb.gpr[1] - | b ->vm_leave_unw - |.endif - | - |->vm_ffi_call: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. - |.if FFI - | .type CCSTATE, CCallState, CARG1 - | lwz TMP1, CCSTATE->spadj - | mflr TMP0 - | lbz CARG2, CCSTATE->nsp - | lbz CARG3, CCSTATE->nfpr - | neg TMP1, TMP1 - | stw TMP0, 4(sp) - | cmpwi cr1, CARG3, 0 - | mr TMP2, sp - | addic. CARG2, CARG2, -1 - | stwux sp, sp, TMP1 - | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. - | stw r14, -4(TMP2) - | stw CCSTATE, -8(TMP2) - | mr r14, TMP2 - | la TMP1, CCSTATE->stack - | slwi CARG2, CARG2, 2 - | blty >2 - | la TMP2, 8(sp) - |1: - | lwzx TMP0, TMP1, CARG2 - | stwx TMP0, TMP2, CARG2 - | addic. CARG2, CARG2, -4 - | bge <1 - |2: - | bney cr1, >3 - | lfd f1, CCSTATE->fpr[0] - | lfd f2, CCSTATE->fpr[1] - | lfd f3, CCSTATE->fpr[2] - | lfd f4, CCSTATE->fpr[3] - | lfd f5, CCSTATE->fpr[4] - | lfd f6, CCSTATE->fpr[5] - | lfd f7, CCSTATE->fpr[6] - | lfd f8, CCSTATE->fpr[7] - |3: - | lp TMP0, CCSTATE->func - | lwz CARG2, CCSTATE->gpr[1] - | lwz CARG3, CCSTATE->gpr[2] - | lwz CARG4, CCSTATE->gpr[3] - | lwz CARG5, CCSTATE->gpr[4] - | mtctr TMP0 - | lwz r8, CCSTATE->gpr[5] - | lwz r9, CCSTATE->gpr[6] - | lwz r10, CCSTATE->gpr[7] - | lwz CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. 
- | bctrl - | lwz CCSTATE:TMP1, -8(r14) - | lwz TMP2, -4(r14) - | lwz TMP0, 4(r14) - | stw CARG1, CCSTATE:TMP1->gpr[0] - | stfd FARG1, CCSTATE:TMP1->fpr[0] - | stw CARG2, CCSTATE:TMP1->gpr[1] - | mtlr TMP0 - | stw CARG3, CCSTATE:TMP1->gpr[2] - | mr sp, r14 - | stw CARG4, CCSTATE:TMP1->gpr[3] - | mr r14, TMP2 - | blr - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. */ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.if DUALNUM - | lwzux TMP0, RA, BASE - | addi PC, PC, 4 - | lwz CARG2, 4(RA) - | lwzux TMP1, RD, BASE - | lwz TMP2, -4(PC) - | checknum cr0, TMP0 - | lwz CARG3, 4(RD) - | decode_RD4 TMP2, TMP2 - | checknum cr1, TMP1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | bne cr0, >7 - | bne cr1, >8 - | cmpw CARG2, CARG3 - if (op == BC_ISLT) { - | bge >2 - } else if (op == BC_ISGE) { - | blt >2 - } else if (op == BC_ISLE) { - | bgt >2 - } else { - | ble >2 - } - |1: - | add PC, PC, TMP2 - |2: - | ins_next - | - |7: // RA is not an integer. - | bgt cr0, ->vmeta_comp - | // RA is a number. - | lfd f0, 0(RA) - | bgt cr1, ->vmeta_comp - | blt cr1, >4 - | // RA is a number, RD is an integer. - | tonum_i f1, CARG3 - | b >5 - | - |8: // RA is an integer, RD is not an integer. - | bgt cr1, ->vmeta_comp - | // RA is an integer, RD is a number. 
- | tonum_i f0, CARG2 - |4: - | lfd f1, 0(RD) - |5: - | fcmpu cr0, f0, f1 - if (op == BC_ISLT) { - | bge <2 - } else if (op == BC_ISGE) { - | blt <2 - } else if (op == BC_ISLE) { - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq - | bge <2 - } else { - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq - | blt <2 - } - | b <1 - |.else - | lwzx TMP0, BASE, RA - | addi PC, PC, 4 - | lfdx f0, BASE, RA - | lwzx TMP1, BASE, RD - | checknum cr0, TMP0 - | lwz TMP2, -4(PC) - | lfdx f1, BASE, RD - | checknum cr1, TMP1 - | decode_RD4 TMP2, TMP2 - | bge cr0, ->vmeta_comp - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | bge cr1, ->vmeta_comp - | fcmpu cr0, f0, f1 - if (op == BC_ISLT) { - | bge >1 - } else if (op == BC_ISGE) { - | blt >1 - } else if (op == BC_ISLE) { - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq - | bge >1 - } else { - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq - | blt >1 - } - | add PC, PC, TMP2 - |1: - | ins_next - |.endif - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1*8, RD = src2*8, JMP with RD = target - |.if DUALNUM - | lwzux TMP0, RA, BASE - | addi PC, PC, 4 - | lwz CARG2, 4(RA) - | lwzux TMP1, RD, BASE - | checknum cr0, TMP0 - | lwz TMP2, -4(PC) - | checknum cr1, TMP1 - | decode_RD4 TMP2, TMP2 - | lwz CARG3, 4(RD) - | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - if (vk) { - | ble cr7, ->BC_ISEQN_Z - } else { - | ble cr7, ->BC_ISNEN_Z - } - |.else - | lwzux TMP0, RA, BASE - | lwz TMP2, 0(PC) - | lfd f0, 0(RA) - | addi PC, PC, 4 - | lwzux TMP1, RD, BASE - | checknum cr0, TMP0 - | decode_RD4 TMP2, TMP2 - | lfd f1, 0(RD) - | checknum cr1, TMP1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | bge cr0, >5 - | bge cr1, >5 - | fcmpu cr0, f0, f1 - if (vk) { - | bne >1 - | add PC, PC, TMP2 - } else { - | beq >1 - | add PC, PC, TMP2 - } - |1: - | ins_next - |.endif - |5: // Either or both types are not numbers. 
- |.if not DUALNUM - | lwz CARG2, 4(RA) - | lwz CARG3, 4(RD) - |.endif - |.if FFI - | cmpwi cr7, TMP0, LJ_TCDATA - | cmpwi cr5, TMP1, LJ_TCDATA - |.endif - | not TMP3, TMP0 - | cmplw TMP0, TMP1 - | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? - |.if FFI - | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq - |.endif - | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? - |.if FFI - | beq cr7, ->vmeta_equal_cd - |.endif - | cmplw cr5, CARG2, CARG3 - | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. - | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. - | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. - | mr SAVE0, PC - | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. - | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. - if (vk) { - | bne cr0, >6 - | add PC, PC, TMP2 - |6: - } else { - | beq cr0, >6 - | add PC, PC, TMP2 - |6: - } - |.if DUALNUM - | bge cr0, >2 // Done if 1 or 2. - |1: - | ins_next - |2: - |.else - | blt cr0, <1 // Done if 1 or 2. - |.endif - | blt cr6, <1 // Done if not tab/ud. - | - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - | lwz TAB:TMP2, TAB:CARG2->metatable - | li CARG4, 1-vk // ne = 0 or 1. - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable? - | lbz TMP2, TAB:TMP2->nomm - | andix. TMP2, TMP2, 1<vmeta_equal // Handle __eq metamethod. 
- break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src*8, RD = str_const*8 (~), JMP with RD = target - | lwzux TMP0, RA, BASE - | srwi RD, RD, 1 - | lwz STR:TMP3, 4(RA) - | lwz TMP2, 0(PC) - | subfic RD, RD, -4 - | addi PC, PC, 4 - |.if FFI - | cmpwi TMP0, LJ_TCDATA - |.endif - | lwzx STR:TMP1, KBASE, RD // KBASE-4-str_const*4 - | .gpr64 extsw TMP0, TMP0 - | subfic TMP0, TMP0, LJ_TSTR - |.if FFI - | beq ->vmeta_equal_cd - |.endif - | sub TMP1, STR:TMP1, STR:TMP3 - | or TMP0, TMP0, TMP1 - | decode_RD4 TMP2, TMP2 - | subfic TMP0, TMP0, 0 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | subfe TMP1, TMP1, TMP1 - if (vk) { - | andc TMP2, TMP2, TMP1 - } else { - | and TMP2, TMP2, TMP1 - } - | add PC, PC, TMP2 - | ins_next - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src*8, RD = num_const*8, JMP with RD = target - |.if DUALNUM - | lwzux TMP0, RA, BASE - | addi PC, PC, 4 - | lwz CARG2, 4(RA) - | lwzux TMP1, RD, KBASE - | checknum cr0, TMP0 - | lwz TMP2, -4(PC) - | checknum cr1, TMP1 - | decode_RD4 TMP2, TMP2 - | lwz CARG3, 4(RD) - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - if (vk) { - |->BC_ISEQN_Z: - } else { - |->BC_ISNEN_Z: - } - | bne cr0, >7 - | bne cr1, >8 - | cmpw CARG2, CARG3 - |4: - |.else - if (vk) { - |->BC_ISEQN_Z: // Dummy label. - } else { - |->BC_ISNEN_Z: // Dummy label. - } - | lwzx TMP0, BASE, RA - | addi PC, PC, 4 - | lfdx f0, BASE, RA - | lwz TMP2, -4(PC) - | lfdx f1, KBASE, RD - | decode_RD4 TMP2, TMP2 - | checknum TMP0 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | bge >3 - | fcmpu cr0, f0, f1 - |.endif - if (vk) { - | bne >1 - | add PC, PC, TMP2 - |1: - |.if not FFI - |3: - |.endif - } else { - | beq >2 - |1: - |.if not FFI - |3: - |.endif - | add PC, PC, TMP2 - |2: - } - | ins_next - |.if FFI - |3: - | cmpwi TMP0, LJ_TCDATA - | beq ->vmeta_equal_cd - | b <1 - |.endif - |.if DUALNUM - |7: // RA is not an integer. - | bge cr0, <3 - | // RA is a number. 
- | lfd f0, 0(RA) - | blt cr1, >1 - | // RA is a number, RD is an integer. - | tonum_i f1, CARG3 - | b >2 - | - |8: // RA is an integer, RD is a number. - | tonum_i f0, CARG2 - |1: - | lfd f1, 0(RD) - |2: - | fcmpu cr0, f0, f1 - | b <4 - |.endif - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target - | lwzx TMP0, BASE, RA - | srwi TMP1, RD, 3 - | lwz TMP2, 0(PC) - | not TMP1, TMP1 - | addi PC, PC, 4 - |.if FFI - | cmpwi TMP0, LJ_TCDATA - |.endif - | sub TMP0, TMP0, TMP1 - |.if FFI - | beq ->vmeta_equal_cd - |.endif - | decode_RD4 TMP2, TMP2 - | .gpr64 extsw TMP0, TMP0 - | addic TMP0, TMP0, -1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - | subfe TMP1, TMP1, TMP1 - if (vk) { - | and TMP2, TMP2, TMP1 - } else { - | andc TMP2, TMP2, TMP1 - } - | add PC, PC, TMP2 - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst*8 or unused, RD = src*8, JMP with RD = target - | lwzx TMP0, BASE, RD - | lwz INS, 0(PC) - | addi PC, PC, 4 - if (op == BC_IST || op == BC_ISF) { - | .gpr64 extsw TMP0, TMP0 - | subfic TMP0, TMP0, LJ_TTRUE - | decode_RD4 TMP2, INS - | subfe TMP1, TMP1, TMP1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) - if (op == BC_IST) { - | andc TMP2, TMP2, TMP1 - } else { - | and TMP2, TMP2, TMP1 - } - | add PC, PC, TMP2 - } else { - | li TMP1, LJ_TFALSE - | lfdx f0, BASE, RD - | cmplw TMP0, TMP1 - if (op == BC_ISTC) { - | bge >1 - } else { - | blt >1 - } - | addis PC, PC, -(BCBIAS_J*4 >> 16) - | decode_RD4 TMP2, INS - | stfdx f0, BASE, RA - | add PC, PC, TMP2 - |1: - } - | ins_next - break; - - case BC_ISTYPE: - | // RA = src*8, RD = -type*8 - | lwzx TMP0, BASE, RA - | srwi TMP1, RD, 3 - | ins_next1 - |.if not PPE and not GPR64 - | add. 
TMP0, TMP0, TMP1 - |.else - | neg TMP1, TMP1 - | cmpw TMP0, TMP1 - |.endif - | bne ->vmeta_istype - | ins_next2 - break; - case BC_ISNUM: - | // RA = src*8, RD = -(TISNUM-1)*8 - | lwzx TMP0, BASE, RA - | ins_next1 - | checknum TMP0 - | bge ->vmeta_istype - | ins_next2 - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA = dst*8, RD = src*8 - | ins_next1 - | lfdx f0, BASE, RD - | stfdx f0, BASE, RA - | ins_next2 - break; - case BC_NOT: - | // RA = dst*8, RD = src*8 - | ins_next1 - | lwzx TMP0, BASE, RD - | .gpr64 extsw TMP0, TMP0 - | subfic TMP1, TMP0, LJ_TTRUE - | adde TMP0, TMP0, TMP1 - | stwx TMP0, BASE, RA - | ins_next2 - break; - case BC_UNM: - | // RA = dst*8, RD = src*8 - | lwzux TMP1, RD, BASE - | lwz TMP0, 4(RD) - | checknum TMP1 - |.if DUALNUM - | bne >5 - |.if GPR64 - | lus TMP2, 0x8000 - | neg TMP0, TMP0 - | cmplw TMP0, TMP2 - | beq >4 - |.else - | nego. TMP0, TMP0 - | bso >4 - |1: - |.endif - | ins_next1 - | stwux TISNUM, RA, BASE - | stw TMP0, 4(RA) - |3: - | ins_next2 - |4: - |.if not GPR64 - | // Potential overflow. - | checkov TMP1, <1 // Ignore unrelated overflow. - |.endif - | lus TMP1, 0x41e0 // 2^31. - | li TMP0, 0 - | b >7 - |.endif - |5: - | bge ->vmeta_unm - | xoris TMP1, TMP1, 0x8000 - |7: - | ins_next1 - | stwux TMP1, RA, BASE - | stw TMP0, 4(RA) - |.if DUALNUM - | b <3 - |.else - | ins_next2 - |.endif - break; - case BC_LEN: - | // RA = dst*8, RD = src*8 - | lwzux TMP0, RD, BASE - | lwz CARG1, 4(RD) - | checkstr TMP0; bne >2 - | lwz CRET1, STR:CARG1->len - |1: - |.if DUALNUM - | ins_next1 - | stwux TISNUM, RA, BASE - | stw CRET1, 4(RA) - |.else - | tonum_u f0, CRET1 // Result is a non-negative integer. 
- | ins_next1 - | stfdx f0, BASE, RA - |.endif - | ins_next2 - |2: - | checktab TMP0; bne ->vmeta_len -#if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable - | cmplwi TAB:TMP2, 0 - | bne >9 - |3: -#endif - |->BC_LEN_Z: - | bl extern lj_tab_len // (GCtab *t) - | // Returns uint32_t (but less than 2^31). - | b <1 -#if LJ_52 - |9: - | lbz TMP0, TAB:TMP2->nomm - | andix. TMP0, TMP0, 1<vmeta_len -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro ins_arithpre - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | lwzx TMP1, BASE, RB - | .if DUALNUM - | lwzx TMP2, KBASE, RC - | .endif - | lfdx f14, BASE, RB - | lfdx f15, KBASE, RC - | .if DUALNUM - | checknum cr0, TMP1 - | checknum cr1, TMP2 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_vn - | .else - | checknum TMP1; bge ->vmeta_arith_vn - | .endif - || break; - ||case 1: - | lwzx TMP1, BASE, RB - | .if DUALNUM - | lwzx TMP2, KBASE, RC - | .endif - | lfdx f15, BASE, RB - | lfdx f14, KBASE, RC - | .if DUALNUM - | checknum cr0, TMP1 - | checknum cr1, TMP2 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_nv - | .else - | checknum TMP1; bge ->vmeta_arith_nv - | .endif - || break; - ||default: - | lwzx TMP1, BASE, RB - | lwzx TMP2, BASE, RC - | lfdx f14, BASE, RB - | lfdx f15, BASE, RC - | checknum cr0, TMP1 - | checknum cr1, TMP2 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_vv - || break; - ||} - |.endmacro - | - |.macro ins_arithfallback, ins - ||switch (vk) { - ||case 0: - | ins ->vmeta_arith_vn2 - || break; - ||case 1: - | ins ->vmeta_arith_nv2 - || break; - ||default: - | ins ->vmeta_arith_vv2 - || break; - ||} - |.endmacro - | - |.macro intmod, a, b, c - | bl ->vm_modi - |.endmacro - | - |.macro fpmod, a, b, c - |->BC_MODVN_Z: - | fdiv FARG1, b, c - | // NYI: Use internal implementation of floor. 
- | blex floor // floor(b/c) - | fmul a, FARG1, c - | fsub a, b, a // b - floor(b/c)*c - |.endmacro - | - |.macro ins_arithfp, fpins - | ins_arithpre - |.if "fpins" == "fpmod_" - | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - |.else - | fpins f0, f14, f15 - | ins_next1 - | stfdx f0, BASE, RA - | ins_next2 - |.endif - |.endmacro - | - |.macro ins_arithdn, intins, fpins - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | lwzux TMP1, RB, BASE - | lwzux TMP2, RC, KBASE - | lwz CARG1, 4(RB) - | checknum cr0, TMP1 - | lwz CARG2, 4(RC) - || break; - ||case 1: - | lwzux TMP1, RB, BASE - | lwzux TMP2, RC, KBASE - | lwz CARG2, 4(RB) - | checknum cr0, TMP1 - | lwz CARG1, 4(RC) - || break; - ||default: - | lwzux TMP1, RB, BASE - | lwzux TMP2, RC, BASE - | lwz CARG1, 4(RB) - | checknum cr0, TMP1 - | lwz CARG2, 4(RC) - || break; - ||} - | checknum cr1, TMP2 - | bne >5 - | bne cr1, >5 - | intins CARG1, CARG1, CARG2 - | bso >4 - |1: - | ins_next1 - | stwux TISNUM, RA, BASE - | stw CARG1, 4(RA) - |2: - | ins_next2 - |4: // Overflow. - | checkov TMP0, <1 // Ignore unrelated overflow. - | ins_arithfallback b - |5: // FP variant. - ||if (vk == 1) { - | lfd f15, 0(RB) - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | lfd f14, 0(RC) - ||} else { - | lfd f14, 0(RB) - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | lfd f15, 0(RC) - ||} - | ins_arithfallback bge - |.if "fpins" == "fpmod_" - | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - |.else - | fpins f0, f14, f15 - | ins_next1 - | stfdx f0, BASE, RA - | b <2 - |.endif - |.endmacro - | - |.macro ins_arith, intins, fpins - |.if DUALNUM - | ins_arithdn intins, fpins - |.else - | ins_arithfp fpins - |.endif - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - |.if GPR64 - |.macro addo32., y, a, b - | // Need to check overflow for (a<<32) + (b<<32). - | rldicr TMP0, a, 32, 31 - | rldicr TMP3, b, 32, 31 - | addo. 
TMP0, TMP0, TMP3 - | add y, a, b - |.endmacro - | ins_arith addo32., fadd - |.else - | ins_arith addo., fadd - |.endif - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - |.if GPR64 - |.macro subo32., y, a, b - | // Need to check overflow for (a<<32) - (b<<32). - | rldicr TMP0, a, 32, 31 - | rldicr TMP3, b, 32, 31 - | subo. TMP0, TMP0, TMP3 - | sub y, a, b - |.endmacro - | ins_arith subo32., fsub - |.else - | ins_arith subo., fsub - |.endif - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith mullwo., fmul - break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arithfp fdiv - break; - case BC_MODVN: - | ins_arith intmod, fpmod - break; - case BC_MODNV: case BC_MODVV: - | ins_arith intmod, fpmod_ - break; - case BC_POW: - | // NYI: (partial) integer arithmetic. - | lwzx TMP1, BASE, RB - | lfdx FARG1, BASE, RB - | lwzx TMP2, BASE, RC - | lfdx FARG2, BASE, RC - | checknum cr0, TMP1 - | checknum cr1, TMP2 - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | bge ->vmeta_arith_vv - | blex pow - | ins_next1 - | stfdx FARG1, BASE, RA - | ins_next2 - break; - - case BC_CAT: - | // RA = dst*8, RB = src_start*8, RC = src_end*8 - | sub CARG3, RC, RB - | stp BASE, L->base - | add CARG2, BASE, RC - | mr SAVE0, RB - |->BC_CAT_Z: - | stw PC, SAVE_PC - | mr CARG1, L - | srwi CARG3, CARG3, 3 - | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) - | // Returns NULL (finished) or TValue * (metamethod). - | cmplwi CRET1, 0 - | lp BASE, L->base - | bne ->vmeta_binop - | ins_next1 - | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. 
- | stfdx f0, BASE, RA - | ins_next2 - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst*8, RD = str_const*8 (~) - | srwi TMP1, RD, 1 - | subfic TMP1, TMP1, -4 - | ins_next1 - | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 - | li TMP2, LJ_TSTR - | stwux TMP2, RA, BASE - | stw TMP0, 4(RA) - | ins_next2 - break; - case BC_KCDATA: - |.if FFI - | // RA = dst*8, RD = cdata_const*8 (~) - | srwi TMP1, RD, 1 - | subfic TMP1, TMP1, -4 - | ins_next1 - | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 - | li TMP2, LJ_TCDATA - | stwux TMP2, RA, BASE - | stw TMP0, 4(RA) - | ins_next2 - |.endif - break; - case BC_KSHORT: - | // RA = dst*8, RD = int16_literal*8 - |.if DUALNUM - | slwi RD, RD, 13 - | srawi RD, RD, 16 - | ins_next1 - | stwux TISNUM, RA, BASE - | stw RD, 4(RA) - | ins_next2 - |.else - | // The soft-float approach is faster. - | slwi RD, RD, 13 - | srawi TMP1, RD, 31 - | xor TMP2, TMP1, RD - | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) - | cntlzw TMP3, TMP2 - | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 - | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa - | subfic TMP3, RD, 0 - | slwi TMP1, TMP1, 20 - | rlwimi RD, TMP2, 21, 1, 31 // hi = sign(x) | (mantissa>>11) - | subfe TMP0, TMP0, TMP0 - | add RD, RD, TMP1 // hi = hi + exponent-1 - | and RD, RD, TMP0 // hi = x == 0 ? 
0 : hi - | ins_next1 - | stwux RD, RA, BASE - | stw ZERO, 4(RA) - | ins_next2 - |.endif - break; - case BC_KNUM: - | // RA = dst*8, RD = num_const*8 - | ins_next1 - | lfdx f0, KBASE, RD - | stfdx f0, BASE, RA - | ins_next2 - break; - case BC_KPRI: - | // RA = dst*8, RD = primitive_type*8 (~) - | srwi TMP1, RD, 3 - | not TMP0, TMP1 - | ins_next1 - | stwx TMP0, BASE, RA - | ins_next2 - break; - case BC_KNIL: - | // RA = base*8, RD = end*8 - | stwx TISNIL, BASE, RA - | addi RA, RA, 8 - |1: - | stwx TISNIL, BASE, RA - | cmpw RA, RD - | addi RA, RA, 8 - | blt <1 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst*8, RD = uvnum*8 - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RD, RD, 1 - | addi RD, RD, offsetof(GCfuncL, uvptr) - | lwzx UPVAL:RB, LFUNC:RB, RD - | ins_next1 - | lwz TMP1, UPVAL:RB->v - | lfd f0, 0(TMP1) - | stfdx f0, BASE, RA - | ins_next2 - break; - case BC_USETV: - | // RA = uvnum*8, RD = src*8 - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | lfdux f0, RD, BASE - | lwzx UPVAL:RB, LFUNC:RB, RA - | lbz TMP3, UPVAL:RB->marked - | lwz CARG2, UPVAL:RB->v - | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbz TMP0, UPVAL:RB->closed - | lwz TMP2, 0(RD) - | stfd f0, 0(CARG2) - | cmplwi cr1, TMP0, 0 - | lwz TMP1, 4(RD) - | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq - | subi TMP2, TMP2, (LJ_TNUMX+1) - | bne >2 // Upvalue is closed and black? - |1: - | ins_next - | - |2: // Check if new value is collectable. - | cmplwi TMP2, LJ_TISGCV - (LJ_TNUMX+1) - | bge <1 // tvisgcv(v) - | lbz TMP3, GCOBJ:TMP1->gch.marked - | andix. TMP3, TMP3, LJ_GC_WHITES // iswhite(v) - | la CARG1, GG_DISP2G(DISPATCH) - | // Crossed a write barrier. Move the barrier forward. 
- | beq <1 - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | b <1 - break; - case BC_USETS: - | // RA = uvnum*8, RD = str_const*8 (~) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi TMP1, RD, 1 - | srwi RA, RA, 1 - | subfic TMP1, TMP1, -4 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4 - | lwzx UPVAL:RB, LFUNC:RB, RA - | lbz TMP3, UPVAL:RB->marked - | lwz CARG2, UPVAL:RB->v - | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbz TMP3, STR:TMP1->marked - | lbz TMP2, UPVAL:RB->closed - | li TMP0, LJ_TSTR - | stw STR:TMP1, 4(CARG2) - | stw TMP0, 0(CARG2) - | bne >2 - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | andix. TMP3, TMP3, LJ_GC_WHITES // iswhite(str) - | cmplwi cr1, TMP2, 0 - | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq - | la CARG1, GG_DISP2G(DISPATCH) - | // Crossed a write barrier. Move the barrier forward. - | beq <1 - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | b <1 - break; - case BC_USETN: - | // RA = uvnum*8, RD = num_const*8 - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | lfdx f0, KBASE, RD - | lwzx UPVAL:RB, LFUNC:RB, RA - | ins_next1 - | lwz TMP1, UPVAL:RB->v - | stfd f0, 0(TMP1) - | ins_next2 - break; - case BC_USETP: - | // RA = uvnum*8, RD = primitive_type*8 (~) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | srwi TMP0, RD, 3 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | not TMP0, TMP0 - | lwzx UPVAL:RB, LFUNC:RB, RA - | ins_next1 - | lwz TMP1, UPVAL:RB->v - | stw TMP0, 0(TMP1) - | ins_next2 - break; - - case BC_UCLO: - | // RA = level*8, RD = target - | lwz TMP1, L->openupval - | branch_RD // Do this first since RD is not saved. 
- | stp BASE, L->base - | cmplwi TMP1, 0 - | mr CARG1, L - | beq >1 - | add CARG2, BASE, RA - | bl extern lj_func_closeuv // (lua_State *L, TValue *level) - | lp BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) - | srwi TMP1, RD, 1 - | stp BASE, L->base - | subfic TMP1, TMP1, -4 - | stw PC, SAVE_PC - | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 - | mr CARG1, L - | lwz CARG3, FRAME_FUNC(BASE) - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | bl extern lj_func_newL_gc - | // Returns GCfuncL *. - | lp BASE, L->base - | li TMP0, LJ_TFUNC - | stwux TMP0, RA, BASE - | stw LFUNC:CRET1, 4(RA) - | ins_next - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) - | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | mr CARG1, L - | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | stp BASE, L->base - | cmplw TMP0, TMP1 - | stw PC, SAVE_PC - | bge >5 - |1: - if (op == BC_TNEW) { - | rlwinm CARG2, RD, 29, 21, 31 - | rlwinm CARG3, RD, 18, 27, 31 - | cmpwi CARG2, 0x7ff; beq >3 - |2: - | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) - | // Returns Table *. - } else { - | srwi TMP1, RD, 1 - | subfic TMP1, TMP1, -4 - | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 - | bl extern lj_tab_dup // (lua_State *L, Table *kt) - | // Returns Table *. 
- } - | lp BASE, L->base - | li TMP0, LJ_TTAB - | stwux TMP0, RA, BASE - | stw TAB:CRET1, 4(RA) - | ins_next - if (op == BC_TNEW) { - |3: - | li CARG2, 0x801 - | b <2 - } - |5: - | mr SAVE0, RD - | bl extern lj_gc_step_fixtop // (lua_State *L) - | mr RD, SAVE0 - | mr CARG1, L - | b <1 - break; - - case BC_GGET: - | // RA = dst*8, RD = str_const*8 (~) - case BC_GSET: - | // RA = src*8, RD = str_const*8 (~) - | lwz LFUNC:TMP2, FRAME_FUNC(BASE) - | srwi TMP1, RD, 1 - | lwz TAB:RB, LFUNC:TMP2->env - | subfic TMP1, TMP1, -4 - | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - break; - - case BC_TGETV: - | // RA = dst*8, RB = table*8, RC = key*8 - | lwzux CARG1, RB, BASE - | lwzux CARG2, RC, BASE - | lwz TAB:RB, 4(RB) - |.if DUALNUM - | lwz RC, 4(RC) - |.else - | lfd f0, 0(RC) - |.endif - | checktab CARG1 - | checknum cr1, CARG2 - | bne ->vmeta_tgetv - |.if DUALNUM - | lwz TMP0, TAB:RB->asize - | bne cr1, >5 - | lwz TMP1, TAB:RB->array - | cmplw TMP0, RC - | slwi TMP2, RC, 3 - |.else - | bge cr1, >5 - | // Convert number key to integer, check for integerness and range. - | fctiwz f1, f0 - | fadd f2, f0, TOBIT - | stfd f1, TMPD - | lwz TMP0, TAB:RB->asize - | fsub f2, f2, TOBIT - | lwz TMP2, TMPD_LO - | lwz TMP1, TAB:RB->array - | fcmpu cr1, f0, f2 - | cmplw cr0, TMP0, TMP2 - | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq - | slwi TMP2, TMP2, 3 - |.endif - | ble ->vmeta_tgetv // Integer key and in array part? - | lwzx TMP0, TMP1, TMP2 - | lfdx f14, TMP1, TMP2 - | checknil TMP0; beq >2 - |1: - | ins_next1 - | stfdx f14, BASE, RA - | ins_next2 - | - |2: // Check for __index if table value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable: done. - | lbz TMP0, TAB:TMP2->nomm - | andix. TMP0, TMP0, 1<vmeta_tgetv - | - |5: - | checkstr CARG2; bne ->vmeta_tgetv - |.if not DUALNUM - | lwz STR:RC, 4(RC) - |.endif - | b ->BC_TGETS_Z // String key? 
- break; - case BC_TGETS: - | // RA = dst*8, RB = table*8, RC = str_const*8 (~) - | lwzux CARG1, RB, BASE - | srwi TMP1, RC, 1 - | lwz TAB:RB, 4(RB) - | subfic TMP1, TMP1, -4 - | checktab CARG1 - | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 - | bne ->vmeta_tgets1 - |->BC_TGETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 - | lwz TMP0, TAB:RB->hmask - | lwz TMP1, STR:RC->hash - | lwz NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | slwi TMP0, TMP1, 5 - | slwi TMP1, TMP1, 3 - | sub TMP1, TMP0, TMP1 - | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |1: - | lwz CARG1, NODE:TMP2->key - | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) - | lwz CARG2, NODE:TMP2->val - | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) - | checkstr CARG1; bne >4 - | cmpw TMP0, STR:RC; bne >4 - | checknil CARG2; beq >5 // Key found, but nil value? - |3: - | stwux CARG2, RA, BASE - | stw TMP1, 4(RA) - | ins_next - | - |4: // Follow hash chain. - | lwz NODE:TMP2, NODE:TMP2->next - | cmplwi NODE:TMP2, 0 - | bne <1 - | // End of hash chain: key not found, nil result. - | li CARG2, LJ_TNIL - | - |5: // Check for __index if table value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <3 // No metatable: done. - | lbz TMP0, TAB:TMP2->nomm - | andix. TMP0, TMP0, 1<vmeta_tgets - break; - case BC_TGETB: - | // RA = dst*8, RB = table*8, RC = index*8 - | lwzux CARG1, RB, BASE - | srwi TMP0, RC, 3 - | lwz TAB:RB, 4(RB) - | checktab CARG1; bne ->vmeta_tgetb - | lwz TMP1, TAB:RB->asize - | lwz TMP2, TAB:RB->array - | cmplw TMP0, TMP1; bge ->vmeta_tgetb - | lwzx TMP1, TMP2, RC - | lfdx f0, TMP2, RC - | checknil TMP1; beq >5 - |1: - | ins_next1 - | stfdx f0, BASE, RA - | ins_next2 - | - |5: // Check for __index if table value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable: done. - | lbz TMP2, TAB:TMP2->nomm - | andix. TMP2, TMP2, 1<vmeta_tgetb // Caveat: preserve TMP0! 
- break; - case BC_TGETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | add RB, BASE, RB - | lwz TAB:CARG1, 4(RB) - |.if DUALNUM - | add RC, BASE, RC - | lwz TMP0, TAB:CARG1->asize - | lwz CARG2, 4(RC) - | lwz TMP1, TAB:CARG1->array - |.else - | lfdx f0, BASE, RC - | lwz TMP0, TAB:CARG1->asize - | toint CARG2, f0 - | lwz TMP1, TAB:CARG1->array - |.endif - | cmplw TMP0, CARG2 - | slwi TMP2, CARG2, 3 - | ble ->vmeta_tgetr // In array part? - | lfdx f14, TMP1, TMP2 - |->BC_TGETR_Z: - | ins_next1 - | stfdx f14, BASE, RA - | ins_next2 - break; - - case BC_TSETV: - | // RA = src*8, RB = table*8, RC = key*8 - | lwzux CARG1, RB, BASE - | lwzux CARG2, RC, BASE - | lwz TAB:RB, 4(RB) - |.if DUALNUM - | lwz RC, 4(RC) - |.else - | lfd f0, 0(RC) - |.endif - | checktab CARG1 - | checknum cr1, CARG2 - | bne ->vmeta_tsetv - |.if DUALNUM - | lwz TMP0, TAB:RB->asize - | bne cr1, >5 - | lwz TMP1, TAB:RB->array - | cmplw TMP0, RC - | slwi TMP0, RC, 3 - |.else - | bge cr1, >5 - | // Convert number key to integer, check for integerness and range. - | fctiwz f1, f0 - | fadd f2, f0, TOBIT - | stfd f1, TMPD - | lwz TMP0, TAB:RB->asize - | fsub f2, f2, TOBIT - | lwz TMP2, TMPD_LO - | lwz TMP1, TAB:RB->array - | fcmpu cr1, f0, f2 - | cmplw cr0, TMP0, TMP2 - | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq - | slwi TMP0, TMP2, 3 - |.endif - | ble ->vmeta_tsetv // Integer key and in array part? - | lwzx TMP2, TMP1, TMP0 - | lbz TMP3, TAB:RB->marked - | lfdx f14, BASE, RA - | checknil TMP2; beq >3 - |1: - | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) - | stfdx f14, TMP1, TMP0 - | bne >7 - |2: - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable: done. - | lbz TMP2, TAB:TMP2->nomm - | andix. TMP2, TMP2, 1<vmeta_tsetv - | - |5: - | checkstr CARG2; bne ->vmeta_tsetv - |.if not DUALNUM - | lwz STR:RC, 4(RC) - |.endif - | b ->BC_TSETS_Z // String key? 
- | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0 - | b <2 - break; - case BC_TSETS: - | // RA = src*8, RB = table*8, RC = str_const*8 (~) - | lwzux CARG1, RB, BASE - | srwi TMP1, RC, 1 - | lwz TAB:RB, 4(RB) - | subfic TMP1, TMP1, -4 - | checktab CARG1 - | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 - | bne ->vmeta_tsets1 - |->BC_TSETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 - | lwz TMP0, TAB:RB->hmask - | lwz TMP1, STR:RC->hash - | lwz NODE:TMP2, TAB:RB->node - | stb ZERO, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | lfdx f14, BASE, RA - | slwi TMP0, TMP1, 5 - | slwi TMP1, TMP1, 3 - | sub TMP1, TMP0, TMP1 - | lbz TMP3, TAB:RB->marked - | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |1: - | lwz CARG1, NODE:TMP2->key - | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) - | lwz CARG2, NODE:TMP2->val - | lwz NODE:TMP1, NODE:TMP2->next - | checkstr CARG1; bne >5 - | cmpw TMP0, STR:RC; bne >5 - | checknil CARG2; beq >4 // Key found, but nil value? - |2: - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - | stfd f14, NODE:TMP2->val - | bne >7 - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | lwz TAB:TMP1, TAB:RB->metatable - | cmplwi TAB:TMP1, 0 - | beq <2 // No metatable: done. - | lbz TMP0, TAB:TMP1->nomm - | andix. TMP0, TMP0, 1<vmeta_tsets - | - |5: // Follow hash chain. - | cmplwi NODE:TMP1, 0 - | mr NODE:TMP2, NODE:TMP1 - | bne <1 - | // End of hash chain: key not found, add a new one. - | - | // But check for __newindex first. - | lwz TAB:TMP1, TAB:RB->metatable - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | stw PC, SAVE_PC - | mr CARG1, L - | cmplwi TAB:TMP1, 0 - | stp BASE, L->base - | beq >6 // No metatable: continue. - | lbz TMP0, TAB:TMP1->nomm - | andix. TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. 
- |6: - | li TMP0, LJ_TSTR - | stw STR:RC, 4(CARG3) - | mr CARG2, TAB:RB - | stw TMP0, 0(CARG3) - | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) - | // Returns TValue *. - | lp BASE, L->base - | stfd f14, 0(CRET1) - | b <3 // No 2nd write barrier needed. - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0 - | b <3 - break; - case BC_TSETB: - | // RA = src*8, RB = table*8, RC = index*8 - | lwzux CARG1, RB, BASE - | srwi TMP0, RC, 3 - | lwz TAB:RB, 4(RB) - | checktab CARG1; bne ->vmeta_tsetb - | lwz TMP1, TAB:RB->asize - | lwz TMP2, TAB:RB->array - | lbz TMP3, TAB:RB->marked - | cmplw TMP0, TMP1 - | lfdx f14, BASE, RA - | bge ->vmeta_tsetb - | lwzx TMP1, TMP2, RC - | checknil TMP1; beq >5 - |1: - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - | stfdx f14, TMP2, RC - | bne >7 - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | lwz TAB:TMP1, TAB:RB->metatable - | cmplwi TAB:TMP1, 0 - | beq <1 // No metatable: done. - | lbz TMP1, TAB:TMP1->nomm - | andix. TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0! - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0 - | b <2 - break; - case BC_TSETR: - | // RA = dst*8, RB = table*8, RC = key*8 - | add RB, BASE, RB - | lwz TAB:CARG2, 4(RB) - |.if DUALNUM - | add RC, BASE, RC - | lbz TMP3, TAB:CARG2->marked - | lwz TMP0, TAB:CARG2->asize - | lwz CARG3, 4(RC) - | lwz TMP1, TAB:CARG2->array - |.else - | lfdx f0, BASE, RC - | lbz TMP3, TAB:CARG2->marked - | lwz TMP0, TAB:CARG2->asize - | toint CARG3, f0 - | lwz TMP1, TAB:CARG2->array - |.endif - | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) - | bne >7 - |2: - | cmplw TMP0, CARG3 - | slwi TMP2, CARG3, 3 - | lfdx f14, BASE, RA - | ble ->vmeta_tsetr // In array part? - | ins_next1 - | stfdx f14, TMP1, TMP2 - | ins_next2 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. 
- | barrierback TAB:CARG2, TMP3, TMP2 - | b <2 - break; - - - case BC_TSETM: - | // RA = base*8 (table at base-1), RD = num_const*8 (start index) - | add RA, BASE, RA - |1: - | add TMP3, KBASE, RD - | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table. - | addic. TMP0, MULTRES, -8 - | lwz TMP3, 4(TMP3) // Integer constant is in lo-word. - | srwi CARG3, TMP0, 3 - | beq >4 // Nothing to copy? - | add CARG3, CARG3, TMP3 - | lwz TMP2, TAB:CARG2->asize - | slwi TMP1, TMP3, 3 - | lbz TMP3, TAB:CARG2->marked - | cmplw CARG3, TMP2 - | add TMP2, RA, TMP0 - | lwz TMP0, TAB:CARG2->array - | bgt >5 - | add TMP1, TMP1, TMP0 - | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - |3: // Copy result slots to table. - | lfd f0, 0(RA) - | addi RA, RA, 8 - | cmpw cr1, RA, TMP2 - | stfd f0, 0(TMP1) - | addi TMP1, TMP1, 8 - | blt cr1, <3 - | bne >7 - |4: - | ins_next - | - |5: // Need to resize array part. - | stp BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | mr SAVE0, RD - | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - | // Must not reallocate the stack. - | mr RD, SAVE0 - | b <1 - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0 - | b <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 - | add NARGS8:RC, NARGS8:RC, MULTRES - | // Fall through. Assumes BC_CALL follows. - break; - case BC_CALL: - | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 - | mr TMP2, BASE - | lwzux TMP0, BASE, RA - | lwz LFUNC:RB, 4(BASE) - | subi NARGS8:RC, NARGS8:RC, 8 - | addi BASE, BASE, 8 - | checkfunc TMP0; bne ->vmeta_call - | ins_call - break; - - case BC_CALLMT: - | // RA = base*8, (RB = 0,) RC = extra_nargs*8 - | add NARGS8:RC, NARGS8:RC, MULTRES - | // Fall through. Assumes BC_CALLT follows. 
- break; - case BC_CALLT: - | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 - | lwzux TMP0, RA, BASE - | lwz LFUNC:RB, 4(RA) - | subi NARGS8:RC, NARGS8:RC, 8 - | lwz TMP1, FRAME_PC(BASE) - | checkfunc TMP0 - | addi RA, RA, 8 - | bne ->vmeta_callt - |->BC_CALLT_Z: - | andix. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand. - | lbz TMP3, LFUNC:RB->ffid - | xori TMP2, TMP1, FRAME_VARG - | cmplwi cr1, NARGS8:RC, 0 - | bne >7 - |1: - | stw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. - | li TMP2, 0 - | cmplwi cr7, TMP3, 1 // (> FF_C) Calling a fast function? - | beq cr1, >3 - |2: - | addi TMP3, TMP2, 8 - | lfdx f0, RA, TMP2 - | cmplw cr1, TMP3, NARGS8:RC - | stfdx f0, BASE, TMP2 - | mr TMP2, TMP3 - | bne cr1, <2 - |3: - | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt - | beq >5 - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | lwz INS, -4(TMP1) - | decode_RA8 RA, INS - | sub TMP1, BASE, RA - | lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1) - | lwz TMP1, LFUNC:TMP1->pc - | lwz KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. - | b <4 - | - |7: // Tailcall from a vararg function. - | andix. TMP0, TMP2, FRAME_TYPEP - | bne <1 // Vararg frame below? - | sub BASE, BASE, TMP2 // Relocate BASE down. - | lwz TMP1, FRAME_PC(BASE) - | andix. TMP0, TMP1, FRAME_TYPE - | b <1 - break; - - case BC_ITERC: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) - | mr TMP2, BASE - | add BASE, BASE, RA - | lwz TMP1, -24(BASE) - | lwz LFUNC:RB, -20(BASE) - | lfd f1, -8(BASE) - | lfd f0, -16(BASE) - | stw TMP1, 0(BASE) // Copy callable. - | stw LFUNC:RB, 4(BASE) - | checkfunc TMP1 - | stfd f1, 16(BASE) // Copy control var. - | li NARGS8:RC, 16 // Iterators get 2 arguments. - | stfdu f0, 8(BASE) // Copy state. - | bne ->vmeta_call - | ins_call - break; - - case BC_ITERN: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. 
- |.endif - | add RA, BASE, RA - | lwz TAB:RB, -12(RA) - | lwz RC, -4(RA) // Get index from control var. - | lwz TMP0, TAB:RB->asize - | lwz TMP1, TAB:RB->array - | addi PC, PC, 4 - |1: // Traverse array part. - | cmplw RC, TMP0 - | slwi TMP3, RC, 3 - | bge >5 // Index points after array part? - | lwzx TMP2, TMP1, TMP3 - | lfdx f0, TMP1, TMP3 - | checknil TMP2 - | lwz INS, -4(PC) - | beq >4 - |.if DUALNUM - | stw RC, 4(RA) - | stw TISNUM, 0(RA) - |.else - | tonum_u f1, RC - |.endif - | addi RC, RC, 1 - | addis TMP3, PC, -(BCBIAS_J*4 >> 16) - | stfd f0, 8(RA) - | decode_RD4 TMP1, INS - | stw RC, -4(RA) // Update control var. - | add PC, TMP1, TMP3 - |.if not DUALNUM - | stfd f1, 0(RA) - |.endif - |3: - | ins_next - | - |4: // Skip holes in array part. - | addi RC, RC, 1 - | b <1 - | - |5: // Traverse hash part. - | lwz TMP1, TAB:RB->hmask - | sub RC, RC, TMP0 - | lwz TMP2, TAB:RB->node - |6: - | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1. - | slwi TMP3, RC, 5 - | bgty <3 - | slwi RB, RC, 3 - | sub TMP3, TMP3, RB - | lwzx RB, TMP2, TMP3 - | lfdx f0, TMP2, TMP3 - | add NODE:TMP3, TMP2, TMP3 - | checknil RB - | lwz INS, -4(PC) - | beq >7 - | lfd f1, NODE:TMP3->key - | addis TMP2, PC, -(BCBIAS_J*4 >> 16) - | stfd f0, 8(RA) - | add RC, RC, TMP0 - | decode_RD4 TMP1, INS - | stfd f1, 0(RA) - | addi RC, RC, 1 - | add PC, TMP1, TMP2 - | stw RC, -4(RA) // Update control var. - | b <3 - | - |7: // Skip holes in hash part. 
- | addi RC, RC, 1 - | b <6 - break; - - case BC_ISNEXT: - | // RA = base*8, RD = target (points to ITERN) - | add RA, BASE, RA - | lwz TMP0, -24(RA) - | lwz CFUNC:TMP1, -20(RA) - | lwz TMP2, -16(RA) - | lwz TMP3, -8(RA) - | cmpwi cr0, TMP2, LJ_TTAB - | cmpwi cr1, TMP0, LJ_TFUNC - | cmpwi cr6, TMP3, LJ_TNIL - | bne cr1, >5 - | lbz TMP1, CFUNC:TMP1->ffid - | crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq - | cmpwi cr7, TMP1, FF_next_N - | srwi TMP0, RD, 1 - | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq - | add TMP3, PC, TMP0 - | bne cr0, >5 - | lus TMP1, 0xfffe - | ori TMP1, TMP1, 0x7fff - | stw ZERO, -4(RA) // Initialize control var. - | stw TMP1, -8(RA) - | addis PC, TMP3, -(BCBIAS_J*4 >> 16) - |1: - | ins_next - |5: // Despecialize bytecode if any of the checks fail. - | li TMP0, BC_JMP - | li TMP1, BC_ITERC - | stb TMP0, -1(PC) - | addis PC, TMP3, -(BCBIAS_J*4 >> 16) - | stb TMP1, 3(PC) - | b <1 - break; - - case BC_VARG: - | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 - | lwz TMP0, FRAME_PC(BASE) - | add RC, BASE, RC - | add RA, BASE, RA - | addi RC, RC, FRAME_VARG - | add TMP2, RA, RB - | subi TMP3, BASE, 8 // TMP3 = vtop - | sub RC, RC, TMP0 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | cmplwi cr1, RB, 0 - |.if PPE - | sub TMP1, TMP3, RC - | cmpwi TMP1, 0 - |.else - | sub. TMP1, TMP3, RC - |.endif - | beq cr1, >5 // Copy all varargs? - | subi TMP2, TMP2, 16 - | ble >2 // No vararg slots? - |1: // Copy vararg slots to destination slots. - | lfd f0, 0(RC) - | addi RC, RC, 8 - | stfd f0, 0(RA) - | cmplw RA, TMP2 - | cmplw cr1, RC, TMP3 - | bge >3 // All destination slots filled? - | addi RA, RA, 8 - | blt cr1, <1 // More vararg slots? - |2: // Fill up remainder with nil. - | stw TISNIL, 0(RA) - | cmplw RA, TMP2 - | addi RA, RA, 8 - | blt <2 - |3: - | ins_next - | - |5: // Copy all varargs. - | lwz TMP0, L->maxstack - | li MULTRES, 8 // MULTRES = (0+1)*8 - | bley <3 // No vararg slots? 
- | add TMP2, RA, TMP1 - | cmplw TMP2, TMP0 - | addi MULTRES, TMP1, 8 - | bgt >7 - |6: - | lfd f0, 0(RC) - | addi RC, RC, 8 - | stfd f0, 0(RA) - | cmplw RC, TMP3 - | addi RA, RA, 8 - | blt <6 // More vararg slots? - | b <3 - | - |7: // Grow stack for varargs. - | mr CARG1, L - | stp RA, L->top - | sub SAVE0, RC, BASE // Need delta, because BASE may change. - | stp BASE, L->base - | sub RA, RA, BASE - | stw PC, SAVE_PC - | srwi CARG2, TMP1, 3 - | bl extern lj_state_growstack // (lua_State *L, int n) - | lp BASE, L->base - | add RA, BASE, RA - | add RC, BASE, SAVE0 - | subi TMP3, BASE, 8 - | b <6 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results*8, RD = extra_nresults*8 - | add RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. - | // Fall through. Assumes BC_RET follows. - break; - - case BC_RET: - | // RA = results*8, RD = (nresults+1)*8 - | lwz PC, FRAME_PC(BASE) - | add RA, BASE, RA - | mr MULTRES, RD - |1: - | andix. TMP0, PC, FRAME_TYPE - | xori TMP1, PC, FRAME_VARG - | bne ->BC_RETV_Z - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return - | lwz INS, -4(PC) - | cmpwi RD, 8 - | subi TMP2, BASE, 8 - | subi RC, RD, 8 - | decode_RB8 RB, INS - | beq >3 - | li TMP1, 0 - |2: - | addi TMP3, TMP1, 8 - | lfdx f0, RA, TMP1 - | cmpw TMP3, RC - | stfdx f0, TMP2, TMP1 - | beq >3 - | addi TMP1, TMP3, 8 - | lfdx f1, RA, TMP3 - | cmpw TMP1, RC - | stfdx f1, TMP2, TMP3 - | bne <2 - |3: - |5: - | cmplw RB, RD - | decode_RA8 RA, INS - | bgt >6 - | sub BASE, TMP2, RA - | lwz LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | lwz TMP1, LFUNC:TMP1->pc - | lwz KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | subi TMP1, RD, 8 - | addi RD, RD, 8 - | stwx TISNIL, TMP2, TMP1 - | b <5 - | - |->BC_RETV_Z: // Non-standard return case. - | andix. TMP2, TMP1, FRAME_TYPEP - | bne ->vm_return - | // Return from vararg function: relocate BASE down. 
- | sub BASE, BASE, TMP1 - | lwz PC, FRAME_PC(BASE) - | b <1 - break; - - case BC_RET0: case BC_RET1: - | // RA = results*8, RD = (nresults+1)*8 - | lwz PC, FRAME_PC(BASE) - | add RA, BASE, RA - | mr MULTRES, RD - | andix. TMP0, PC, FRAME_TYPE - | xori TMP1, PC, FRAME_VARG - | bney ->BC_RETV_Z - | - | lwz INS, -4(PC) - | subi TMP2, BASE, 8 - | decode_RB8 RB, INS - if (op == BC_RET1) { - | lfd f0, 0(RA) - | stfd f0, 0(TMP2) - } - |5: - | cmplw RB, RD - | decode_RA8 RA, INS - | bgt >6 - | sub BASE, TMP2, RA - | lwz LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | lwz TMP1, LFUNC:TMP1->pc - | lwz KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | subi TMP1, RD, 8 - | addi RD, RD, 8 - | stwx TISNIL, TMP2, TMP1 - | b <5 - break; - - /* -- Loops and branches ------------------------------------------------ */ - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. - break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base*8, RD = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - |.if DUALNUM - | // Integer loop. - | lwzux TMP1, RA, BASE - | lwz CARG1, FORL_IDX*8+4(RA) - | cmplw cr0, TMP1, TISNUM - if (vk) { - | lwz CARG3, FORL_STEP*8+4(RA) - | bne >9 - |.if GPR64 - | // Need to check overflow for (a<<32) + (b<<32). - | rldicr TMP0, CARG1, 32, 31 - | rldicr TMP2, CARG3, 32, 31 - | add CARG1, CARG1, CARG3 - | addo. TMP0, TMP0, TMP2 - |.else - | addo. 
CARG1, CARG1, CARG3 - |.endif - | cmpwi cr6, CARG3, 0 - | lwz CARG2, FORL_STOP*8+4(RA) - | bso >6 - |4: - | stw CARG1, FORL_IDX*8+4(RA) - } else { - | lwz TMP3, FORL_STEP*8(RA) - | lwz CARG3, FORL_STEP*8+4(RA) - | lwz TMP2, FORL_STOP*8(RA) - | lwz CARG2, FORL_STOP*8+4(RA) - | cmplw cr7, TMP3, TISNUM - | cmplw cr1, TMP2, TISNUM - | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq - | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq - | cmpwi cr6, CARG3, 0 - | bne >9 - } - | blt cr6, >5 - | cmpw CARG1, CARG2 - |1: - | stw TISNUM, FORL_EXT*8(RA) - if (op != BC_JFORL) { - | srwi RD, RD, 1 - } - | stw CARG1, FORL_EXT*8+4(RA) - if (op != BC_JFORL) { - | add RD, PC, RD - } - if (op == BC_FORI) { - | bgt >3 // See FP loop below. - } else if (op == BC_JFORI) { - | addis PC, RD, -(BCBIAS_J*4 >> 16) - | bley >7 - } else if (op == BC_IFORL) { - | bgt >2 - | addis PC, RD, -(BCBIAS_J*4 >> 16) - } else { - | bley =>BC_JLOOP - } - |2: - | ins_next - |5: // Invert check for negative step. - | cmpw CARG2, CARG1 - | b <1 - if (vk) { - |6: // Potential overflow. - | checkov TMP0, <4 // Ignore unrelated overflow. - | b <2 - } - |.endif - if (vk) { - |.if DUALNUM - |9: // FP loop. - | lfd f1, FORL_IDX*8(RA) - |.else - | lfdux f1, RA, BASE - |.endif - | lfd f3, FORL_STEP*8(RA) - | lfd f2, FORL_STOP*8(RA) - | lwz TMP3, FORL_STEP*8(RA) - | fadd f1, f1, f3 - | stfd f1, FORL_IDX*8(RA) - } else { - |.if DUALNUM - |9: // FP loop. 
- |.else - | lwzux TMP1, RA, BASE - | lwz TMP3, FORL_STEP*8(RA) - | lwz TMP2, FORL_STOP*8(RA) - | cmplw cr0, TMP1, TISNUM - | cmplw cr7, TMP3, TISNUM - | cmplw cr1, TMP2, TISNUM - |.endif - | lfd f1, FORL_IDX*8(RA) - | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | lfd f2, FORL_STOP*8(RA) - | bge ->vmeta_for - } - | cmpwi cr6, TMP3, 0 - if (op != BC_JFORL) { - | srwi RD, RD, 1 - } - | stfd f1, FORL_EXT*8(RA) - if (op != BC_JFORL) { - | add RD, PC, RD - } - | fcmpu cr0, f1, f2 - if (op == BC_JFORI) { - | addis PC, RD, -(BCBIAS_J*4 >> 16) - } - | blt cr6, >5 - if (op == BC_FORI) { - | bgt >3 - } else if (op == BC_IFORL) { - |.if DUALNUM - | bgty <2 - |.else - | bgt >2 - |.endif - |1: - | addis PC, RD, -(BCBIAS_J*4 >> 16) - } else if (op == BC_JFORI) { - | bley >7 - } else { - | bley =>BC_JLOOP - } - |.if DUALNUM - | b <2 - |.else - |2: - | ins_next - |.endif - |5: // Negative step. - if (op == BC_FORI) { - | bge <2 - |3: // Used by integer loop, too. - | addis PC, RD, -(BCBIAS_J*4 >> 16) - } else if (op == BC_IFORL) { - | bgey <1 - } else if (op == BC_JFORI) { - | bgey >7 - } else { - | bgey =>BC_JLOOP - } - | b <2 - if (op == BC_JFORI) { - |7: - | lwz INS, -4(PC) - | decode_RD8 RD, INS - | b =>BC_JLOOP - } - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base*8, RD = target - | lwzux TMP1, RA, BASE - | lwz TMP2, 4(RA) - | checknil TMP1; beq >1 // Stop if iterator returned nil. - if (op == BC_JITERL) { - | stw TMP1, -8(RA) - | stw TMP2, -4(RA) - | b =>BC_JLOOP - } else { - | branch_RD // Otherwise save control var + branch. 
- | stw TMP1, -8(RA) - | stw TMP2, -4(RA) - } - |1: - | ins_next - break; - - case BC_LOOP: - | // RA = base*8, RD = target (loop extent) - | // Note: RA/RD is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. - break; - - case BC_ILOOP: - | // RA = base*8, RD = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | // RA = base*8 (ignored), RD = traceno*8 - | lwz TMP1, DISPATCH_J(trace)(DISPATCH) - | srwi RD, RD, 1 - | // Traces on PPC don't store the trace number, so use 0. - | stw ZERO, DISPATCH_GL(vmstate)(DISPATCH) - | lwzx TRACE:TMP2, TMP1, RD - | clrso TMP1 - | lp TMP2, TRACE:TMP2->mcode - | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) - | mtctr TMP2 - | addi JGL, DISPATCH, GG_DISP2G+32768 - | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH) - | bctr - |.endif - break; - - case BC_JMP: - | // RA = base*8 (only used by trace recorder), RD = target - | branch_RD - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | lwz TMP2, L->maxstack - | lbz TMP1, -4+PC2PROTO(numparams)(PC) - | lwz KBASE, -4+PC2PROTO(k)(PC) - | cmplw RA, TMP2 - | slwi TMP1, TMP1, 3 - | bgt ->vm_growstack_l - if (op != BC_JFUNCF) { - | ins_next1 - } - |2: - | cmplw NARGS8:RC, TMP1 // Check for missing parameters. - | blt >3 - if (op == BC_JFUNCF) { - | decode_RD8 RD, INS - | b =>BC_JLOOP - } else { - | ins_next2 - } - | - |3: // Clear missing parameters. 
- | stwx TISNIL, BASE, NARGS8:RC - | addi NARGS8:RC, NARGS8:RC, 8 - | b <2 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | lwz TMP2, L->maxstack - | add TMP1, BASE, RC - | add TMP0, RA, RC - | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC. - | addi TMP3, RC, 8+FRAME_VARG - | lwz KBASE, -4+PC2PROTO(k)(PC) - | cmplw TMP0, TMP2 - | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG. - | bge ->vm_growstack_l - | lbz TMP2, -4+PC2PROTO(numparams)(PC) - | mr RA, BASE - | mr RC, TMP1 - | ins_next1 - | cmpwi TMP2, 0 - | addi BASE, TMP1, 8 - | beq >3 - |1: - | cmplw RA, RC // Less args than parameters? - | lwz TMP0, 0(RA) - | lwz TMP3, 4(RA) - | bge >4 - | stw TISNIL, 0(RA) // Clear old fixarg slot (help the GC). - | addi RA, RA, 8 - |2: - | addic. TMP2, TMP2, -1 - | stw TMP0, 8(TMP1) - | stw TMP3, 12(TMP1) - | addi TMP1, TMP1, 8 - | bne <1 - |3: - | ins_next2 - | - |4: // Clear missing parameters. - | li TMP0, LJ_TNIL - | b <2 - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | lp RD, CFUNC:RB->f - } else { - | lp RD, DISPATCH_GL(wrapf)(DISPATCH) - } - | add TMP1, RA, NARGS8:RC - | lwz TMP2, L->maxstack - | .toc lp TMP3, 0(RD) - | add RC, BASE, NARGS8:RC - | stp BASE, L->base - | cmplw TMP1, TMP2 - | stp RC, L->top - | li_vmstate C - |.if TOC - | mtctr TMP3 - |.else - | mtctr RD - |.endif - if (op == BC_FUNCCW) { - | lp CARG2, CFUNC:RB->f - } - | mr CARG1, L - | bgt ->vm_growstack_c // Need to grow stack. - | .toc lp TOCREG, TOC_OFS(RD) - | .tocenv lp ENVREG, ENV_OFS(RD) - | st_vmstate - | bctrl // (lua_State *L [, lua_CFunction f]) - | // Returns nresults. 
- | lp BASE, L->base - | .toc ld TOCREG, SAVE_TOC - | slwi RD, CRET1, 3 - | lp TMP1, L->top - | li_vmstate INTERP - | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. - | stw L, DISPATCH_GL(cur_L)(DISPATCH) - | sub RA, TMP1, RD // RA = L->top - nresults*8 - | st_vmstate - | b ->vm_returnc - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. */ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); - int i; - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.long .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.long 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 65\n" - "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.long .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.long .Lframe0\n" - "\t.long .Lbegin\n" - "\t.long %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", - fcofs, CFRAME_SIZE); - for (i = 14; i <= 31; i++) - fprintf(ctx->fp, - "\t.byte %d\n\t.uleb128 %d\n" - "\t.byte %d\n\t.uleb128 %d\n", - 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE0:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.long .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.long .Lframe0\n" -#if LJ_TARGET_PS3 - "\t.long .lj_vm_ffi_call\n" -#else - "\t.long lj_vm_ffi_call\n" 
-#endif - "\t.long %d\n" - "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x8e\n\t.uleb128 2\n" - "\t.byte 0xd\n\t.uleb128 0xe\n" - "\t.align 2\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif -#if !LJ_NO_UNWIND - fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe1:\n" - "\t.long .LECIE1-.LSCIE1\n" - ".LSCIE1:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zPR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 65\n" - "\t.uleb128 6\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.long lj_err_unwind_dwarf-.\n" - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE1:\n\n"); - fprintf(ctx->fp, - ".LSFDE2:\n" - "\t.long .LEFDE2-.LASFDE2\n" - ".LASFDE2:\n" - "\t.long .LASFDE2-.Lframe1\n" - "\t.long .Lbegin-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n", - fcofs, CFRAME_SIZE); - for (i = 14; i <= 31; i++) - fprintf(ctx->fp, - "\t.byte %d\n\t.uleb128 %d\n" - "\t.byte %d\n\t.uleb128 %d\n", - 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)); - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE2:\n\n"); -#if LJ_HASFFI - fprintf(ctx->fp, - ".Lframe2:\n" - "\t.long .LECIE2-.LSCIE2\n" - ".LSCIE2:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 65\n" - "\t.uleb128 1\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE2:\n\n"); - fprintf(ctx->fp, - ".LSFDE3:\n" - "\t.long .LEFDE3-.LASFDE3\n" - ".LASFDE3:\n" - "\t.long .LASFDE3-.Lframe2\n" - "\t.long lj_vm_ffi_call-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x8e\n\t.uleb128 2\n" - "\t.byte 0xd\n\t.uleb128 0xe\n" - 
"\t.align 2\n" - ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); -#endif -#endif - break; - default: - break; - } -} - diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index a003fb4f6b..eeff1afcb0 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -12,28 +12,15 @@ | |//----------------------------------------------------------------------- | -|.if WIN -|.define X64WIN, 1 // Windows/x64 calling conventions. -|.endif -| |// Fixed register assignments for the interpreter. |// This is very fragile and has many dependencies. Caveat emptor. |.define BASE, rdx // Not C callee-save, refetched anyway. -|.if X64WIN -|.define KBASE, rdi // Must be C callee-save. -|.define PC, rsi // Must be C callee-save. -|.define DISPATCH, rbx // Must be C callee-save. -|.define KBASEd, edi -|.define PCd, esi -|.define DISPATCHd, ebx -|.else |.define KBASE, r15 // Must be C callee-save. |.define PC, rbx // Must be C callee-save. |.define DISPATCH, r14 // Must be C callee-save. |.define KBASEd, r15d |.define PCd, ebx |.define DISPATCHd, r14d -|.endif | |.define RA, rcx |.define RAd, ecx @@ -56,16 +43,6 @@ |.define ITYPE, r11 |.define ITYPEd, r11d | -|.if X64WIN -|.define CARG1, rcx // x64/WIN64 C call arguments. -|.define CARG2, rdx -|.define CARG3, r8 -|.define CARG4, r9 -|.define CARG1d, ecx -|.define CARG2d, edx -|.define CARG3d, r8d -|.define CARG4d, r9d -|.else |.define CARG1, rdi // x64/POSIX C call arguments. |.define CARG2, rsi |.define CARG3, rdx @@ -78,7 +55,6 @@ |.define CARG4d, ecx |.define CARG5d, r8d |.define CARG6d, r9d -|.endif | |// Type definitions. Some of these are only used for documentation. |.type L, lua_State @@ -98,54 +74,11 @@ | |// Stack layout while in interpreter. Must match with lj_frame.h. |//----------------------------------------------------------------------- -|.if X64WIN // x64/Windows stack layout -| -|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). 
-|.macro saveregs_ -| push rdi; push rsi; push rbx -| sub rsp, CFRAME_SPACE -|.endmacro -|.macro saveregs -| push rbp; saveregs_ -|.endmacro -|.macro restoreregs -| add rsp, CFRAME_SPACE -| pop rbx; pop rsi; pop rdi; pop rbp -|.endmacro -| -|.define SAVE_CFRAME, aword [rsp+aword*13] -|.define SAVE_PC, aword [rsp+aword*12] -|.define SAVE_L, aword [rsp+aword*11] -|.define SAVE_ERRF, dword [rsp+dword*21] -|.define SAVE_NRES, dword [rsp+dword*20] -|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter -|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. -|.define SAVE_R4, aword [rsp+aword*8] -|.define SAVE_R3, aword [rsp+aword*7] -|.define SAVE_R2, aword [rsp+aword*6] -|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. -|.define ARG5, aword [rsp+aword*4] -|.define CSAVE_4, aword [rsp+aword*3] -|.define CSAVE_3, aword [rsp+aword*2] -|.define CSAVE_2, aword [rsp+aword*1] -|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter. -|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee -| -|.define ARG5d, dword [rsp+dword*8] -|.define TMP1, ARG5 // TMP1 overlaps ARG5 -|.define TMP1d, ARG5d -|.define TMP1hi, dword [rsp+dword*9] -|.define MULTRES, TMP1d // MULTRES overlaps TMP1d. -| -|//----------------------------------------------------------------------- -|.else // x64/POSIX stack layout | |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). |.macro saveregs_ | push rbx; push r15; push r14 -|.if NO_UNWIND | push r13; push r12 -|.endif | sub rsp, CFRAME_SPACE |.endmacro |.macro saveregs @@ -153,14 +86,11 @@ |.endmacro |.macro restoreregs | add rsp, CFRAME_SPACE -|.if NO_UNWIND | pop r12; pop r13 -|.endif | pop r14; pop r15; pop rbx; pop rbp |.endmacro | |//----- 16 byte aligned, -|.if NO_UNWIND |.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter. 
|.define SAVE_R4, aword [rsp+aword*10] |.define SAVE_R3, aword [rsp+aword*9] @@ -168,13 +98,6 @@ |.define SAVE_R1, aword [rsp+aword*7] |.define SAVE_RU2, aword [rsp+aword*6] |.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. -|.else -|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. -|.define SAVE_R4, aword [rsp+aword*8] -|.define SAVE_R3, aword [rsp+aword*7] -|.define SAVE_R2, aword [rsp+aword*6] -|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. -|.endif |.define SAVE_CFRAME, aword [rsp+aword*4] |.define SAVE_PC, aword [rsp+aword*3] |.define SAVE_L, aword [rsp+aword*2] @@ -187,8 +110,6 @@ |.define TMP1hi, dword [rsp+dword*1] |.define MULTRES, TMP1d // MULTRES overlaps TMP1d. | -|.endif -| |//----------------------------------------------------------------------- | |// Instruction headers. @@ -211,22 +132,9 @@ |.endmacro | |// Instruction footer. -|.if 1 | // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. | .define ins_next, ins_NEXT | .define ins_next_, ins_NEXT -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| // Around 10%-30% slower on Core2, a lot more slower on P4. -| .macro ins_next -| jmp ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif | |// Call decode and dispatch. |.macro ins_callt @@ -496,12 +404,10 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->vm_leave_unw | |->vm_unwind_rethrow: - |.if not X64WIN | mov CARG1, SAVE_L | mov CARG2d, eax | restoreregs | jmp extern lj_err_throw // (lua_State *L, int errcode) - |.endif | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. | // (void *cframe) @@ -687,10 +593,8 @@ static void build_subroutines(BuildCtx *ctx) | mov RC, RA // ... in [RC] | mov PC, [RB-24] // Restore PC from [cont|PC]. | mov RA, qword [RB-32] // May be negative on WIN64 with debug. 
- |.if FFI | cmp RA, 1 | jbe >1 - |.endif | mov LFUNC:KBASE, [BASE-16] | cleartp LFUNC:KBASE | mov KBASE, LFUNC:KBASE->pc @@ -698,7 +602,6 @@ static void build_subroutines(BuildCtx *ctx) | // BASE = base, RC = result, RB = meta base | jmp RA // Jump to continuation. | - |.if FFI |1: | je ->cont_ffi_callback // cont = 1: return from FFI callback. | // cont = 0: Tail call from C function. @@ -706,7 +609,6 @@ static void build_subroutines(BuildCtx *ctx) | shr RBd, 3 | lea RDd, [RBd-3] | jmp ->vm_call_tail - |.endif | |->cont_cat: // BASE = base, RC = result, RB = mbase | movzx RAd, PC_RB @@ -716,21 +618,12 @@ static void build_subroutines(BuildCtx *ctx) | je ->cont_ra | neg RA | shr RAd, 3 - |.if X64WIN - | mov CARG3d, RAd - | mov L:CARG1, SAVE_L - | mov L:CARG1->base, BASE - | mov RC, [RC] - | mov [RB], RC - | mov CARG2, RB - |.else | mov L:CARG1, SAVE_L | mov L:CARG1->base, BASE | mov CARG3d, RAd | mov RA, [RC] | mov [RB], RA | mov CARG2, RB - |.endif | jmp ->BC_CAT_Z | |//-- Table indexing metamethods ----------------------------------------- @@ -748,13 +641,8 @@ static void build_subroutines(BuildCtx *ctx) | |->vmeta_tgetb: | movzx RCd, PC_RC - |.if DUALNUM - | setint RC - | mov TMP1, RC - |.else | cvtsi2sd xmm0, RCd | movsd TMP1, xmm0 - |.endif | lea RC, TMP1 | jmp >1 | @@ -821,13 +709,8 @@ static void build_subroutines(BuildCtx *ctx) | |->vmeta_tsetb: | movzx RCd, PC_RC - |.if DUALNUM - | setint RC - | mov TMP1, RC - |.else | cvtsi2sd xmm0, RCd | movsd TMP1, xmm0 - |.endif | lea RC, TMP1 | jmp >1 | @@ -872,18 +755,11 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->vm_call_dispatch_f | |->vmeta_tsetr: - |.if X64WIN - | mov L:CARG1, SAVE_L - | mov CARG3d, RCd - | mov L:CARG1->base, BASE - | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE. - |.else | mov L:CARG1, SAVE_L | mov CARG2, TAB:RB | mov L:CARG1->base, BASE | mov RB, BASE // Save BASE. | mov CARG3d, RCd // Caveat: CARG3 == BASE. 
- |.endif | mov SAVE_PC, PC | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | // TValue * returned in eax (RC). @@ -898,13 +774,8 @@ static void build_subroutines(BuildCtx *ctx) | movzx RAd, PC_RA | mov L:RB, SAVE_L | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE. - |.if X64WIN - | lea CARG3, [BASE+RD*8] - | lea CARG2, [BASE+RA*8] - |.else | lea CARG2, [BASE+RA*8] | lea CARG3, [BASE+RD*8] - |.endif | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA. | movzx CARG4d, PC_OP | mov SAVE_PC, PC @@ -940,28 +811,18 @@ static void build_subroutines(BuildCtx *ctx) |->vmeta_equal: | cleartp TAB:RD | sub PC, 4 - |.if X64WIN - | mov CARG3, RD - | mov CARG4d, RBd - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2 == BASE. - | mov CARG2, RA - | mov CARG1, L:RB // Caveat: CARG1 == RA. - |.else | mov CARG2, RA | mov CARG4d, RBd // Caveat: CARG4 == RA. | mov L:RB, SAVE_L | mov L:RB->base, BASE // Caveat: CARG3 == BASE. | mov CARG3, RD | mov CARG1, L:RB - |.endif | mov SAVE_PC, PC | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) | // 0/1 or TValue * (metamethod) returned in eax (RC). | jmp <3 | |->vmeta_equal_cd: - |.if FFI | sub PC, 4 | mov L:RB, SAVE_L | mov L:RB->base, BASE @@ -971,7 +832,6 @@ static void build_subroutines(BuildCtx *ctx) | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins) | // 0/1 or TValue * (metamethod) returned in eax (RC). 
| jmp <3 - |.endif | |->vmeta_istype: | mov L:RB, SAVE_L @@ -987,19 +847,11 @@ static void build_subroutines(BuildCtx *ctx) |//-- Arithmetic metamethods --------------------------------------------- | |->vmeta_arith_vno: - |.if DUALNUM - | movzx RBd, PC_RB - | movzx RCd, PC_RC - |.endif |->vmeta_arith_vn: | lea RC, [KBASE+RC*8] | jmp >1 | |->vmeta_arith_nvo: - |.if DUALNUM - | movzx RBd, PC_RB - | movzx RCd, PC_RC - |.endif |->vmeta_arith_nv: | lea TMPR, [KBASE+RC*8] | lea RC, [BASE+RB*8] @@ -1012,26 +864,12 @@ static void build_subroutines(BuildCtx *ctx) | jmp >2 | |->vmeta_arith_vvo: - |.if DUALNUM - | movzx RBd, PC_RB - | movzx RCd, PC_RC - |.endif |->vmeta_arith_vv: | lea RC, [BASE+RC*8] |1: | lea RB, [BASE+RB*8] |2: | lea RA, [BASE+RA*8] - |.if X64WIN - | mov CARG3, RB - | mov CARG4, RC - | movzx RCd, PC_OP - | mov ARG5d, RCd - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2 == BASE. - | mov CARG2, RA - | mov CARG1, L:RB // Caveat: CARG1 == RA. - |.else | movzx CARG5d, PC_OP | mov CARG2, RA | mov CARG4, RC // Caveat: CARG4 == RA. @@ -1039,7 +877,6 @@ static void build_subroutines(BuildCtx *ctx) | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE. | mov CARG3, RB | mov L:RB, L:CARG1 - |.endif | mov SAVE_PC, PC | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) | // NULL (finished) or TValue * (metamethod) returned in eax (RC). @@ -1086,18 +923,10 @@ static void build_subroutines(BuildCtx *ctx) | // BASE = old base, RA = new base, RC = nargs+1, PC = return | mov TMP1d, NARGS:RDd // Save RA, RC for us. | mov RB, RA - |.if X64WIN - | mov L:TMPR, SAVE_L - | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE. - | lea CARG2, [RA-16] - | lea CARG3, [RA+NARGS:RD*8-8] - | mov CARG1, L:TMPR // Caveat: CARG1 is RA. - |.else | mov L:CARG1, SAVE_L | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE. 
| lea CARG2, [RA-16] | lea CARG3, [RA+NARGS:RD*8-8] - |.endif | mov SAVE_PC, PC | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | mov RA, RB @@ -1276,20 +1105,11 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->fff_res1 | |.ffunc_2 rawget - |.if X64WIN - | mov TAB:RA, [BASE] - | checktab TAB:RA, ->fff_fallback - | mov RB, BASE // Save BASE. - | lea CARG3, [BASE+8] - | mov CARG2, TAB:RA // Caveat: CARG2 == BASE. - | mov CARG1, SAVE_L - |.else | mov TAB:CARG2, [BASE] | checktab TAB:CARG2, ->fff_fallback | mov RB, BASE // Save BASE. | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE. | mov CARG1, SAVE_L - |.endif | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | // cTValue * returned in eax (RD). | mov BASE, RB // Restore BASE. @@ -1327,15 +1147,9 @@ static void build_subroutines(BuildCtx *ctx) | mov L:RB, SAVE_L | mov L:RB->base, BASE // Add frame since C call can throw. | mov SAVE_PC, PC // Redundant (but a defined value). - |.if not X64WIN | mov CARG2, BASE // Otherwise: CARG2 == BASE - |.endif | mov L:CARG1, L:RB - |.if DUALNUM - | call extern lj_strfmt_number // (lua_State *L, cTValue *o) - |.else | call extern lj_strfmt_num // (lua_State *L, lua_Number *np) - |.endif | // GCstr returned in eax (RD). | mov BASE, L:RB->base | settp STR:RB, RD, LJ_TSTR @@ -1346,25 +1160,14 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc_1 next | je >2 // Missing 2nd arg? |1: - |.if X64WIN - | mov RA, [BASE] - | checktab RA, ->fff_fallback - |.else | mov CARG2, [BASE] | checktab CARG2, ->fff_fallback - |.endif | mov L:RB, SAVE_L | mov L:RB->base, BASE // Add frame since C call can throw. | mov L:RB->top, BASE // Dummy frame length is ok. | mov PC, [BASE-8] - |.if X64WIN - | lea CARG3, [BASE+8] - | mov CARG2, RA // Caveat: CARG2 == BASE. - | mov CARG1, L:RB - |.else | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE. | mov CARG1, L:RB - |.endif | mov SAVE_PC, PC // Needed for ITERN fallback. 
| call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) | // Flag returned in eax (RD). @@ -1406,24 +1209,13 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc_2 ipairs_aux | mov TAB:RB, [BASE] | checktab TAB:RB, ->fff_fallback - |.if DUALNUM - | mov RA, [BASE+8] - | checkint RA, ->fff_fallback - |.else | checknumtp [BASE+8], ->fff_fallback | movsd xmm0, qword [BASE+8] - |.endif | mov PC, [BASE-8] - |.if DUALNUM - | add RAd, 1 - | setint ITYPE, RA - | mov [BASE-16], ITYPE - |.else | sseconst_1 xmm1, TMPR | addsd xmm0, xmm1 | cvttsd2si RAd, xmm0 | movsd qword [BASE-16], xmm0 - |.endif | cmp RAd, TAB:RB->asize; jae >2 // Not in array part? | mov RD, TAB:RB->array | lea RD, [RD+RA*8] @@ -1435,16 +1227,9 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->fff_res2 |2: // Check for empty hash part first. Otherwise call C function. | cmp dword TAB:RB->hmask, 0; je ->fff_res0 - |.if X64WIN - | mov TMPR, BASE - | mov CARG2d, RAd - | mov CARG1, TAB:RB - | mov RB, TMPR - |.else | mov CARG1, TAB:RB | mov RB, BASE // Save BASE. | mov CARG2d, RAd // Caveat: CARG2 == BASE - |.endif | call extern lj_tab_getinth // (GCtab *t, int32_t key) | // cTValue * or NULL returned in eax (RD). | mov BASE, RB @@ -1468,12 +1253,7 @@ static void build_subroutines(BuildCtx *ctx) | mov PC, [BASE-8] | mov [BASE-16], CFUNC:RD | mov [BASE-8], TMPR - |.if DUALNUM - | mov64 RD, ((uint64_t)LJ_TISNUM<<47) - | mov [BASE], RD - |.else | mov qword [BASE], 0 - |.endif | mov RDd, 1+3 | jmp ->fff_res | @@ -1670,25 +1450,7 @@ static void build_subroutines(BuildCtx *ctx) | | .ffunc_1 math_abs | mov RB, [BASE] - |.if DUALNUM - | checkint RB, >3 - | cmp RBd, 0; jns ->fff_resi - | neg RBd; js >2 - |->fff_resbit: - |->fff_resi: - | setint RB - |->fff_resRB: - | mov PC, [BASE-8] - | mov [BASE-16], RB - | jmp ->fff_res1 - |2: - | mov64 RB, U64x(41e00000,00000000) // 2^31. 
- | jmp ->fff_resRB - |3: - | ja ->fff_fallback - |.else | checknum RB, ->fff_fallback - |.endif | shl RB, 1 | shr RB, 1 | mov PC, [BASE-8] @@ -1728,24 +1490,9 @@ static void build_subroutines(BuildCtx *ctx) | |.macro math_round, func | .ffunc math_ .. func - |.if DUALNUM - | mov RB, [BASE] - | checknumx RB, ->fff_resRB, je - | ja ->fff_fallback - |.else | checknumtp [BASE], ->fff_fallback - |.endif | movsd xmm0, qword [BASE] | call ->vm_ .. func .. _sse - |.if DUALNUM - | cvttsd2si RBd, xmm0 - | cmp RBd, 0x80000000 - | jne ->fff_resi - | cvtsi2sd xmm1, RBd - | ucomisd xmm0, xmm1 - | jp ->fff_resxmm0 - | je ->fff_resi - |.endif | jmp ->fff_resxmm0 |.endmacro | @@ -1805,33 +1552,20 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc_n math_frexp | mov RB, BASE - |.if X64WIN - | lea CARG2, TMP1 // Caveat: CARG2 == BASE - |.else | lea CARG1, TMP1 - |.endif | call extern frexp | mov BASE, RB | mov RBd, TMP1d | mov PC, [BASE-8] | movsd qword [BASE-16], xmm0 - |.if DUALNUM - | setint RB - | mov [BASE-8], RB - |.else | cvtsi2sd xmm1, RBd | movsd qword [BASE-8], xmm1 - |.endif | mov RDd, 1+2 | jmp ->fff_res | |.ffunc_n math_modf | mov RB, BASE - |.if X64WIN - | lea CARG2, [BASE-16] // Caveat: CARG2 == BASE - |.else | lea CARG1, [BASE-16] - |.endif | call extern modf | mov BASE, RB | mov PC, [BASE-8] @@ -1842,40 +1576,12 @@ static void build_subroutines(BuildCtx *ctx) |.macro math_minmax, name, cmovop, sseop | .ffunc name | mov RAd, 2 - |.if DUALNUM - | mov RB, [BASE] - | checkint RB, >4 - |1: // Handle integers. - | cmp RAd, RDd; jae ->fff_resRB - | mov TMPR, [BASE+RA*8-8] - | checkint TMPR, >3 - | cmp RBd, TMPRd - | cmovop RB, TMPR - | add RAd, 1 - | jmp <1 - |3: - | ja ->fff_fallback - | // Convert intermediate result to number and continue below. - | cvtsi2sd xmm0, RBd - | jmp >6 - |4: - | ja ->fff_fallback - |.else | checknumtp [BASE], ->fff_fallback - |.endif | | movsd xmm0, qword [BASE] |5: // Handle numbers or integers. 
| cmp RAd, RDd; jae ->fff_resxmm0 - |.if DUALNUM - | mov RB, [BASE+RA*8-8] - | checknumx RB, >6, jb - | ja ->fff_fallback - | cvtsi2sd xmm1, RBd - | jmp >7 - |.else | checknumtp [BASE+RA*8-8], ->fff_fallback - |.endif |6: | movsd xmm1, qword [BASE+RA*8-8] |7: @@ -1897,22 +1603,13 @@ static void build_subroutines(BuildCtx *ctx) | cmp dword STR:RB->len, 1 | jb ->fff_res0 // Return no results for empty string. | movzx RBd, byte STR:RB[1] - |.if DUALNUM - | jmp ->fff_resi - |.else | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0 - |.endif | |.ffunc string_char // Only handle the 1-arg case here. | ffgccheck | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg. - |.if DUALNUM - | mov RB, [BASE] - | checkint RB, ->fff_fallback - |.else | checknumtp [BASE], ->fff_fallback | cvttsd2si RBd, qword [BASE] - |.endif | cmp RBd, 255; ja ->fff_fallback | mov TMP1d, RBd | mov TMPRd, 1 @@ -1938,26 +1635,13 @@ static void build_subroutines(BuildCtx *ctx) | mov TMPRd, -1 | cmp NARGS:RDd, 1+2; jb ->fff_fallback | jna >1 - |.if DUALNUM - | mov TMPR, [BASE+16] - | checkint TMPR, ->fff_fallback - |.else | checknumtp [BASE+16], ->fff_fallback | cvttsd2si TMPRd, qword [BASE+16] - |.endif |1: | mov STR:RB, [BASE] | checkstr STR:RB, ->fff_fallback - |.if DUALNUM - | mov ITYPE, [BASE+8] - | mov RAd, ITYPEd // Must clear hiword for lea below. - | sar ITYPE, 47 - | cmp ITYPEd, LJ_TISNUM - | jne ->fff_fallback - |.else | checknumtp [BASE+8], ->fff_fallback | cvttsd2si RAd, qword [BASE+8] - |.endif | mov RCd, STR:RB->len | cmp RCd, TMPRd // len < end? (unsigned compare) | jb >5 @@ -1996,19 +1680,11 @@ static void build_subroutines(BuildCtx *ctx) |.macro ffstring_op, name | .ffunc_1 string_ .. 
name | ffgccheck - |.if X64WIN - | mov STR:TMPR, [BASE] - | checkstr STR:TMPR, ->fff_fallback - |.else | mov STR:CARG2, [BASE] | checkstr STR:CARG2, ->fff_fallback - |.endif | mov L:RB, SAVE_L | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] | mov L:RB->base, BASE - |.if X64WIN - | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE - |.endif | mov RC, SBUF:CARG1->b | mov SBUF:CARG1->L, L:RB | mov SBUF:CARG1->p, RC @@ -2030,21 +1706,8 @@ static void build_subroutines(BuildCtx *ctx) |.if kind == 2 | sseconst_tobit xmm1, RB |.endif - |.if DUALNUM - | mov RB, [BASE] - | checkint RB, >1 - |.if kind > 0 - | jmp >2 - |.else - | jmp ->fff_resbit - |.endif - |1: - | ja ->fff_fallback - | movd xmm0, RB - |.else | checknumtp [BASE], ->fff_fallback | movsd xmm0, qword [BASE] - |.endif |.if kind < 2 | sseconst_tobit xmm1, RB |.endif @@ -2067,19 +1730,8 @@ static void build_subroutines(BuildCtx *ctx) |1: | cmp RD, BASE | jbe ->fff_resbit - |.if DUALNUM - | mov RA, [RD] - | checkint RA, >2 - | ins RBd, RAd - | sub RD, 8 - | jmp <1 - |2: - | ja ->fff_fallback_bit_op - | movd xmm0, RA - |.else | checknumtp [RD], ->fff_fallback_bit_op | movsd xmm0, qword [RD] - |.endif | addsd xmm0, xmm1 | movd RAd, xmm0 | ins RBd, RAd @@ -2097,32 +1749,21 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc_bit bit_bnot, 1 | not RBd - |.if DUALNUM - | jmp ->fff_resbit - |.else |->fff_resbit: | cvtsi2sd xmm0, RBd | jmp ->fff_resxmm0 - |.endif | |->fff_fallback_bit_op: | mov NARGS:RDd, TMPRd // Restore for fallback | jmp ->fff_fallback | |.macro .ffunc_bit_sh, name, ins - |.if DUALNUM - | .ffunc_bit name, 1, .ffunc_2 - | // Note: no inline conversion from number for 2nd argument! - | mov RA, [BASE+8] - | checkint RA, ->fff_fallback - |.else | .ffunc_nn name | sseconst_tobit xmm2, RB | addsd xmm0, xmm2 | addsd xmm1, xmm2 | movd RBd, xmm0 | movd RAd, xmm1 - |.endif | ins RBd, cl // Assumes RA is ecx. 
| jmp ->fff_resbit |.endmacro @@ -2217,10 +1858,6 @@ static void build_subroutines(BuildCtx *ctx) |//----------------------------------------------------------------------- | |->vm_record: // Dispatch target for recording phase. - |.if JIT - | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] - | test RDL, HOOK_VMEVENT // No recording while in vmevent. - | jnz >5 | // Decrement the hookcount for consistency, but always do the call. | test RDL, HOOK_ACTIVE | jnz >1 @@ -2228,7 +1865,6 @@ static void build_subroutines(BuildCtx *ctx) | jz >1 | dec dword [DISPATCH+DISPATCH_GL(hookcount)] | jmp >1 - |.endif | |->vm_rethook: // Dispatch target for return hooks. | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] @@ -2270,7 +1906,6 @@ static void build_subroutines(BuildCtx *ctx) | jmp <4 | |->vm_hotloop: // Hot loop counter underflow. - |.if JIT | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L). | cleartp LFUNC:RB | mov RB, LFUNC:RB->pc @@ -2285,20 +1920,15 @@ static void build_subroutines(BuildCtx *ctx) | mov SAVE_PC, PC | call extern lj_trace_hot // (jit_State *J, const BCIns *pc) | jmp <3 - |.endif | |->vm_callhook: // Dispatch target for call hooks. | mov SAVE_PC, PC - |.if JIT | jmp >1 - |.endif | |->vm_hotcall: // Hot call counter underflow. - |.if JIT | mov SAVE_PC, PC | or PC, 1 // Marker for hot call. |1: - |.endif | lea RD, [BASE+NARGS:RD*8-8] | mov L:RB, SAVE_L | mov L:RB->base, BASE @@ -2308,9 +1938,7 @@ static void build_subroutines(BuildCtx *ctx) | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc) | // ASMFunction returned in eax/rax (RD). | mov SAVE_PC, 0 // Invalidate for subsequent line hook. - |.if JIT | and PC, -2 - |.endif | mov BASE, L:RB->base | mov RA, RD | mov RD, L:RB->top @@ -2322,7 +1950,6 @@ static void build_subroutines(BuildCtx *ctx) | jmp RB | |->cont_stitch: // Trace stitching. - |.if JIT | // BASE = base, RC = result, RB = mbase | mov TRACE:ITYPE, [RB-40] // Save previous trace. 
| cleartp TRACE:ITYPE @@ -2371,20 +1998,6 @@ static void build_subroutines(BuildCtx *ctx) | mov aword [RA], LJ_TNIL | add RA, 8 | jmp <3 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov CARG2, PC // Caveat: CARG2 == BASE - | mov CARG1, L:RB - | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) - | mov BASE, L:RB->base - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. - | sub PC, 4 - | jmp ->cont_nop -#endif | |//----------------------------------------------------------------------- |//-- Trace exit handler ------------------------------------------------- @@ -2393,7 +2006,6 @@ static void build_subroutines(BuildCtx *ctx) |// Called from an exit stub with the exit number on the stack. |// The 16 bit exit number is stored with two (sign-extended) push imm8. |->vm_exit_handler: - |.if JIT | push r13; push r12 | push r11; push r10; push r9; push r8 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp @@ -2406,11 +2018,8 @@ static void build_subroutines(BuildCtx *ctx) | set_vmstate EXIT | mov [DISPATCH+DISPATCH_J(exitno)], RCd | mov [DISPATCH+DISPATCH_J(parent)], RAd - |.if X64WIN - | sub rsp, 16*8+4*8 // Room for SSE regs + save area. - |.else + | mov dword [DISPATCH+DISPATCH_GL(lasttrace)], RAd | sub rsp, 16*8 // Room for SSE regs. 
- |.endif | add rbp, -128 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12 @@ -2425,11 +2034,7 @@ static void build_subroutines(BuildCtx *ctx) | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] | mov aword [DISPATCH+DISPATCH_J(L)], L:RB | mov L:RB->base, BASE - |.if X64WIN - | lea CARG2, [rsp+4*8] - |.else | mov CARG2, rsp - |.endif | lea CARG1, [DISPATCH+GG_DISP2J] | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 | call extern lj_trace_exit // (jit_State *J, ExitState *ex) @@ -2440,36 +2045,18 @@ static void build_subroutines(BuildCtx *ctx) | mov BASE, L:RB->base | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC. | jmp >1 - |.endif |->vm_exit_interp: + | // Record which trace exited to the interpreter. + | mov TMPRd, dword [DISPATCH+DISPATCH_GL(vmstate)] + | mov dword [DISPATCH+DISPATCH_GL(lasttrace)], TMPRd + |->vm_exit_interp_notrack: | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. - |.if JIT | // Restore additional callee-save registers only used in compiled code. - |.if X64WIN - | lea RA, [rsp+10*16+4*8] - |1: - | movdqa xmm15, [RA-10*16] - | movdqa xmm14, [RA-9*16] - | movdqa xmm13, [RA-8*16] - | movdqa xmm12, [RA-7*16] - | movdqa xmm11, [RA-6*16] - | movdqa xmm10, [RA-5*16] - | movdqa xmm9, [RA-4*16] - | movdqa xmm8, [RA-3*16] - | movdqa xmm7, [RA-2*16] - | mov rsp, RA // Reposition stack to C frame. - | movdqa xmm6, [RA-1*16] - | mov r15, CSAVE_1 - | mov r14, CSAVE_2 - | mov r13, CSAVE_3 - | mov r12, CSAVE_4 - |.else | lea RA, [rsp+16] |1: | mov r13, [RA-8] | mov r12, [RA] | mov rsp, RA // Reposition stack to C frame. - |.endif | test RDd, RDd; js >9 // Check for error from exit. 
| mov L:RB, SAVE_L | mov MULTRES, RDd @@ -2479,6 +2066,7 @@ static void build_subroutines(BuildCtx *ctx) | mov KBASE, [KBASE+PC2PROTO(k)] | mov L:RB->base, BASE | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 + | mov TMPRd, dword [DISPATCH+DISPATCH_GL(vmstate)] | set_vmstate INTERP | // Modified copy of ins_next which handles function header dispatch, too. | mov RCd, [PC] @@ -2513,7 +2101,6 @@ static void build_subroutines(BuildCtx *ctx) | mov CARG1, L:RB | mov CARG2, RD | call extern lj_err_throw // (lua_State *L, int errcode) - |.endif | |//----------------------------------------------------------------------- |//-- Math helper functions ---------------------------------------------- @@ -2635,7 +2222,6 @@ static void build_subroutines(BuildCtx *ctx) |// int lj_vm_cpuid(uint32_t f, uint32_t res[4]) |->vm_cpuid: | mov eax, CARG1d - | .if X64WIN; push rsi; mov rsi, CARG2; .endif | push rbx | xor ecx, ecx | cpuid @@ -2644,7 +2230,6 @@ static void build_subroutines(BuildCtx *ctx) | mov [rsi+8], ecx | mov [rsi+12], edx | pop rbx - | .if X64WIN; pop rsi; .endif | ret | |//----------------------------------------------------------------------- @@ -2663,7 +2248,6 @@ static void build_subroutines(BuildCtx *ctx) | |// Handler for callback functions. Callback slot number in ah/al. |->vm_ffi_callback: - |.if FFI |.type CTSTATE, CTState, PC | saveregs_ // ebp/rbp already saved. ebp now holds global_State *. 
| lea DISPATCH, [ebp+GG_G2DISP] @@ -2678,9 +2262,6 @@ static void build_subroutines(BuildCtx *ctx) | movsd qword CTSTATE->cb.fpr[1], xmm1 | movsd qword CTSTATE->cb.fpr[2], xmm2 | movsd qword CTSTATE->cb.fpr[3], xmm3 - |.if X64WIN - | lea rax, [rsp+CFRAME_SIZE+4*8] - |.else | lea rax, [rsp+CFRAME_SIZE] | mov CTSTATE->cb.gpr[4], CARG5 | mov CTSTATE->cb.gpr[5], CARG6 @@ -2688,7 +2269,6 @@ static void build_subroutines(BuildCtx *ctx) | movsd qword CTSTATE->cb.fpr[5], xmm5 | movsd qword CTSTATE->cb.fpr[6], xmm6 | movsd qword CTSTATE->cb.fpr[7], xmm7 - |.endif | mov CTSTATE->cb.stack, rax | mov CARG2, rsp | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok. @@ -2704,10 +2284,8 @@ static void build_subroutines(BuildCtx *ctx) | shr RD, 3 | add RD, 1 | ins_callt - |.endif | |->cont_ffi_callback: // Return from FFI callback. - |.if FFI | mov L:RA, SAVE_L | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] | mov aword CTSTATE->L, L:RA @@ -2719,11 +2297,9 @@ static void build_subroutines(BuildCtx *ctx) | mov rax, CTSTATE->cb.gpr[0] | movsd xmm0, qword CTSTATE->cb.fpr[0] | jmp ->vm_leave_unw - |.endif | |->vm_ffi_call: // Call C function via FFI. | // Caveat: needs special frame unwinding, see below. 
- |.if FFI | .type CCSTATE, CCallState, rbx | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 | @@ -2747,35 +2323,28 @@ static void build_subroutines(BuildCtx *ctx) | mov CARG2, CCSTATE->gpr[1] | mov CARG3, CCSTATE->gpr[2] | mov CARG4, CCSTATE->gpr[3] - |.if not X64WIN | mov CARG5, CCSTATE->gpr[4] | mov CARG6, CCSTATE->gpr[5] - |.endif | test eax, eax; jz >5 | movaps xmm0, CCSTATE->fpr[0] | movaps xmm1, CCSTATE->fpr[1] | movaps xmm2, CCSTATE->fpr[2] | movaps xmm3, CCSTATE->fpr[3] - |.if not X64WIN | cmp eax, 4; jbe >5 | movaps xmm4, CCSTATE->fpr[4] | movaps xmm5, CCSTATE->fpr[5] | movaps xmm6, CCSTATE->fpr[6] | movaps xmm7, CCSTATE->fpr[7] - |.endif |5: | | call aword CCSTATE->func | | mov CCSTATE->gpr[0], rax | movaps CCSTATE->fpr[0], xmm0 - |.if not X64WIN | mov CCSTATE->gpr[1], rdx | movaps CCSTATE->fpr[1], xmm1 - |.endif | | mov rbx, [rbp-8]; leave; ret - |.endif |// Note: vm_ffi_call must be the last function in this object file! | |//----------------------------------------------------------------------- @@ -2821,36 +2390,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov RD, RB | sar ITYPE, 47 | sar RB, 47 - |.if DUALNUM - | cmp ITYPEd, LJ_TISNUM; jne >7 - | cmp RBd, LJ_TISNUM; jne >8 - | add PC, 4 - | cmp RAd, RDd - | jmp_comp jge, jl, jg, jle, >9 - |6: - | movzx RDd, PC_RD - | branchPC RD - |9: - | ins_next - | - |7: // RA is not an integer. - | ja ->vmeta_comp - | // RA is a number. - | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp - | // RA is a number, RD is an integer. - | cvtsi2sd xmm0, RDd - | jmp >2 - | - |8: // RA is an integer, RD is not an integer. - | ja ->vmeta_comp - | // RA is an integer, RD is a number. 
- | cvtsi2sd xmm1, RAd - | movd xmm0, RD - | jmp >3 - |.else | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp | cmp RBd, LJ_TISNUM; jae ->vmeta_comp - |.endif |1: | movd xmm0, RD |2: @@ -2860,16 +2401,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ucomisd xmm0, xmm1 | // Unordered: all of ZF CF PF set, ordered: PF clear. | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. - |.if DUALNUM - | jmp_comp jbe, ja, jb, jae, <9 - | jmp <6 - |.else | jmp_comp jbe, ja, jb, jae, >1 | movzx RDd, PC_RD | branchPC RD |1: | ins_next - |.endif break; case BC_ISEQV: case BC_ISNEV: @@ -2882,40 +2418,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov RA, ITYPE | sar RB, 47 | sar ITYPE, 47 - |.if DUALNUM - | cmp RBd, LJ_TISNUM; jne >7 - | cmp ITYPEd, LJ_TISNUM; jne >8 - | cmp RDd, RAd - if (vk) { - | jne >9 - } else { - | je >9 - } - | movzx RDd, PC_RD - | branchPC RD - |9: - | ins_next - | - |7: // RD is not an integer. - | ja >5 - | // RD is a number. - | movd xmm1, RD - | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5 - | // RD is a number, RA is an integer. - | cvtsi2sd xmm0, RAd - | jmp >2 - | - |8: // RD is an integer, RA is not an integer. - | ja >5 - | // RD is an integer, RA is a number. - | cvtsi2sd xmm1, RDd - | jmp >1 - | - |.else | cmp RBd, LJ_TISNUM; jae >5 | cmp ITYPEd, LJ_TISNUM; jae >5 | movd xmm1, RD - |.endif |1: | movd xmm0, RA |2: @@ -2935,31 +2440,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | movzx RDd, PC_RD | branchPC RD |2: // NE: Fallthrough to next instruction. - |.if not FFI - |3: - |.endif } else { - |.if not FFI - |3: - |.endif |2: // NE: Branch to the target. | movzx RDd, PC_RD | branchPC RD |1: // EQ: Fallthrough to next instruction. } - if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV || - op == BC_ISEQN || op == BC_ISNEN)) { - | jmp <9 - } else { - | ins_next - } + | ins_next | if (op == BC_ISEQV || op == BC_ISNEV) { |5: // Either or both types are not numbers. 
- |.if FFI | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd - |.endif | cmp RA, RD | je <1 // Same GCobjs or pvalues? | cmp RBd, ITYPEd @@ -2982,16 +2474,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } | jmp ->vmeta_equal // Handle __eq metamethod. } else { - |.if FFI |3: | cmp ITYPEd, LJ_TCDATA - if (LJ_DUALNUM && vk) { - | jne <9 - } else { - | jne <2 - } + | jne <2 | jmp ->vmeta_equal_cd - |.endif } break; case BC_ISEQS: case BC_ISNES: @@ -3001,7 +2487,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add PC, 4 | checkstr RB, >3 | cmp RB, [KBASE+RD*8] - iseqne_test: if (vk) { | jne >2 } else { @@ -3013,42 +2498,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_AD // RA = src, RD = num const, JMP with RD = target | mov RB, [BASE+RA*8] | add PC, 4 - |.if DUALNUM - | checkint RB, >7 - | mov RD, [KBASE+RD*8] - | checkint RD, >8 - | cmp RBd, RDd - if (vk) { - | jne >9 - } else { - | je >9 - } - | movzx RDd, PC_RD - | branchPC RD - |9: - | ins_next - | - |7: // RA is not an integer. - | ja >3 - | // RA is a number. - | mov RD, [KBASE+RD*8] - | checkint RD, >1 - | // RA is a number, RD is an integer. - | cvtsi2sd xmm0, RDd - | jmp >2 - | - |8: // RA is an integer, RD is a number. 
- | cvtsi2sd xmm0, RBd - | movd xmm1, RD - | ucomisd xmm0, xmm1 - | jmp >4 - |1: - | movd xmm0, RD - |.else | checknum RB, >3 |1: | movsd xmm0, qword [KBASE+RD*8] - |.endif |2: | ucomisd xmm0, qword [BASE+RA*8] |4: @@ -3060,7 +2512,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | sar RB, 47 | add PC, 4 | cmp RBd, RDd - if (!LJ_HASFFI) goto iseqne_test; if (vk) { | jne >3 | movzx RDd, PC_RD @@ -3142,46 +2593,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_UNM: | ins_AD // RA = dst, RD = src | mov RB, [BASE+RD*8] - |.if DUALNUM - | checkint RB, >5 - | neg RBd - | jo >4 - | setint RB - |9: - | mov [BASE+RA*8], RB - | ins_next - |4: - | mov64 RB, U64x(41e00000,00000000) // 2^31. - | jmp <9 - |5: - | ja ->vmeta_unm - |.else | checknum RB, ->vmeta_unm - |.endif | mov64 RD, U64x(80000000,00000000) | xor RB, RD - |.if DUALNUM - | jmp <9 - |.else | mov [BASE+RA*8], RB | ins_next - |.endif break; case BC_LEN: | ins_AD // RA = dst, RD = src | mov RD, [BASE+RD*8] | checkstr RD, >2 - |.if DUALNUM - | mov RDd, dword STR:RD->len - |1: - | setint RD - | mov [BASE+RA*8], RD - |.else | xorps xmm0, xmm0 | cvtsi2sd xmm0, dword STR:RD->len |1: | movsd qword [BASE+RA*8], xmm0 - |.endif | ins_next |2: | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len @@ -3196,11 +2621,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov RB, BASE // Save BASE. | call extern lj_tab_len // (GCtab *t) | // Length of table returned in eax (RD). - |.if DUALNUM - | // Nothing to do. - |.else | cvtsi2sd xmm0, RDd - |.endif | mov BASE, RB // Restore BASE. 
| movzx RAd, PC_RA | jmp <1 @@ -3220,17 +2641,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) ||switch (vk) { ||case 0: | checknumtp [BASE+RB*8], ->vmeta_arith_vn - | .if DUALNUM - | checknumtp [KBASE+RC*8], ->vmeta_arith_vn - | .endif | movsd xmm0, qword [BASE+RB*8] | sseins ssereg, qword [KBASE+RC*8] || break; ||case 1: | checknumtp [BASE+RB*8], ->vmeta_arith_nv - | .if DUALNUM - | checknumtp [KBASE+RC*8], ->vmeta_arith_nv - | .endif | movsd xmm0, qword [KBASE+RC*8] | sseins ssereg, qword [BASE+RB*8] || break; @@ -3290,11 +2705,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endmacro | |.macro ins_arith, intins, sseins - |.if DUALNUM - | ins_arithdn intins - |.else | ins_arith, sseins - |.endif |.endmacro | // RA = dst, RB = src1 or num const, RC = src2 or num const @@ -3363,25 +2774,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next break; case BC_KCDATA: - |.if FFI | ins_AND // RA = dst, RD = cdata const (~) | mov RD, [KBASE+RD*8] | settp RD, LJ_TCDATA | mov [BASE+RA*8], RD | ins_next - |.endif break; case BC_KSHORT: | ins_AD // RA = dst, RD = signed int16 literal - |.if DUALNUM - | movsx RDd, RDW - | setint RD - | mov [BASE+RA*8], RD - |.else | movsx RDd, RDW // Sign-extend literal. | cvtsi2sd xmm0, RDd | movsd qword [BASE+RA*8], xmm0 - |.endif | ins_next break; case BC_KNUM: @@ -3451,12 +2854,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) | jz <1 | // Crossed a write barrier. Move the barrier forward. - |.if not X64WIN | mov CARG2, RB | mov RB, BASE // Save BASE. - |.else - | xchg CARG2, RB // Save BASE (CARG2 == BASE). - |.endif | lea GL:CARG1, [DISPATCH+GG_DISP2G] | call extern lj_gc_barrieruv // (global_State *g, TValue *tv) | mov BASE, RB // Restore BASE. @@ -3630,9 +3029,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | checktab TAB:RB, ->vmeta_tgetv | | // Integer key? 
- |.if DUALNUM - | checkint RC, >5 - |.else | // Convert number to int and back and compare. | checknum RC, >5 | movd xmm0, RC @@ -3640,7 +3036,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cvtsi2sd xmm1, RCd | ucomisd xmm0, xmm1 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. - |.endif | cmp RCd, TAB:RB->asize // Takes care of unordered, too. | jae ->vmeta_tgetv // Not in array part? Use fallback. | shl RCd, 3 @@ -3732,11 +3127,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_ABC // RA = dst, RB = table, RC = key | mov TAB:RB, [BASE+RB*8] | cleartp TAB:RB - |.if DUALNUM - | mov RCd, dword [BASE+RC*8] - |.else | cvttsd2si RCd, qword [BASE+RC*8] - |.endif | cmp RCd, TAB:RB->asize | jae ->vmeta_tgetr // Not in array part? Use fallback. | shl RCd, 3 @@ -3756,9 +3147,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | checktab TAB:RB, ->vmeta_tsetv | | // Integer key? - |.if DUALNUM - | checkint RC, >5 - |.else | // Convert number to int and back and compare. | checknum RC, >5 | movd xmm0, RC @@ -3766,7 +3154,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cvtsi2sd xmm1, RCd | ucomisd xmm0, xmm1 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. - |.endif | cmp RCd, TAB:RB->asize // Takes care of unordered, too. | jae ->vmeta_tsetv | shl RCd, 3 @@ -3898,11 +3285,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_ABC // RA = src, RB = table, RC = key | mov TAB:RB, [BASE+RB*8] | cleartp TAB:RB - |.if DUALNUM - | mov RC, [BASE+RC*8] - |.else | cvttsd2si RCd, qword [BASE+RC*8] - |.endif | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | jnz >7 |2: @@ -4059,9 +3442,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ITERN: | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) - |.if JIT | // NYI: add hotloop, record BC_ITERN. - |.endif | mov TAB:RB, [BASE+RA*8-16] | cleartp TAB:RB | mov RCd, [BASE+RA*8-8] // Get index from control var. 
@@ -4071,19 +3452,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |1: // Traverse array part. | cmp RCd, TMPRd; jae >5 // Index points after array part? | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4 - |.if not DUALNUM | cvtsi2sd xmm0, RCd - |.endif | // Copy array slot to returned value. | mov RB, [ITYPE+RC*8] | mov [BASE+RA*8+8], RB | // Return array index as a numeric key. - |.if DUALNUM - | setint ITYPE, RC - | mov [BASE+RA*8], ITYPE - |.else | movsd qword [BASE+RA*8], xmm0 - |.endif | add RCd, 1 | mov [BASE+RA*8-8], RCd // Update control var. |2: @@ -4290,100 +3664,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.define FOR_EXT, [RA+24] case BC_FORL: - |.if JIT | hotloop RBd - |.endif | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. break; case BC_JFORI: case BC_JFORL: -#if !LJ_HASJIT - break; -#endif case BC_FORI: case BC_IFORL: vk = (op == BC_IFORL || op == BC_JFORL); | ins_AJ // RA = base, RD = target (after end of loop or start of loop) | lea RA, [BASE+RA*8] - if (LJ_DUALNUM) { - | mov RB, FOR_IDX - | checkint RB, >9 - | mov TMPR, FOR_STOP - if (!vk) { - | checkint TMPR, ->vmeta_for - | mov ITYPE, FOR_STEP - | test ITYPEd, ITYPEd; js >5 - | sar ITYPE, 47; - | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for - } else { -#ifdef LUA_USE_ASSERT - | checkinttp FOR_STOP, ->assert_bad_for_arg_type - | checkinttp FOR_STEP, ->assert_bad_for_arg_type -#endif - | mov ITYPE, FOR_STEP - | test ITYPEd, ITYPEd; js >5 - | add RBd, ITYPEd; jo >1 - | setint RB - | mov FOR_IDX, RB - } - | cmp RBd, TMPRd - | mov FOR_EXT, RB - if (op == BC_FORI) { - | jle >7 - |1: - |6: - | branchPC RD - } else if (op == BC_JFORI) { - | branchPC RD - | movzx RDd, PC_RD - | jle =>BC_JLOOP - |1: - |6: - } else if (op == BC_IFORL) { - | jg >7 - |6: - | branchPC RD - |1: - } else { - | jle =>BC_JLOOP - |1: - |6: - } - |7: - | ins_next - | - |5: // Invert check for negative step. 
- if (!vk) { - | sar ITYPE, 47; - | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for - } else { - | add RBd, ITYPEd; jo <1 - | setint RB - | mov FOR_IDX, RB - } - | cmp RBd, TMPRd - | mov FOR_EXT, RB - if (op == BC_FORI) { - | jge <7 - } else if (op == BC_JFORI) { - | branchPC RD - | movzx RDd, PC_RD - | jge =>BC_JLOOP - } else if (op == BC_IFORL) { - | jl <7 - } else { - | jge =>BC_JLOOP - } - | jmp <6 - |9: // Fallback to FP variant. - if (!vk) { - | jae ->vmeta_for - } - } else if (!vk) { - | checknumtp FOR_IDX, ->vmeta_for - } if (!vk) { + | checknumtp FOR_IDX, ->vmeta_for | checknumtp FOR_STOP, ->vmeta_for } else { #ifdef LUA_USE_ASSERT @@ -4408,32 +3701,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |1: | movsd qword FOR_EXT, xmm0 if (op == BC_FORI) { - |.if DUALNUM - | jnb <7 - |.else | jnb >2 | branchPC RD - |.endif } else if (op == BC_JFORI) { | branchPC RD | movzx RDd, PC_RD | jnb =>BC_JLOOP } else if (op == BC_IFORL) { - |.if DUALNUM - | jb <7 - |.else | jb >2 | branchPC RD - |.endif } else { | jnb =>BC_JLOOP } - |.if DUALNUM - | jmp <6 - |.else |2: | ins_next - |.endif | |3: // Invert comparison if step is negative. | ucomisd xmm0, xmm1 @@ -4441,16 +3722,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERL: - |.if JIT | hotloop RBd - |.endif | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. break; case BC_JITERL: -#if !LJ_HASJIT - break; -#endif case BC_IITERL: | ins_AJ // RA = base, RD = target | lea RA, [BASE+RA*8] @@ -4471,9 +3747,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_A // RA = base, RD = target (loop extent) | // Note: RA/RD is only used by trace recorder to determine scope/extent | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT | hotloop RBd - |.endif | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 
break; @@ -4483,7 +3757,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_JLOOP: - |.if JIT | ins_AD // RA = base (ignored), RD = traceno | mov RA, [DISPATCH+DISPATCH_J(trace)] | mov TRACE:RD, [RA+RD*8] @@ -4492,30 +3765,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB | // Save additional callee-save registers only used in compiled code. - |.if X64WIN - | mov CSAVE_4, r12 - | mov CSAVE_3, r13 - | mov CSAVE_2, r14 - | mov CSAVE_1, r15 - | mov RA, rsp - | sub rsp, 10*16+4*8 - | movdqa [RA-1*16], xmm6 - | movdqa [RA-2*16], xmm7 - | movdqa [RA-3*16], xmm8 - | movdqa [RA-4*16], xmm9 - | movdqa [RA-5*16], xmm10 - | movdqa [RA-6*16], xmm11 - | movdqa [RA-7*16], xmm12 - | movdqa [RA-8*16], xmm13 - | movdqa [RA-9*16], xmm14 - | movdqa [RA-10*16], xmm15 - |.else | sub rsp, 16 | mov [rsp+16], r12 | mov [rsp+8], r13 - |.endif | jmp RD - |.endif break; case BC_JMP: @@ -4534,17 +3787,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) */ case BC_FUNCF: - |.if JIT | hotcall RBd - |.endif case BC_FUNCV: /* NYI: compiled vararg functions. */ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. break; case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif case BC_IFUNCF: | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 | mov KBASE, [PC-4+PC2PROTO(k)] @@ -4572,9 +3820,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif | int3 // NYI: compiled vararg functions break; /* NYI: compiled vararg functions. 
*/ @@ -4712,13 +3957,10 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ -#if LJ_NO_UNWIND "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */ "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */ -#endif "\t.align 8\n" ".LEFDE0:\n\n", fcofs, CFRAME_SIZE); -#if LJ_HASFFI fprintf(ctx->fp, ".LSFDE1:\n" "\t.long .LEFDE1-.LASFDE1\n" @@ -4732,176 +3974,7 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ "\t.align 8\n" ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif -#if !LJ_NO_UNWIND -#if (defined(__sun__) && defined(__svr4__)) - fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); -#else - fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); -#endif - fprintf(ctx->fp, - ".Lframe1:\n" - "\t.long .LECIE1-.LSCIE1\n" - ".LSCIE1:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zPR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -8\n" - "\t.byte 0x10\n" - "\t.uleb128 6\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.long lj_err_unwind_dwarf-.\n" - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n" - "\t.byte 0x80+0x10\n\t.uleb128 0x1\n" - "\t.align 8\n" - ".LECIE1:\n\n"); - fprintf(ctx->fp, - ".LSFDE2:\n" - "\t.long .LEFDE2-.LASFDE2\n" - ".LASFDE2:\n" - "\t.long .LASFDE2-.Lframe1\n" - "\t.long .Lbegin-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ - "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ - "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ - "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ - "\t.align 8\n" - ".LEFDE2:\n\n", fcofs, CFRAME_SIZE); -#if LJ_HASFFI - fprintf(ctx->fp, - ".Lframe2:\n" - "\t.long .LECIE2-.LSCIE2\n" - ".LSCIE2:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string 
\"zR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -8\n" - "\t.byte 0x10\n" - "\t.uleb128 1\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n" - "\t.byte 0x80+0x10\n\t.uleb128 0x1\n" - "\t.align 8\n" - ".LECIE2:\n\n"); - fprintf(ctx->fp, - ".LSFDE3:\n" - "\t.long .LEFDE3-.LASFDE3\n" - ".LASFDE3:\n" - "\t.long .LASFDE3-.Lframe2\n" - "\t.long lj_vm_ffi_call-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ - "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ - "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ - "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ - "\t.align 8\n" - ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); -#endif -#endif break; -#if !LJ_NO_UNWIND - /* Mental note: never let Apple design an assembler. - ** Or a linker. Or a plastic case. But I digress. - */ - case BUILD_machasm: { -#if LJ_HASFFI - int fcsize = 0; -#endif - int i; - fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n"); - fprintf(ctx->fp, - "EH_frame1:\n" - "\t.set L$set$x,LECIEX-LSCIEX\n" - "\t.long L$set$x\n" - "LSCIEX:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.ascii \"zPR\\0\"\n" - "\t.byte 0x1\n" - "\t.byte 128-8\n" - "\t.byte 0x10\n" - "\t.byte 6\n" /* augmentation length */ - "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ - "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n" - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n" - "\t.byte 0x80+0x10\n\t.byte 0x1\n" - "\t.align 3\n" - "LECIEX:\n\n"); - for (i = 0; i < ctx->nsym; i++) { - const char *name = ctx->sym[i].name; - int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs; - if (size == 0) continue; -#if LJ_HASFFI - if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } -#endif - fprintf(ctx->fp, - "%s.eh:\n" - "LSFDE%d:\n" - "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" - "\t.long L$set$%d\n" - "LASFDE%d:\n" - "\t.long 
LASFDE%d-EH_frame1\n" - "\t.long %s-.\n" - "\t.long %d\n" - "\t.byte 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */ - "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ - "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ - "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */ - "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */ - "\t.align 3\n" - "LEFDE%d:\n\n", - name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i); - } -#if LJ_HASFFI - if (fcsize) { - fprintf(ctx->fp, - "EH_frame2:\n" - "\t.set L$set$y,LECIEY-LSCIEY\n" - "\t.long L$set$y\n" - "LSCIEY:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.ascii \"zR\\0\"\n" - "\t.byte 0x1\n" - "\t.byte 128-8\n" - "\t.byte 0x10\n" - "\t.byte 1\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n" - "\t.byte 0x80+0x10\n\t.byte 0x1\n" - "\t.align 3\n" - "LECIEY:\n\n"); - fprintf(ctx->fp, - "_lj_vm_ffi_call.eh:\n" - "LSFDEY:\n" - "\t.set L$set$yy,LEFDEY-LASFDEY\n" - "\t.long L$set$yy\n" - "LASFDEY:\n" - "\t.long LASFDEY-EH_frame2\n" - "\t.long _lj_vm_ffi_call-.\n" - "\t.long %d\n" - "\t.byte 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */ - "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ - "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */ - "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ - "\t.align 3\n" - "LEFDEY:\n\n", fcsize); - } -#endif - fprintf(ctx->fp, ".subsections_via_symbols\n"); - } - break; -#endif default: /* Difficult for other modes. */ break; } diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc deleted file mode 100644 index 211ae7b922..0000000000 --- a/src/vm_x86.dasc +++ /dev/null @@ -1,5780 +0,0 @@ -|// Low-level VM code for x86 CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h -| -|.if P64 -|.arch x64 -|.else -|.arch x86 -|.endif -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|//----------------------------------------------------------------------- -| -|.if P64 -|.define X64, 1 -|.if WIN -|.define X64WIN, 1 -|.endif -|.endif -| -|// Fixed register assignments for the interpreter. -|// This is very fragile and has many dependencies. Caveat emptor. -|.define BASE, edx // Not C callee-save, refetched anyway. -|.if not X64 -|.define KBASE, edi // Must be C callee-save. -|.define KBASEa, KBASE -|.define PC, esi // Must be C callee-save. -|.define PCa, PC -|.define DISPATCH, ebx // Must be C callee-save. -|.elif X64WIN -|.define KBASE, edi // Must be C callee-save. -|.define KBASEa, rdi -|.define PC, esi // Must be C callee-save. -|.define PCa, rsi -|.define DISPATCH, ebx // Must be C callee-save. -|.else -|.define KBASE, r15d // Must be C callee-save. -|.define KBASEa, r15 -|.define PC, ebx // Must be C callee-save. -|.define PCa, rbx -|.define DISPATCH, r14d // Must be C callee-save. -|.endif -| -|.define RA, ecx -|.define RAH, ch -|.define RAL, cl -|.define RB, ebp // Must be ebp (C callee-save). -|.define RC, eax // Must be eax. -|.define RCW, ax -|.define RCH, ah -|.define RCL, al -|.define OP, RB -|.define RD, RC -|.define RDW, RCW -|.define RDL, RCL -|.if X64 -|.define RAa, rcx -|.define RBa, rbp -|.define RCa, rax -|.define RDa, rax -|.else -|.define RAa, RA -|.define RBa, RB -|.define RCa, RC -|.define RDa, RD -|.endif -| -|.if not X64 -|.define FCARG1, ecx // x86 fastcall arguments. -|.define FCARG2, edx -|.elif X64WIN -|.define CARG1, rcx // x64/WIN64 C call arguments. -|.define CARG2, rdx -|.define CARG3, r8 -|.define CARG4, r9 -|.define CARG1d, ecx -|.define CARG2d, edx -|.define CARG3d, r8d -|.define CARG4d, r9d -|.define FCARG1, CARG1d // Upwards compatible to x86 fastcall. 
-|.define FCARG2, CARG2d -|.else -|.define CARG1, rdi // x64/POSIX C call arguments. -|.define CARG2, rsi -|.define CARG3, rdx -|.define CARG4, rcx -|.define CARG5, r8 -|.define CARG6, r9 -|.define CARG1d, edi -|.define CARG2d, esi -|.define CARG3d, edx -|.define CARG4d, ecx -|.define CARG5d, r8d -|.define CARG6d, r9d -|.define FCARG1, CARG1d // Simulate x86 fastcall. -|.define FCARG2, CARG2d -|.endif -| -|// Type definitions. Some of these are only used for documentation. -|.type L, lua_State -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS, int -|.type TRACE, GCtrace -|.type SBUF, SBuf -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|//----------------------------------------------------------------------- -|.if not X64 // x86 stack layout. -| -|.if WIN -| -|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--). -|.macro saveregs_ -| push edi; push esi; push ebx -| push extern lj_err_unwind_win -| fs; push dword [0] -| fs; mov [0], esp -| sub esp, CFRAME_SPACE -|.endmacro -|.macro restoreregs -| add esp, CFRAME_SPACE -| fs; pop dword [0] -| pop edi // Short for esp += 4. -| pop ebx; pop esi; pop edi; pop ebp -|.endmacro -| -|.else -| -|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). -|.macro saveregs_ -| push edi; push esi; push ebx -| sub esp, CFRAME_SPACE -|.endmacro -|.macro restoreregs -| add esp, CFRAME_SPACE -| pop ebx; pop esi; pop edi; pop ebp -|.endmacro -| -|.endif -| -|.macro saveregs -| push ebp; saveregs_ -|.endmacro -| -|.if WIN -|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only. 
-|.define SAVE_NRES, aword [esp+aword*18] -|.define SAVE_CFRAME, aword [esp+aword*17] -|.define SAVE_L, aword [esp+aword*16] -|//----- 16 byte aligned, ^^^ arguments from C caller -|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter. -|.define SAVE_R4, aword [esp+aword*14] -|.define SAVE_R3, aword [esp+aword*13] -|.define SAVE_R2, aword [esp+aword*12] -|//----- 16 byte aligned -|.define SAVE_R1, aword [esp+aword*11] -|.define SEH_FUNC, aword [esp+aword*10] -|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves. -|.define UNUSED2, aword [esp+aword*8] -|//----- 16 byte aligned -|.define UNUSED1, aword [esp+aword*7] -|.define SAVE_PC, aword [esp+aword*6] -|.define TMP2, aword [esp+aword*5] -|.define TMP1, aword [esp+aword*4] -|//----- 16 byte aligned -|.define ARG4, aword [esp+aword*3] -|.define ARG3, aword [esp+aword*2] -|.define ARG2, aword [esp+aword*1] -|.define ARG1, aword [esp] //<-- esp while in interpreter. -|//----- 16 byte aligned, ^^^ arguments for C callee -|.else -|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. -|.define SAVE_NRES, aword [esp+aword*14] -|.define SAVE_CFRAME, aword [esp+aword*13] -|.define SAVE_L, aword [esp+aword*12] -|//----- 16 byte aligned, ^^^ arguments from C caller -|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter. -|.define SAVE_R4, aword [esp+aword*10] -|.define SAVE_R3, aword [esp+aword*9] -|.define SAVE_R2, aword [esp+aword*8] -|//----- 16 byte aligned -|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves. -|.define SAVE_PC, aword [esp+aword*6] -|.define TMP2, aword [esp+aword*5] -|.define TMP1, aword [esp+aword*4] -|//----- 16 byte aligned -|.define ARG4, aword [esp+aword*3] -|.define ARG3, aword [esp+aword*2] -|.define ARG2, aword [esp+aword*1] -|.define ARG1, aword [esp] //<-- esp while in interpreter. -|//----- 16 byte aligned, ^^^ arguments for C callee -|.endif -| -|// FPARGx overlaps ARGx and ARG(x+1) on x86. 
-|.define FPARG3, qword [esp+qword*1] -|.define FPARG1, qword [esp] -|// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ). -|.define TMPQ, qword [esp+aword*4] -|.define TMP3, ARG4 -|.define ARG5, TMP1 -|.define TMPa, TMP1 -|.define MULTRES, TMP2 -| -|// Arguments for vm_call and vm_pcall. -|.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME! -| -|// Arguments for vm_cpcall. -|.define INARG_CP_CALL, SAVE_ERRF -|.define INARG_CP_UD, SAVE_NRES -|.define INARG_CP_FUNC, SAVE_CFRAME -| -|//----------------------------------------------------------------------- -|.elif X64WIN // x64/Windows stack layout -| -|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). -|.macro saveregs_ -| push rdi; push rsi; push rbx -| sub rsp, CFRAME_SPACE -|.endmacro -|.macro saveregs -| push rbp; saveregs_ -|.endmacro -|.macro restoreregs -| add rsp, CFRAME_SPACE -| pop rbx; pop rsi; pop rdi; pop rbp -|.endmacro -| -|.define SAVE_CFRAME, aword [rsp+aword*13] -|.define SAVE_PC, dword [rsp+dword*25] -|.define SAVE_L, dword [rsp+dword*24] -|.define SAVE_ERRF, dword [rsp+dword*23] -|.define SAVE_NRES, dword [rsp+dword*22] -|.define TMP2, dword [rsp+dword*21] -|.define TMP1, dword [rsp+dword*20] -|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter -|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. -|.define SAVE_R4, aword [rsp+aword*8] -|.define SAVE_R3, aword [rsp+aword*7] -|.define SAVE_R2, aword [rsp+aword*6] -|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. -|.define ARG5, aword [rsp+aword*4] -|.define CSAVE_4, aword [rsp+aword*3] -|.define CSAVE_3, aword [rsp+aword*2] -|.define CSAVE_2, aword [rsp+aword*1] -|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter. -|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee -| -|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ). 
-|.define TMPQ, qword [rsp+aword*10] -|.define MULTRES, TMP2 -|.define TMPa, ARG5 -|.define ARG5d, dword [rsp+aword*4] -|.define TMP3, ARG5d -| -|//----------------------------------------------------------------------- -|.else // x64/POSIX stack layout -| -|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). -|.macro saveregs_ -| push rbx; push r15; push r14 -|.if NO_UNWIND -| push r13; push r12 -|.endif -| sub rsp, CFRAME_SPACE -|.endmacro -|.macro saveregs -| push rbp; saveregs_ -|.endmacro -|.macro restoreregs -| add rsp, CFRAME_SPACE -|.if NO_UNWIND -| pop r12; pop r13 -|.endif -| pop r14; pop r15; pop rbx; pop rbp -|.endmacro -| -|//----- 16 byte aligned, -|.if NO_UNWIND -|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter. -|.define SAVE_R4, aword [rsp+aword*10] -|.define SAVE_R3, aword [rsp+aword*9] -|.define SAVE_R2, aword [rsp+aword*8] -|.define SAVE_R1, aword [rsp+aword*7] -|.define SAVE_RU2, aword [rsp+aword*6] -|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. -|.else -|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. -|.define SAVE_R4, aword [rsp+aword*8] -|.define SAVE_R3, aword [rsp+aword*7] -|.define SAVE_R2, aword [rsp+aword*6] -|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. -|.endif -|.define SAVE_CFRAME, aword [rsp+aword*4] -|.define SAVE_PC, dword [rsp+dword*7] -|.define SAVE_L, dword [rsp+dword*6] -|.define SAVE_ERRF, dword [rsp+dword*5] -|.define SAVE_NRES, dword [rsp+dword*4] -|.define TMPa, aword [rsp+aword*1] -|.define TMP2, dword [rsp+dword*1] -|.define TMP1, dword [rsp] //<-- rsp while in interpreter. -|//----- 16 byte aligned -| -|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ). -|.define TMPQ, qword [rsp] -|.define TMP3, dword [rsp+aword*1] -|.define MULTRES, TMP2 -| -|.endif -| -|//----------------------------------------------------------------------- -| -|// Instruction headers. 
-|.macro ins_A; .endmacro -|.macro ins_AD; .endmacro -|.macro ins_AJ; .endmacro -|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro -|.macro ins_AB_; movzx RB, RCH; .endmacro -|.macro ins_A_C; movzx RC, RCL; .endmacro -|.macro ins_AND; not RDa; .endmacro -| -|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). -|.macro ins_NEXT -| mov RC, [PC] -| movzx RA, RCH -| movzx OP, RCL -| add PC, 4 -| shr RC, 16 -|.if X64 -| jmp aword [DISPATCH+OP*8] -|.else -| jmp aword [DISPATCH+OP*4] -|.endif -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| // Around 10%-30% slower on Core2, a lot more slower on P4. -| .macro ins_next -| jmp ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC -| mov PC, LFUNC:RB->pc -| mov RA, [PC] -| movzx OP, RAL -| movzx RA, RAH -| add PC, 4 -|.if X64 -| jmp aword [DISPATCH+OP*8] -|.else -| jmp aword [DISPATCH+OP*4] -|.endif -|.endmacro -| -|.macro ins_call -| // BASE = new base, RB = LFUNC, RD = nargs+1 -| mov [BASE-4], PC -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Macros to test operand types. 
-|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro -|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro -|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro -|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro -|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro -| -|// These operands must be used with movzx. -|.define PC_OP, byte [PC-4] -|.define PC_RA, byte [PC-3] -|.define PC_RB, byte [PC-1] -|.define PC_RC, byte [PC-2] -|.define PC_RD, word [PC-2] -| -|.macro branchPC, reg -| lea PC, [PC+reg*4-BCBIAS_J*4] -|.endmacro -| -|// Assumes DISPATCH is relative to GL. -#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|// Decrement hashed hotcount and trigger trace recorder if zero. -|.macro hotloop, reg -| mov reg, PC -| shr reg, 1 -| and reg, HOTCOUNT_PCMASK -| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP -| jb ->vm_hotloop -|.endmacro -| -|.macro hotcall, reg -| mov reg, PC -| shr reg, 1 -| and reg, HOTCOUNT_PCMASK -| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL -| jb ->vm_hotcall -|.endmacro -| -|// Set current VM state. -|.macro set_vmstate, st -| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st -|.endmacro -| -|// x87 compares. -|.macro fcomparepp // Compare and pop st0 >< st1. -| fucomip st1 -| fpop -|.endmacro -| -|.macro fpop1; fstp st1; .endmacro -| -|// Synthesize SSE FP constants. -|.macro sseconst_abs, reg, tmp // Synthesize abs mask. -|.if X64 -| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp -|.else -| pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1 -|.endif -|.endmacro -| -|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const. -|.if X64 -| mov64 tmp, U64x(val,00000000); movd reg, tmp -|.else -| mov tmp, 0x .. 
val; movd reg, tmp; pshufd reg, reg, 0x51 -|.endif -|.endmacro -| -|.macro sseconst_sign, reg, tmp // Synthesize sign mask. -| sseconst_hi reg, tmp, 80000000 -|.endmacro -|.macro sseconst_1, reg, tmp // Synthesize 1.0. -| sseconst_hi reg, tmp, 3ff00000 -|.endmacro -|.macro sseconst_m1, reg, tmp // Synthesize -1.0. -| sseconst_hi reg, tmp, bff00000 -|.endmacro -|.macro sseconst_2p52, reg, tmp // Synthesize 2^52. -| sseconst_hi reg, tmp, 43300000 -|.endmacro -|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51. -| sseconst_hi reg, tmp, 43380000 -|.endmacro -| -|// Move table write barrier back. Overwrites reg. -|.macro barrierback, tab, reg -| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab) -| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] -| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab -| mov tab->gclist, reg -|.endmacro -| -|//----------------------------------------------------------------------- - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | test PC, FRAME_P - | jz ->cont_dispatch - | - | // Return from pcall or xpcall fast func. - | and PC, -8 - | sub BASE, PC // Restore caller base. - | lea RAa, [RA+PC-8] // Rebase RA and prepend one result. - | mov PC, [BASE-4] // Fetch PC of previous frame. - | // Prepending may overwrite the pcall frame, so do it at the end. - | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results. - | - |->vm_returnc: - | add RD, 1 // RD = nresults+1 - | jz ->vm_unwind_yield - | mov MULTRES, RD - | test PC, FRAME_TYPE - | jz ->BC_RET_Z // Handle regular return to Lua. 
- | - |->vm_return: - | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return - | xor PC, FRAME_C - | test PC, FRAME_TYPE - | jnz ->vm_returnp - | - | // Return to C. - | set_vmstate C - | and PC, -8 - | sub PC, BASE - | neg PC // Previous base = BASE - delta. - | - | sub RD, 1 - | jz >2 - |1: // Move results down. - |.if X64 - | mov RBa, [BASE+RA] - | mov [BASE-8], RBa - |.else - | mov RB, [BASE+RA] - | mov [BASE-8], RB - | mov RB, [BASE+RA+4] - | mov [BASE-4], RB - |.endif - | add BASE, 8 - | sub RD, 1 - | jnz <1 - |2: - | mov L:RB, SAVE_L - | mov L:RB->base, PC - |3: - | mov RD, MULTRES - | mov RA, SAVE_NRES // RA = wanted nresults+1 - |4: - | cmp RA, RD - | jne >6 // More/less results wanted? - |5: - | sub BASE, 8 - | mov L:RB->top, BASE - | - |->vm_leave_cp: - | mov RAa, SAVE_CFRAME // Restore previous C frame. - | mov L:RB->cframe, RAa - | xor eax, eax // Ok return status for vm_pcall. - | - |->vm_leave_unw: - | restoreregs - | ret - | - |6: - | jb >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. - | cmp BASE, L:RB->maxstack - | ja >8 - | mov dword [BASE-4], LJ_TNIL - | add BASE, 8 - | add RD, 1 - | jmp <4 - | - |7: // Less results wanted. - | test RA, RA - | jz <5 // But check for LUA_MULTRET+1. - | sub RA, RD // Negative result! - | lea BASE, [BASE+RA*8] // Correct top. - | jmp <5 - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | mov L:RB->top, BASE // Save current top held in BASE (yes). - | mov MULTRES, RD // Need to fill only remainder with nil. - | mov FCARG2, RA - | mov FCARG1, L:RB - | call extern lj_state_growstack@8 // (lua_State *L, int n) - | mov BASE, L:RB->top // Need the (realloced) L->top in BASE. 
- | jmp <3 - | - |->vm_unwind_yield: - | mov al, LUA_YIELD - | jmp ->vm_unwind_c_eh - | - |->vm_unwind_c@8: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - |.if X64 - | mov eax, CARG2d // Error return status for vm_pcall. - | mov rsp, CARG1 - |.else - | mov eax, FCARG2 // Error return status for vm_pcall. - | mov esp, FCARG1 - |.if WIN - | lea FCARG1, SEH_NEXT - | fs; mov [0], FCARG1 - |.endif - |.endif - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | mov L:RB, SAVE_L - | mov GL:RB, L:RB->glref - | mov dword GL:RB->vmstate, ~LJ_VMST_C - | jmp ->vm_leave_unw - | - |->vm_unwind_rethrow: - |.if X64 and not X64WIN - | mov FCARG1, SAVE_L - | mov FCARG2, eax - | restoreregs - | jmp extern lj_err_throw@8 // (lua_State *L, int errcode) - |.endif - | - |->vm_unwind_ff@4: // Unwind C stack, return from ff pcall. - | // (void *cframe) - |.if X64 - | and CARG1, CFRAME_RAWMASK - | mov rsp, CARG1 - |.else - | and FCARG1, CFRAME_RAWMASK - | mov esp, FCARG1 - |.if WIN - | lea FCARG1, SEH_NEXT - | fs; mov [0], FCARG1 - |.endif - |.endif - |->vm_unwind_ff_eh: // Landing pad for external unwinder. - | mov L:RB, SAVE_L - | mov RAa, -8 // Results start at BASE+RA = BASE-8. - | mov RD, 1+1 // Really 1+2 results, incr. later. - | mov BASE, L:RB->base - | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. - | add DISPATCH, GG_G2DISP - | mov PC, [BASE-4] // Fetch PC of previous frame. - | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message. - | set_vmstate INTERP - | jmp ->vm_returnc // Increments RD/MULTRES and returns. - | - |.if WIN and not X64 - |->vm_rtlunwind@16: // Thin layer around RtlUnwind. - | // (void *cframe, void *excptrec, void *unwinder, int errcode) - | mov [esp], FCARG1 // Return value for RtlUnwind. - | push FCARG2 // Exception record for RtlUnwind. - | push 0 // Ignored by RtlUnwind. - | push dword [FCARG1+CFRAME_OFS_SEH] - | call extern RtlUnwind@16 // Violates ABI (clobbers too much). 
- | mov FCARG1, eax - | mov FCARG2, [esp+4] // errcode (for vm_unwind_c). - | ret // Jump to unwinder. - |.endif - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | mov FCARG2, LUA_MINSTACK - | jmp >2 - | - |->vm_growstack_v: // Grow stack for vararg Lua function. - | sub RD, 8 - | jmp >1 - | - |->vm_growstack_f: // Grow stack for fixarg Lua function. - | // BASE = new base, RD = nargs+1, RB = L, PC = first PC - | lea RD, [BASE+NARGS:RD*8-8] - |1: - | movzx RA, byte [PC-4+PC2PROTO(framesize)] - | add PC, 4 // Must point after first instruction. - | mov L:RB->base, BASE - | mov L:RB->top, RD - | mov SAVE_PC, PC - | mov FCARG2, RA - |2: - | // RB = L, L->base = new base, L->top = top - | mov FCARG1, L:RB - | call extern lj_state_growstack@8 // (lua_State *L, int n) - | mov BASE, L:RB->base - | mov RD, L:RB->top - | mov LFUNC:RB, [BASE-8] - | sub RD, BASE - | shr RD, 3 - | add NARGS:RD, 1 - | // BASE = new base, RB = LFUNC, RD = nargs+1 - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - |.if X64 - | mov L:RB, CARG1d // Caveat: CARG1d may be RA. - | mov SAVE_L, CARG1d - | mov RA, CARG2d - |.else - | mov L:RB, SAVE_L - | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! - |.endif - | mov PC, FRAME_CP - | xor RD, RD - | lea KBASEa, [esp+CFRAME_RESUME] - | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 
- | add DISPATCH, GG_G2DISP - | mov SAVE_PC, RD // Any value outside of bytecode is ok. - | mov SAVE_CFRAME, RDa - |.if X64 - | mov SAVE_NRES, RD - | mov SAVE_ERRF, RD - |.endif - | mov L:RB->cframe, KBASEa - | cmp byte L:RB->status, RDL - | je >2 // Initial resume (like a call). - | - | // Resume after yield (like a return). - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP - | mov byte L:RB->status, RDL - | mov BASE, L:RB->base - | mov RD, L:RB->top - | sub RD, RA - | shr RD, 3 - | add RD, 1 // RD = nresults+1 - | sub RA, BASE // RA = resultofs - | mov PC, [BASE-4] - | mov MULTRES, RD - | test PC, FRAME_TYPE - | jz ->BC_RET_Z - | jmp ->vm_return - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | mov PC, FRAME_CP - |.if X64 - | mov SAVE_ERRF, CARG4d - |.endif - | jmp >1 - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | mov PC, FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - |.if X64 - | mov SAVE_NRES, CARG3d - | mov L:RB, CARG1d // Caveat: CARG1d may be RA. - | mov SAVE_L, CARG1d - | mov RA, CARG2d - |.else - | mov L:RB, SAVE_L - | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! - |.endif - | - | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. - | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. - | mov SAVE_CFRAME, KBASEa - | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. - | add DISPATCH, GG_G2DISP - |.if X64 - | mov L:RB->cframe, rsp - |.else - | mov L:RB->cframe, esp - |.endif - | - |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP - | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). 
- | add PC, RA - | sub PC, BASE // PC = frame delta + frame type - | - | mov RD, L:RB->top - | sub RD, RA - | shr NARGS:RD, 3 - | add NARGS:RD, 1 // RD = nargs+1 - | - |->vm_call_dispatch: - | mov LFUNC:RB, [RA-8] - | cmp dword [RA-4], LJ_TFUNC - | jne ->vmeta_call // Ensure KBASE defined and != BASE. - | - |->vm_call_dispatch_f: - | mov BASE, RA - | ins_call - | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - |.if X64 - | mov L:RB, CARG1d // Caveat: CARG1d may be RA. - | mov SAVE_L, CARG1d - |.else - | mov L:RB, SAVE_L - | // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap! - | mov RC, INARG_CP_UD // Get args before they are overwritten. - | mov RA, INARG_CP_FUNC - | mov BASE, INARG_CP_CALL - |.endif - | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. - | - | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). - | sub KBASE, L:RB->top - | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. - | mov SAVE_ERRF, 0 // No error function. - | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. - | add DISPATCH, GG_G2DISP - | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). - | - |.if X64 - | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. - | mov SAVE_CFRAME, KBASEa - | mov L:RB->cframe, rsp - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | - | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) - |.else - | mov ARG3, RC // Have to copy args downwards. - | mov ARG2, RA - | mov ARG1, L:RB - | - | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. - | mov SAVE_CFRAME, KBASE - | mov L:RB->cframe, esp - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | - | call BASE // (lua_State *L, lua_CFunction func, void *ud) - |.endif - | // TValue * (new base) or NULL returned in eax (RC). - | test RC, RC - | jz ->vm_leave_cp // No base? 
Just remove C frame. - | mov RA, RC - | mov PC, FRAME_CP - | jmp <2 // Else continue with the call. - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) - | add RA, BASE - | and PC, -8 - | mov RB, BASE - | sub BASE, PC // Restore caller BASE. - | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg. - | mov RC, RA // ... in [RC] - | mov PC, [RB-12] // Restore PC from [cont|PC]. - |.if X64 - | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug. - |.if FFI - | cmp RA, 1 - | jbe >1 - |.endif - | lea KBASEa, qword [=>0] - | add RAa, KBASEa - |.else - | mov RA, dword [RB-16] - |.if FFI - | cmp RA, 1 - | jbe >1 - |.endif - |.endif - | mov LFUNC:KBASE, [BASE-8] - | mov KBASE, LFUNC:KBASE->pc - | mov KBASE, [KBASE+PC2PROTO(k)] - | // BASE = base, RC = result, RB = meta base - | jmp RAa // Jump to continuation. - | - |.if FFI - |1: - | je ->cont_ffi_callback // cont = 1: return from FFI callback. - | // cont = 0: Tail call from C function. 
- | sub RB, BASE - | shr RB, 3 - | lea RD, [RB-1] - | jmp ->vm_call_tail - |.endif - | - |->cont_cat: // BASE = base, RC = result, RB = mbase - | movzx RA, PC_RB - | sub RB, 16 - | lea RA, [BASE+RA*8] - | sub RA, RB - | je ->cont_ra - | neg RA - | shr RA, 3 - |.if X64WIN - | mov CARG3d, RA - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE - | mov RCa, [RC] - | mov [RB], RCa - | mov CARG2d, RB - |.elif X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE - | mov CARG3d, RA - | mov RAa, [RC] - | mov [RB], RAa - | mov CARG2d, RB - |.else - | mov ARG3, RA - | mov RA, [RC+4] - | mov RC, [RC] - | mov [RB+4], RA - | mov [RB], RC - | mov ARG2, RB - |.endif - | jmp ->BC_CAT_Z - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets: - | mov TMP1, RC // RC = GCstr * - | mov TMP2, LJ_TSTR - | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2. - | cmp PC_OP, BC_GGET - | jne >1 - | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. - | mov [RA], TAB:RB // RB = GCtab * - | mov dword [RA+4], LJ_TTAB - | mov RB, RA - | jmp >2 - | - |->vmeta_tgetb: - | movzx RC, PC_RC - |.if DUALNUM - | mov TMP2, LJ_TISNUM - | mov TMP1, RC - |.else - | cvtsi2sd xmm0, RC - | movsd TMPQ, xmm0 - |.endif - | lea RCa, TMPQ // Store temp. TValue in TMPQ. - | jmp >1 - | - |->vmeta_tgetv: - | movzx RC, PC_RC // Reload TValue *k from RC. - | lea RC, [BASE+RC*8] - |1: - | movzx RB, PC_RB // Reload TValue *t from RB. - | lea RB, [BASE+RB*8] - |2: - |.if X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. - | mov CARG2d, RB - | mov CARG3, RCa // May be 64 bit ptr to stack. - | mov L:RB, L:CARG1d - |.else - | mov ARG2, RB - | mov L:RB, SAVE_L - | mov ARG3, RC - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - | // TValue * (finished) or NULL (metamethod) returned in eax (RC). 
- | mov BASE, L:RB->base - | test RC, RC - | jz >3 - |->cont_ra: // BASE = base, RC = result - | movzx RA, PC_RA - |.if X64 - | mov RBa, [RC] - | mov [BASE+RA*8], RBa - |.else - | mov RB, [RC+4] - | mov RC, [RC] - | mov [BASE+RA*8+4], RB - | mov [BASE+RA*8], RC - |.endif - | ins_next - | - |3: // Call __index metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k - | mov RA, L:RB->top - | mov [RA-12], PC // [cont|PC] - | lea PC, [RA+FRAME_CONT] - | sub PC, BASE - | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. - | mov NARGS:RD, 2+1 // 2 args for func(t, k). - | jmp ->vm_call_dispatch_f - | - |->vmeta_tgetr: - | mov FCARG1, TAB:RB - | mov RB, BASE // Save BASE. - | mov FCARG2, RC // Caveat: FCARG2 == BASE - | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) - | // cTValue * or NULL returned in eax (RC). - | movzx RA, PC_RA - | mov BASE, RB // Restore BASE. - | test RC, RC - | jnz ->BC_TGETR_Z - | mov dword [BASE+RA*8+4], LJ_TNIL - | jmp ->BC_TGETR2_Z - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets: - | mov TMP1, RC // RC = GCstr * - | mov TMP2, LJ_TSTR - | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2. - | cmp PC_OP, BC_GSET - | jne >1 - | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. - | mov [RA], TAB:RB // RB = GCtab * - | mov dword [RA+4], LJ_TTAB - | mov RB, RA - | jmp >2 - | - |->vmeta_tsetb: - | movzx RC, PC_RC - |.if DUALNUM - | mov TMP2, LJ_TISNUM - | mov TMP1, RC - |.else - | cvtsi2sd xmm0, RC - | movsd TMPQ, xmm0 - |.endif - | lea RCa, TMPQ // Store temp. TValue in TMPQ. - | jmp >1 - | - |->vmeta_tsetv: - | movzx RC, PC_RC // Reload TValue *k from RC. - | lea RC, [BASE+RC*8] - |1: - | movzx RB, PC_RB // Reload TValue *t from RB. - | lea RB, [BASE+RB*8] - |2: - |.if X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. - | mov CARG2d, RB - | mov CARG3, RCa // May be 64 bit ptr to stack. 
- | mov L:RB, L:CARG1d - |.else - | mov ARG2, RB - | mov L:RB, SAVE_L - | mov ARG3, RC - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - | // TValue * (finished) or NULL (metamethod) returned in eax (RC). - | mov BASE, L:RB->base - | test RC, RC - | jz >3 - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | movzx RA, PC_RA - |.if X64 - | mov RBa, [BASE+RA*8] - | mov [RC], RBa - |.else - | mov RB, [BASE+RA*8+4] - | mov RA, [BASE+RA*8] - | mov [RC+4], RB - | mov [RC], RA - |.endif - |->cont_nop: // BASE = base, (RC = result) - | ins_next - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | mov RA, L:RB->top - | mov [RA-12], PC // [cont|PC] - | movzx RC, PC_RA - | // Copy value to third argument. - |.if X64 - | mov RBa, [BASE+RC*8] - | mov [RA+16], RBa - |.else - | mov RB, [BASE+RC*8+4] - | mov RC, [BASE+RC*8] - | mov [RA+20], RB - | mov [RA+16], RC - |.endif - | lea PC, [RA+FRAME_CONT] - | sub PC, BASE - | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. - | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). - | jmp ->vm_call_dispatch_f - | - |->vmeta_tsetr: - |.if X64WIN - | mov L:CARG1d, SAVE_L - | mov CARG3d, RC - | mov L:CARG1d->base, BASE - | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE. - |.elif X64 - | mov L:CARG1d, SAVE_L - | mov CARG2d, TAB:RB - | mov L:CARG1d->base, BASE - | mov RB, BASE // Save BASE. - | mov CARG3d, RC // Caveat: CARG3d == BASE. - |.else - | mov L:RA, SAVE_L - | mov ARG2, TAB:RB - | mov RB, BASE // Save BASE. - | mov ARG3, RC - | mov ARG1, L:RA - | mov L:RA->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) - | // TValue * returned in eax (RC). - | movzx RA, PC_RA - | mov BASE, RB // Restore BASE. 
- | jmp ->BC_TSETR_Z - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - |.if X64 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d == BASE. - |.if X64WIN - | lea CARG3d, [BASE+RD*8] - | lea CARG2d, [BASE+RA*8] - |.else - | lea CARG2d, [BASE+RA*8] - | lea CARG3d, [BASE+RD*8] - |.endif - | mov CARG1d, L:RB // Caveat: CARG1d/CARG4d == RA. - | movzx CARG4d, PC_OP - |.else - | movzx RB, PC_OP - | lea RD, [BASE+RD*8] - | lea RA, [BASE+RA*8] - | mov ARG4, RB - | mov L:RB, SAVE_L - | mov ARG3, RD - | mov ARG2, RA - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - | // 0/1 or TValue * (metamethod) returned in eax (RC). - |3: - | mov BASE, L:RB->base - | cmp RC, 1 - | ja ->vmeta_binop - |4: - | lea PC, [PC+4] - | jb >6 - |5: - | movzx RD, PC_RD - | branchPC RD - |6: - | ins_next - | - |->cont_condt: // BASE = base, RC = result - | add PC, 4 - | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true. - | jb <5 - | jmp <6 - | - |->cont_condf: // BASE = base, RC = result - | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false. - | jmp <4 - | - |->vmeta_equal: - | sub PC, 4 - |.if X64WIN - | mov CARG3d, RD - | mov CARG4d, RB - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d == BASE. - | mov CARG2d, RA - | mov CARG1d, L:RB // Caveat: CARG1d == RA. - |.elif X64 - | mov CARG2d, RA - | mov CARG4d, RB // Caveat: CARG4d == RA. - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG3d == BASE. - | mov CARG3d, RD - | mov CARG1d, L:RB - |.else - | mov ARG4, RB - | mov L:RB, SAVE_L - | mov ARG3, RD - | mov ARG2, RA - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - | // 0/1 or TValue * (metamethod) returned in eax (RC). 
- | jmp <3 - | - |->vmeta_equal_cd: - |.if FFI - | sub PC, 4 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov FCARG1, L:RB - | mov FCARG2, dword [PC-4] - | mov SAVE_PC, PC - | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins) - | // 0/1 or TValue * (metamethod) returned in eax (RC). - | jmp <3 - |.endif - | - |->vmeta_istype: - |.if X64 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. - | mov CARG2d, RA - | movzx CARG3d, PC_RD - | mov L:CARG1d, L:RB - |.else - | movzx RD, PC_RD - | mov ARG2, RA - | mov L:RB, SAVE_L - | mov ARG3, RD - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) - | mov BASE, L:RB->base - | jmp <6 - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_arith_vno: - |.if DUALNUM - | movzx RB, PC_RB - |.endif - |->vmeta_arith_vn: - | lea RC, [KBASE+RC*8] - | jmp >1 - | - |->vmeta_arith_nvo: - |.if DUALNUM - | movzx RC, PC_RC - |.endif - |->vmeta_arith_nv: - | lea RC, [KBASE+RC*8] - | lea RB, [BASE+RB*8] - | xchg RB, RC - | jmp >2 - | - |->vmeta_unm: - | lea RC, [BASE+RD*8] - | mov RB, RC - | jmp >2 - | - |->vmeta_arith_vvo: - |.if DUALNUM - | movzx RB, PC_RB - |.endif - |->vmeta_arith_vv: - | lea RC, [BASE+RC*8] - |1: - | lea RB, [BASE+RB*8] - |2: - | lea RA, [BASE+RA*8] - |.if X64WIN - | mov CARG3d, RB - | mov CARG4d, RC - | movzx RC, PC_OP - | mov ARG5d, RC - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d == BASE. - | mov CARG2d, RA - | mov CARG1d, L:RB // Caveat: CARG1d == RA. - |.elif X64 - | movzx CARG5d, PC_OP - | mov CARG2d, RA - | mov CARG4d, RC // Caveat: CARG4d == RA. - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE // Caveat: CARG3d == BASE. 
- | mov CARG3d, RB - | mov L:RB, L:CARG1d - |.else - | mov ARG3, RB - | mov L:RB, SAVE_L - | mov ARG4, RC - | movzx RC, PC_OP - | mov ARG2, RA - | mov ARG5, RC - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - | // NULL (finished) or TValue * (metamethod) returned in eax (RC). - | mov BASE, L:RB->base - | test RC, RC - | jz ->cont_nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = base, RC = new base, stack = cont/func/o1/o2 - | mov RA, RC - | sub RC, BASE - | mov [RA-12], PC // [cont|PC] - | lea PC, [RC+FRAME_CONT] - | mov NARGS:RD, 2+1 // 2 args for func(o1, o2). - | jmp ->vm_call_dispatch - | - |->vmeta_len: - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE - | mov L:FCARG1, L:RB - | mov SAVE_PC, PC - | call extern lj_meta_len@8 // (lua_State *L, TValue *o) - | // NULL (retry) or TValue * (metamethod) returned in eax (RC). - | mov BASE, L:RB->base -#if LJ_52 - | test RC, RC - | jne ->vmeta_binop // Binop call for compatibility. - | movzx RD, PC_RD - | mov TAB:FCARG1, [BASE+RD*8] - | jmp ->BC_LEN_Z -#else - | jmp ->vmeta_binop // Binop call for compatibility. -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call_ra: - | lea RA, [BASE+RA*8+8] - |->vmeta_call: // Resolve and call __call metamethod. - | // BASE = old base, RA = new base, RC = nargs+1, PC = return - | mov TMP2, RA // Save RA, RC for us. - | mov TMP1, NARGS:RD - | sub RA, 8 - |.if X64 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. - | mov CARG2d, RA - | lea CARG3d, [RA+NARGS:RD*8] - | mov CARG1d, L:RB // Caveat: CARG1d may be RA. - |.else - | lea RC, [RA+NARGS:RD*8] - | mov L:RB, SAVE_L - | mov ARG2, RA - | mov ARG3, RC - | mov ARG1, L:RB - | mov L:RB->base, BASE // This is the callers base! 
- |.endif - | mov SAVE_PC, PC - | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | mov BASE, L:RB->base - | mov RA, TMP2 - | mov NARGS:RD, TMP1 - | mov LFUNC:RB, [RA-8] - | add NARGS:RD, 1 - | // This is fragile. L->base must not move, KBASE must always be defined. - | cmp KBASE, BASE // Continue with CALLT if flag set. - | je ->BC_CALLT_Z - | mov BASE, RA - | ins_call // Otherwise call resolved metamethod. - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov FCARG2, RA // Caveat: FCARG2 == BASE - | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA - | mov SAVE_PC, PC - | call extern lj_meta_for@8 // (lua_State *L, TValue *base) - | mov BASE, L:RB->base - | mov RC, [PC-4] - | movzx RA, RCH - | movzx OP, RCL - | shr RC, 16 - |.if X64 - | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI. - |.else - | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Retry FORI or JFORI. - |.endif - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | cmp NARGS:RD, 1+1; jb ->fff_fallback - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. 
name: - | cmp NARGS:RD, 2+1; jb ->fff_fallback - |.endmacro - | - |.macro .ffunc_nsse, name, op - | .ffunc_1 name - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - | op xmm0, qword [BASE] - |.endmacro - | - |.macro .ffunc_nsse, name - | .ffunc_nsse name, movsd - |.endmacro - | - |.macro .ffunc_nnsse, name - | .ffunc_2 name - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback - | movsd xmm0, qword [BASE] - | movsd xmm1, qword [BASE+8] - |.endmacro - | - |.macro .ffunc_nnr, name - | .ffunc_2 name - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback - | fld qword [BASE+8] - | fld qword [BASE] - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses label 1. - |.macro ffgccheck - | mov RB, [DISPATCH+DISPATCH_GL(gc.total)] - | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)] - | jb >1 - | call ->fff_gcstep - |1: - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc_1 assert - | mov RB, [BASE+4] - | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback - | mov PC, [BASE-4] - | mov MULTRES, RD - | mov [BASE-4], RB - | mov RB, [BASE] - | mov [BASE-8], RB - | sub RD, 2 - | jz >2 - | mov RA, BASE - |1: - | add RA, 8 - |.if X64 - | mov RBa, [RA] - | mov [RA-8], RBa - |.else - | mov RB, [RA+4] - | mov [RA-4], RB - | mov RB, [RA] - | mov [RA-8], RB - |.endif - | sub RD, 1 - | jnz <1 - |2: - | mov RD, MULTRES - | jmp ->fff_res_ - | - |.ffunc_1 type - | mov RB, [BASE+4] - |.if X64 - | mov RA, RB - | sar RA, 15 - | cmp RA, -2 - | je >3 - |.endif - | mov RC, ~LJ_TNUMX - | not RB - | cmp RC, RB - | cmova RC, RB - |2: - | mov CFUNC:RB, [BASE-8] - | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TSTR - | mov [BASE-8], STR:RC - | jmp ->fff_res1 - |.if X64 - |3: - | mov RC, ~LJ_TLIGHTUD - | jmp <2 - |.endif - | - |//-- Base library: getters and setters 
--------------------------------- - | - |.ffunc_1 getmetatable - | mov RB, [BASE+4] - | mov PC, [BASE-4] - | cmp RB, LJ_TTAB; jne >6 - |1: // Field metatable must be at same offset for GCtab and GCudata! - | mov TAB:RB, [BASE] - | mov TAB:RB, TAB:RB->metatable - |2: - | test TAB:RB, TAB:RB - | mov dword [BASE-4], LJ_TNIL - | jz ->fff_res1 - | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+4*(GCROOT_MMNAME+MM_metatable)] - | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. - | mov [BASE-8], TAB:RB - | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash - | imul RA, #NODE - | add NODE:RA, TAB:RB->node - |3: // Rearranged logic, because we expect _not_ to find the key. - | cmp dword NODE:RA->key.it, LJ_TSTR - | jne >4 - | cmp dword NODE:RA->key.gcr, STR:RC - | je >5 - |4: - | mov NODE:RA, NODE:RA->next - | test NODE:RA, NODE:RA - | jnz <3 - | jmp ->fff_res1 // Not found, keep default result. - |5: - | mov RB, [RA+4] - | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. - | mov RC, [RA] - | mov [BASE-4], RB // Return value of mt.__metatable. - | mov [BASE-8], RC - | jmp ->fff_res1 - | - |6: - | cmp RB, LJ_TUDATA; je <1 - |.if X64 - | cmp RB, LJ_TNUMX; ja >8 - | cmp RB, LJ_TISNUM; jbe >7 - | mov RB, LJ_TLIGHTUD - | jmp >8 - |7: - |.else - | cmp RB, LJ_TISNUM; ja >8 - |.endif - | mov RB, LJ_TNUMX - |8: - | not RB - | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])] - | jmp <2 - | - |.ffunc_2 setmetatable - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback - | // Fast path: no mt for table yet and not clearing the mt. - | mov TAB:RB, [BASE] - | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback - | cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback - | mov TAB:RC, [BASE+8] - | mov TAB:RB->metatable, TAB:RC - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TTAB // Return original table. - | mov [BASE-8], TAB:RB - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jz >1 - | // Possible write barrier. 
Table is black, but skip iswhite(mt) check. - | barrierback TAB:RB, RC - |1: - | jmp ->fff_res1 - | - |.ffunc_2 rawget - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback - |.if X64WIN - | mov RB, BASE // Save BASE. - | lea CARG3d, [BASE+8] - | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. - | mov CARG1d, SAVE_L - |.elif X64 - | mov RB, BASE // Save BASE. - | mov CARG2d, [BASE] - | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. - | mov CARG1d, SAVE_L - |.else - | mov TAB:RD, [BASE] - | mov L:RB, SAVE_L - | mov ARG2, TAB:RD - | mov ARG1, L:RB - | mov RB, BASE // Save BASE. - | add BASE, 8 - | mov ARG3, BASE - |.endif - | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - | // cTValue * returned in eax (RD). - | mov BASE, RB // Restore BASE. - | // Copy table slot. - |.if X64 - | mov RBa, [RD] - | mov PC, [BASE-4] - | mov [BASE-8], RBa - |.else - | mov RB, [RD] - | mov RD, [RD+4] - | mov PC, [BASE-4] - | mov [BASE-8], RB - | mov [BASE-4], RD - |.endif - | jmp ->fff_res1 - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. - | cmp dword [BASE+4], LJ_TISNUM - |.if DUALNUM - | jne >1 - | mov RB, dword [BASE]; jmp ->fff_resi - |1: - | ja ->fff_fallback - |.else - | jae ->fff_fallback - |.endif - | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | mov PC, [BASE-4] - | cmp dword [BASE+4], LJ_TSTR; jne >3 - | // A __tostring method in the string base metatable is ignored. - | mov STR:RD, [BASE] - |2: - | mov dword [BASE-4], LJ_TSTR - | mov [BASE-8], STR:RD - | jmp ->fff_res1 - |3: // Handle numbers inline, unless a number base metatable is present. 
- | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback - | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0 - | jne ->fff_fallback - | ffgccheck // Caveat: uses label 1. - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Add frame since C call can throw. - | mov SAVE_PC, PC // Redundant (but a defined value). - |.if X64 and not X64WIN - | mov FCARG2, BASE // Otherwise: FCARG2 == BASE - |.endif - | mov L:FCARG1, L:RB - |.if DUALNUM - | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o) - |.else - | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np) - |.endif - | // GCstr returned in eax (RD). - | mov BASE, L:RB->base - | jmp <2 - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc_1 next - | je >2 // Missing 2nd arg? - |1: - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Add frame since C call can throw. - | mov L:RB->top, BASE // Dummy frame length is ok. - | mov PC, [BASE-4] - |.if X64WIN - | lea CARG3d, [BASE+8] - | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. - | mov CARG1d, L:RB - |.elif X64 - | mov CARG2d, [BASE] - | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. - | mov CARG1d, L:RB - |.else - | mov TAB:RD, [BASE] - | mov ARG2, TAB:RD - | mov ARG1, L:RB - | add BASE, 8 - | mov ARG3, BASE - |.endif - | mov SAVE_PC, PC // Needed for ITERN fallback. - | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Flag returned in eax (RD). - | mov BASE, L:RB->base - | test RD, RD; jz >3 // End of traversal? - | // Copy key and value to results. - |.if X64 - | mov RBa, [BASE+8] - | mov RDa, [BASE+16] - | mov [BASE-8], RBa - | mov [BASE], RDa - |.else - | mov RB, [BASE+8] - | mov RD, [BASE+12] - | mov [BASE-8], RB - | mov [BASE-4], RD - | mov RB, [BASE+16] - | mov RD, [BASE+20] - | mov [BASE], RB - | mov [BASE+4], RD - |.endif - |->fff_res2: - | mov RD, 1+2 - | jmp ->fff_res - |2: // Set missing 2nd arg to nil. 
- | mov dword [BASE+12], LJ_TNIL - | jmp <1 - |3: // End of traversal: return nil. - | mov dword [BASE-4], LJ_TNIL - | jmp ->fff_res1 - | - |.ffunc_1 pairs - | mov TAB:RB, [BASE] - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback -#if LJ_52 - | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback -#endif - | mov CFUNC:RB, [BASE-8] - | mov CFUNC:RD, CFUNC:RB->upvalue[0] - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TFUNC - | mov [BASE-8], CFUNC:RD - | mov dword [BASE+12], LJ_TNIL - | mov RD, 1+3 - | jmp ->fff_res - | - |.ffunc_2 ipairs_aux - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback - | cmp dword [BASE+12], LJ_TISNUM - |.if DUALNUM - | jne ->fff_fallback - |.else - | jae ->fff_fallback - |.endif - | mov PC, [BASE-4] - |.if DUALNUM - | mov RD, dword [BASE+8] - | add RD, 1 - | mov dword [BASE-4], LJ_TISNUM - | mov dword [BASE-8], RD - |.else - | movsd xmm0, qword [BASE+8] - | sseconst_1 xmm1, RBa - | addsd xmm0, xmm1 - | cvttsd2si RD, xmm0 - | movsd qword [BASE-8], xmm0 - |.endif - | mov TAB:RB, [BASE] - | cmp RD, TAB:RB->asize; jae >2 // Not in array part? - | shl RD, 3 - | add RD, TAB:RB->array - |1: - | cmp dword [RD+4], LJ_TNIL; je ->fff_res0 - | // Copy array slot. - |.if X64 - | mov RBa, [RD] - | mov [BASE], RBa - |.else - | mov RB, [RD] - | mov RD, [RD+4] - | mov [BASE], RB - | mov [BASE+4], RD - |.endif - | jmp ->fff_res2 - |2: // Check for empty hash part first. Otherwise call C function. - | cmp dword TAB:RB->hmask, 0; je ->fff_res0 - | mov FCARG1, TAB:RB - | mov RB, BASE // Save BASE. - | mov FCARG2, RD // Caveat: FCARG2 == BASE - | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) - | // cTValue * or NULL returned in eax (RD). 
- | mov BASE, RB - | test RD, RD - | jnz <1 - |->fff_res0: - | mov RD, 1+0 - | jmp ->fff_res - | - |.ffunc_1 ipairs - | mov TAB:RB, [BASE] - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback -#if LJ_52 - | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback -#endif - | mov CFUNC:RB, [BASE-8] - | mov CFUNC:RD, CFUNC:RB->upvalue[0] - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TFUNC - | mov [BASE-8], CFUNC:RD - |.if DUALNUM - | mov dword [BASE+12], LJ_TISNUM - | mov dword [BASE+8], 0 - |.else - | xorps xmm0, xmm0 - | movsd qword [BASE+8], xmm0 - |.endif - | mov RD, 1+3 - | jmp ->fff_res - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc_1 pcall - | lea RA, [BASE+8] - | sub NARGS:RD, 1 - | mov PC, 8+FRAME_PCALL - |1: - | movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)] - | shr RB, HOOK_ACTIVE_SHIFT - | and RB, 1 - | add PC, RB // Remember active hook before pcall. - | jmp ->vm_call_dispatch - | - |.ffunc_2 xpcall - | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback - | mov RB, [BASE+4] // Swap function and traceback. - | mov [BASE+12], RB - | mov dword [BASE+4], LJ_TFUNC - | mov LFUNC:RB, [BASE] - | mov PC, [BASE+8] - | mov [BASE+8], LFUNC:RB - | mov [BASE], PC - | lea RA, [BASE+16] - | sub NARGS:RD, 2 - | mov PC, 16+FRAME_PCALL - | jmp <1 - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | mov L:RB, [BASE] - |.else - |.ffunc coroutine_wrap_aux - | mov CFUNC:RB, [BASE-8] - | mov L:RB, CFUNC:RB->upvalue[0].gcr - |.endif - | mov PC, [BASE-4] - | mov SAVE_PC, PC - |.if X64 - | mov TMP1, L:RB - |.else - | mov ARG1, L:RB - |.endif - |.if resume - | cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback - |.endif - | cmp aword L:RB->cframe, 0; jne ->fff_fallback - | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback - | mov RA, L:RB->top - | je >1 // Status != LUA_YIELD (i.e. 0)? 
- | cmp RA, L:RB->base // Check for presence of initial func. - | je ->fff_fallback - |1: - |.if resume - | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread). - |.else - | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1). - |.endif - | cmp PC, L:RB->maxstack; ja ->fff_fallback - | mov L:RB->top, PC - | - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - |.if resume - | add BASE, 8 // Keep resumed thread in stack for GC. - |.endif - | mov L:RB->top, BASE - |.if resume - | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move. - |.else - | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move. - |.endif - | sub RBa, PCa // Relative to PC. - | - | cmp PC, RA - | je >3 - |2: // Move args to coroutine. - |.if X64 - | mov RCa, [PC+RB] - | mov [PC-8], RCa - |.else - | mov RC, [PC+RB+4] - | mov [PC-4], RC - | mov RC, [PC+RB] - | mov [PC-8], RC - |.endif - | sub PC, 8 - | cmp PC, RA - | jne <2 - |3: - |.if X64 - | mov CARG2d, RA - | mov CARG1d, TMP1 - |.else - | mov ARG2, RA - | xor RA, RA - | mov ARG4, RA - | mov ARG3, RA - |.endif - | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) - | - | mov L:RB, SAVE_L - |.if X64 - | mov L:PC, TMP1 - |.else - | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. - |.endif - | mov BASE, L:RB->base - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP - | - | cmp eax, LUA_YIELD - | ja >8 - |4: - | mov RA, L:PC->base - | mov KBASE, L:PC->top - | mov L:PC->top, RA // Clear coroutine stack. - | mov PC, KBASE - | sub PC, RA - | je >6 // No results? - | lea RD, [BASE+PC] - | shr PC, 3 - | cmp RD, L:RB->maxstack - | ja >9 // Need to grow stack? - | - | mov RB, BASE - | sub RBa, RAa - |5: // Move results from coroutine. - |.if X64 - | mov RDa, [RA] - | mov [RA+RB], RDa - |.else - | mov RD, [RA] - | mov [RA+RB], RD - | mov RD, [RA+4] - | mov [RA+RB+4], RD - |.endif - | add RA, 8 - | cmp RA, KBASE - | jne <5 - |6: - |.if resume - | lea RD, [PC+2] // nresults+1 = 1 + true + results. 
- | mov dword [BASE-4], LJ_TTRUE // Prepend true to results. - |.else - | lea RD, [PC+1] // nresults+1 = 1 + results. - |.endif - |7: - | mov PC, SAVE_PC - | mov MULTRES, RD - |.if resume - | mov RAa, -8 - |.else - | xor RA, RA - |.endif - | test PC, FRAME_TYPE - | jz ->BC_RET_Z - | jmp ->vm_return - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | mov dword [BASE-4], LJ_TFALSE // Prepend false to results. - | mov RA, L:PC->top - | sub RA, 8 - | mov L:PC->top, RA // Clear error from coroutine stack. - | // Copy error message. - |.if X64 - | mov RDa, [RA] - | mov [BASE], RDa - |.else - | mov RD, [RA] - | mov [BASE], RD - | mov RD, [RA+4] - | mov [BASE+4], RD - |.endif - | mov RD, 1+2 // nresults+1 = 1 + false + error. - | jmp <7 - |.else - | mov FCARG2, L:PC - | mov FCARG1, L:RB - | call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co) - | // Error function does not return. - |.endif - | - |9: // Handle stack expansion on return from yield. - |.if X64 - | mov L:RA, TMP1 - |.else - | mov L:RA, ARG1 // The callee doesn't modify SAVE_L. - |.endif - | mov L:RA->top, KBASE // Undo coroutine stack clearing. - | mov FCARG2, PC - | mov FCARG1, L:RB - | call extern lj_state_growstack@8 // (lua_State *L, int n) - |.if X64 - | mov L:PC, TMP1 - |.else - | mov L:PC, ARG1 - |.endif - | mov BASE, L:RB->base - | jmp <4 // Retry the stack move. - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | mov L:RB, SAVE_L - | test aword L:RB->cframe, CFRAME_RESUME - | jz ->fff_fallback - | mov L:RB->base, BASE - | lea RD, [BASE+NARGS:RD*8-8] - | mov L:RB->top, RD - | xor RD, RD - | mov aword L:RB->cframe, RDa - | mov al, LUA_YIELD - | mov byte L:RB->status, al - | jmp ->vm_leave_unw - | - |//-- Math library ------------------------------------------------------- - | - |.if not DUALNUM - |->fff_resi: // Dummy. 
- |.endif - | - |->fff_resn: - | mov PC, [BASE-4] - | fstp qword [BASE-8] - | jmp ->fff_res1 - | - | .ffunc_1 math_abs - |.if DUALNUM - | cmp dword [BASE+4], LJ_TISNUM; jne >2 - | mov RB, dword [BASE] - | cmp RB, 0; jns ->fff_resi - | neg RB; js >1 - |->fff_resbit: - |->fff_resi: - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TISNUM - | mov dword [BASE-8], RB - | jmp ->fff_res1 - |1: - | mov PC, [BASE-4] - | mov dword [BASE-4], 0x41e00000 // 2^31. - | mov dword [BASE-8], 0 - | jmp ->fff_res1 - |2: - | ja ->fff_fallback - |.else - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - |.endif - | movsd xmm0, qword [BASE] - | sseconst_abs xmm1, RDa - | andps xmm0, xmm1 - |->fff_resxmm0: - | mov PC, [BASE-4] - | movsd qword [BASE-8], xmm0 - | // fallthrough - | - |->fff_res1: - | mov RD, 1+1 - |->fff_res: - | mov MULTRES, RD - |->fff_res_: - | test PC, FRAME_TYPE - | jnz >7 - |5: - | cmp PC_RB, RDL // More results expected? - | ja >6 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | movzx RA, PC_RA - | not RAa // Note: ~RA = -(RA+1) - | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 - | ins_next - | - |6: // Fill up results with nil. - | mov dword [BASE+RD*8-12], LJ_TNIL - | add RD, 1 - | jmp <5 - | - |7: // Non-standard return case. - | mov RAa, -8 // Results start at BASE+RA = BASE-8. - | jmp ->vm_return - | - |.if X64 - |.define fff_resfp, fff_resxmm0 - |.else - |.define fff_resfp, fff_resn - |.endif - | - |.macro math_round, func - | .ffunc math_ .. func - |.if DUALNUM - | cmp dword [BASE+4], LJ_TISNUM; jne >1 - | mov RB, dword [BASE]; jmp ->fff_resi - |1: - | ja ->fff_fallback - |.else - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - |.endif - | movsd xmm0, qword [BASE] - | call ->vm_ .. func .. 
_sse - |.if DUALNUM - | cvttsd2si RB, xmm0 - | cmp RB, 0x80000000 - | jne ->fff_resi - | cvtsi2sd xmm1, RB - | ucomisd xmm0, xmm1 - | jp ->fff_resxmm0 - | je ->fff_resi - |.endif - | jmp ->fff_resxmm0 - |.endmacro - | - | math_round floor - | math_round ceil - | - |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 - | - |.ffunc math_log - | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - | movsd xmm0, qword [BASE] - |.if not X64 - | movsd FPARG1, xmm0 - |.endif - | mov RB, BASE - | call extern log - | mov BASE, RB - | jmp ->fff_resfp - | - |.macro math_extern, func - | .ffunc_nsse math_ .. func - |.if not X64 - | movsd FPARG1, xmm0 - |.endif - | mov RB, BASE - | call extern func - | mov BASE, RB - | jmp ->fff_resfp - |.endmacro - | - |.macro math_extern2, func - | .ffunc_nnsse math_ .. func - |.if not X64 - | movsd FPARG1, xmm0 - | movsd FPARG3, xmm1 - |.endif - | mov RB, BASE - | call extern func - | mov BASE, RB - | jmp ->fff_resfp - |.endmacro - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn - | - |.ffunc_1 math_frexp - | mov RB, [BASE+4] - | cmp RB, LJ_TISNUM; jae ->fff_fallback - | mov PC, [BASE-4] - | mov RC, [BASE] - | mov [BASE-4], RB; mov [BASE-8], RC - | shl RB, 1; cmp RB, 0xffe00000; jae >3 - | or RC, RB; jz >3 - | mov RC, 1022 - | cmp RB, 0x00200000; jb >4 - |1: - | shr RB, 21; sub RB, RC // Extract and unbias exponent. - | cvtsi2sd xmm0, RB - | mov RB, [BASE-4] - | and RB, 0x800fffff // Mask off exponent. - | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 
- | mov [BASE-4], RB - |2: - | movsd qword [BASE], xmm0 - | mov RD, 1+2 - | jmp ->fff_res - |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. - | xorps xmm0, xmm0; jmp <2 - |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. - | movsd xmm0, qword [BASE] - | sseconst_hi xmm1, RBa, 43500000 // 2^54. - | mulsd xmm0, xmm1 - | movsd qword [BASE-8], xmm0 - | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 - | - |.ffunc_nsse math_modf - | mov RB, [BASE+4] - | mov PC, [BASE-4] - | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? - | movaps xmm4, xmm0 - | call ->vm_trunc_sse - | subsd xmm4, xmm0 - |1: - | movsd qword [BASE-8], xmm0 - | movsd qword [BASE], xmm4 - | mov RC, [BASE-4]; mov RB, [BASE+4] - | xor RC, RB; js >3 // Need to adjust sign? - |2: - | mov RD, 1+2 - | jmp ->fff_res - |3: - | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. - | jmp <2 - |4: - | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. - | - |.macro math_minmax, name, cmovop, sseop - | .ffunc name - | mov RA, 2 - | cmp dword [BASE+4], LJ_TISNUM - |.if DUALNUM - | jne >4 - | mov RB, dword [BASE] - |1: // Handle integers. - | cmp RA, RD; jae ->fff_resi - | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3 - | cmp RB, dword [BASE+RA*8-8] - | cmovop RB, dword [BASE+RA*8-8] - | add RA, 1 - | jmp <1 - |3: - | ja ->fff_fallback - | // Convert intermediate result to number and continue below. - | cvtsi2sd xmm0, RB - | jmp >6 - |4: - | ja ->fff_fallback - |.else - | jae ->fff_fallback - |.endif - | - | movsd xmm0, qword [BASE] - |5: // Handle numbers or integers. 
- | cmp RA, RD; jae ->fff_resxmm0 - | cmp dword [BASE+RA*8-4], LJ_TISNUM - |.if DUALNUM - | jb >6 - | ja ->fff_fallback - | cvtsi2sd xmm1, dword [BASE+RA*8-8] - | jmp >7 - |.else - | jae ->fff_fallback - |.endif - |6: - | movsd xmm1, qword [BASE+RA*8-8] - |7: - | sseop xmm0, xmm1 - | add RA, 1 - | jmp <5 - |.endmacro - | - | math_minmax math_min, cmovg, minsd - | math_minmax math_max, cmovl, maxsd - | - |//-- String library ----------------------------------------------------- - | - |.ffunc string_byte // Only handle the 1-arg case here. - | cmp NARGS:RD, 1+1; jne ->fff_fallback - | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback - | mov STR:RB, [BASE] - | mov PC, [BASE-4] - | cmp dword STR:RB->len, 1 - | jb ->fff_res0 // Return no results for empty string. - | movzx RB, byte STR:RB[1] - |.if DUALNUM - | jmp ->fff_resi - |.else - | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 - |.endif - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. - | cmp dword [BASE+4], LJ_TISNUM - |.if DUALNUM - | jne ->fff_fallback - | mov RB, dword [BASE] - | cmp RB, 255; ja ->fff_fallback - | mov TMP2, RB - |.else - | jae ->fff_fallback - | cvttsd2si RB, qword [BASE] - | cmp RB, 255; ja ->fff_fallback - | mov TMP2, RB - |.endif - |.if X64 - | mov TMP3, 1 - |.else - | mov ARG3, 1 - |.endif - | lea RDa, TMP2 // Points to stack. Little-endian. - |->fff_newstr: - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - |.if X64 - | mov CARG3d, TMP3 // Zero-extended to size_t. - | mov CARG2, RDa // May be 64 bit ptr to stack. - | mov CARG1d, L:RB - |.else - | mov ARG2, RD - | mov ARG1, L:RB - |.endif - | mov SAVE_PC, PC - | call extern lj_str_new // (lua_State *L, char *str, size_t l) - |->fff_resstr: - | // GCstr * returned in eax (RD). 
- | mov BASE, L:RB->base - | mov PC, [BASE-4] - | mov dword [BASE-4], LJ_TSTR - | mov [BASE-8], STR:RD - | jmp ->fff_res1 - | - |.ffunc string_sub - | ffgccheck - | mov TMP2, -1 - | cmp NARGS:RD, 1+2; jb ->fff_fallback - | jna >1 - | cmp dword [BASE+20], LJ_TISNUM - |.if DUALNUM - | jne ->fff_fallback - | mov RB, dword [BASE+16] - | mov TMP2, RB - |.else - | jae ->fff_fallback - | cvttsd2si RB, qword [BASE+16] - | mov TMP2, RB - |.endif - |1: - | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback - | cmp dword [BASE+12], LJ_TISNUM - |.if DUALNUM - | jne ->fff_fallback - |.else - | jae ->fff_fallback - |.endif - | mov STR:RB, [BASE] - | mov TMP3, STR:RB - | mov RB, STR:RB->len - |.if DUALNUM - | mov RA, dword [BASE+8] - |.else - | cvttsd2si RA, qword [BASE+8] - |.endif - | mov RC, TMP2 - | cmp RB, RC // len < end? (unsigned compare) - | jb >5 - |2: - | test RA, RA // start <= 0? - | jle >7 - |3: - | mov STR:RB, TMP3 - | sub RC, RA // start > end? - | jl ->fff_emptystr - | lea RB, [STR:RB+RA+#STR-1] - | add RC, 1 - |4: - |.if X64 - | mov TMP3, RC - |.else - | mov ARG3, RC - |.endif - | mov RD, RB - | jmp ->fff_newstr - | - |5: // Negative end or overflow. - | jl >6 - | lea RC, [RC+RB+1] // end = end+(len+1) - | jmp <2 - |6: // Overflow. - | mov RC, RB // end = len - | jmp <2 - | - |7: // Negative start or underflow. - | je >8 - | add RA, RB // start = start+(len+1) - | add RA, 1 - | jg <3 // start > 0? - |8: // Underflow. - | mov RA, 1 // start = 1 - | jmp <3 - | - |->fff_emptystr: // Range underflow. - | xor RC, RC // Zero length. Any ptr in RB is ok. - | jmp <4 - | - |.macro ffstring_op, name - | .ffunc_1 string_ .. name - | ffgccheck - | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback - | mov L:RB, SAVE_L - | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] - | mov L:RB->base, BASE - | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE - | mov RC, SBUF:FCARG1->b - | mov SBUF:FCARG1->L, L:RB - | mov SBUF:FCARG1->p, RC - | mov SAVE_PC, PC - | call extern lj_buf_putstr_ .. 
name .. @8 - | mov FCARG1, eax - | call extern lj_buf_tostr@4 - | jmp ->fff_resstr - |.endmacro - | - |ffstring_op reverse - |ffstring_op lower - |ffstring_op upper - | - |//-- Bit library -------------------------------------------------------- - | - |.macro .ffunc_bit, name, kind, fdef - | fdef name - |.if kind == 2 - | sseconst_tobit xmm1, RBa - |.endif - | cmp dword [BASE+4], LJ_TISNUM - |.if DUALNUM - | jne >1 - | mov RB, dword [BASE] - |.if kind > 0 - | jmp >2 - |.else - | jmp ->fff_resbit - |.endif - |1: - | ja ->fff_fallback - |.else - | jae ->fff_fallback - |.endif - | movsd xmm0, qword [BASE] - |.if kind < 2 - | sseconst_tobit xmm1, RBa - |.endif - | addsd xmm0, xmm1 - | movd RB, xmm0 - |2: - |.endmacro - | - |.macro .ffunc_bit, name, kind - | .ffunc_bit name, kind, .ffunc_1 - |.endmacro - | - |.ffunc_bit bit_tobit, 0 - | jmp ->fff_resbit - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name, 2 - | mov TMP2, NARGS:RD // Save for fallback. - | lea RD, [BASE+NARGS:RD*8-16] - |1: - | cmp RD, BASE - | jbe ->fff_resbit - | cmp dword [RD+4], LJ_TISNUM - |.if DUALNUM - | jne >2 - | ins RB, dword [RD] - | sub RD, 8 - | jmp <1 - |2: - | ja ->fff_fallback_bit_op - |.else - | jae ->fff_fallback_bit_op - |.endif - | movsd xmm0, qword [RD] - | addsd xmm0, xmm1 - | movd RA, xmm0 - | ins RB, RA - | sub RD, 8 - | jmp <1 - |.endmacro - | - |.ffunc_bit_op bit_band, and - |.ffunc_bit_op bit_bor, or - |.ffunc_bit_op bit_bxor, xor - | - |.ffunc_bit bit_bswap, 1 - | bswap RB - | jmp ->fff_resbit - | - |.ffunc_bit bit_bnot, 1 - | not RB - |.if DUALNUM - | jmp ->fff_resbit - |.else - |->fff_resbit: - | cvtsi2sd xmm0, RB - | jmp ->fff_resxmm0 - |.endif - | - |->fff_fallback_bit_op: - | mov NARGS:RD, TMP2 // Restore for fallback - | jmp ->fff_fallback - | - |.macro .ffunc_bit_sh, name, ins - |.if DUALNUM - | .ffunc_bit name, 1, .ffunc_2 - | // Note: no inline conversion from number for 2nd argument! 
- | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback - | mov RA, dword [BASE+8] - |.else - | .ffunc_nnsse name - | sseconst_tobit xmm2, RBa - | addsd xmm0, xmm2 - | addsd xmm1, xmm2 - | movd RB, xmm0 - | movd RA, xmm1 - |.endif - | ins RB, cl // Assumes RA is ecx. - | jmp ->fff_resbit - |.endmacro - | - |.ffunc_bit_sh bit_lshift, shl - |.ffunc_bit_sh bit_rshift, shr - |.ffunc_bit_sh bit_arshift, sar - |.ffunc_bit_sh bit_rol, rol - |.ffunc_bit_sh bit_ror, ror - | - |//----------------------------------------------------------------------- - | - |->fff_fallback_2: - | mov NARGS:RD, 1+2 // Other args are ignored, anyway. - | jmp ->fff_fallback - |->fff_fallback_1: - | mov NARGS:RD, 1+1 // Other args are ignored, anyway. - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RD = nargs+1 - | mov L:RB, SAVE_L - | mov PC, [BASE-4] // Fallback may overwrite PC. - | mov SAVE_PC, PC // Redundant (but a defined value). - | mov L:RB->base, BASE - | lea RD, [BASE+NARGS:RD*8-8] - | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler. - | mov L:RB->top, RD - | mov CFUNC:RD, [BASE-8] - | cmp RA, L:RB->maxstack - | ja >5 // Need to grow stack. - |.if X64 - | mov CARG1d, L:RB - |.else - | mov ARG1, L:RB - |.endif - | call aword CFUNC:RD->f // (lua_State *L) - | mov BASE, L:RB->base - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | test RD, RD; jg ->fff_res // Returned nresults+1? - |1: - | mov RA, L:RB->top - | sub RA, BASE - | shr RA, 3 - | test RD, RD - | lea NARGS:RD, [RA+1] - | mov LFUNC:RB, [BASE-8] - | jne ->vm_call_tail // Returned -1? - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | mov RA, BASE - | test PC, FRAME_TYPE - | jnz >3 - | movzx RB, PC_RA - | not RBa // Note: ~RB = -(RB+1) - | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8 - | jmp ->vm_call_dispatch // Resolve again for tailcall. 
- |3: - | mov RB, PC - | and RB, -8 - | sub BASE, RB - | jmp ->vm_call_dispatch // Resolve again for tailcall. - | - |5: // Grow stack for fallback handler. - | mov FCARG2, LUA_MINSTACK - | mov FCARG1, L:RB - | call extern lj_state_growstack@8 // (lua_State *L, int n) - | mov BASE, L:RB->base - | xor RD, RD // Simulate a return 0. - | jmp <1 // Dumb retry (goes through ff first). - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RD = nargs+1 - | pop RBa // Must keep stack at same level. - | mov TMPa, RBa // Save return address - | mov L:RB, SAVE_L - | mov SAVE_PC, PC // Redundant (but a defined value). - | mov L:RB->base, BASE - | lea RD, [BASE+NARGS:RD*8-8] - | mov FCARG1, L:RB - | mov L:RB->top, RD - | call extern lj_gc_step@4 // (lua_State *L) - | mov BASE, L:RB->base - | mov RD, L:RB->top - | sub RD, BASE - | shr RD, 3 - | add NARGS:RD, 1 - | mov RBa, TMPa - | push RBa // Restore return address. - | ret - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] - | test RDL, HOOK_VMEVENT // No recording while in vmevent. - | jnz >5 - | // Decrement the hookcount for consistency, but always do the call. - | test RDL, HOOK_ACTIVE - | jnz >1 - | test RDL, LUA_MASKLINE|LUA_MASKCOUNT - | jz >1 - | dec dword [DISPATCH+DISPATCH_GL(hookcount)] - | jmp >1 - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] - | test RDL, HOOK_ACTIVE // Hook already active? - | jnz >5 - | jmp >1 - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] - | test RDL, HOOK_ACTIVE // Hook already active? 
- | jnz >5 - | - | test RDL, LUA_MASKLINE|LUA_MASKCOUNT - | jz >5 - | dec dword [DISPATCH+DISPATCH_GL(hookcount)] - | jz >1 - | test RDL, LUA_MASKLINE - | jz >5 - |1: - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov FCARG2, PC // Caveat: FCARG2 == BASE - | mov FCARG1, L:RB - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc) - |3: - | mov BASE, L:RB->base - |4: - | movzx RA, PC_RA - |5: - | movzx OP, PC_OP - | movzx RD, PC_RD - |.if X64 - | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins. - |.else - | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins. - |.endif - | - |->cont_hook: // Continue from hook yield. - | add PC, 4 - | mov RA, [RB-24] - | mov MULTRES, RA // Restore MULTRES for *M ins. - | jmp <4 - | - |->vm_hotloop: // Hot loop counter underflow. - |.if JIT - | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L). - | mov RB, LFUNC:RB->pc - | movzx RD, byte [RB+PC2PROTO(framesize)] - | lea RD, [BASE+RD*8] - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov L:RB->top, RD - | mov FCARG2, PC - | lea FCARG1, [DISPATCH+GG_DISP2J] - | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa - | mov SAVE_PC, PC - | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc) - | jmp <3 - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - | mov SAVE_PC, PC - |.if JIT - | jmp >1 - |.endif - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | mov SAVE_PC, PC - | or PC, 1 // Marker for hot call. - |1: - |.endif - | lea RD, [BASE+NARGS:RD*8-8] - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov L:RB->top, RD - | mov FCARG2, PC - | mov FCARG1, L:RB - | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc) - | // ASMFunction returned in eax/rax (RDa). - | mov SAVE_PC, 0 // Invalidate for subsequent line hook. 
- |.if JIT - | and PC, -2 - |.endif - | mov BASE, L:RB->base - | mov RAa, RDa - | mov RD, L:RB->top - | sub RD, BASE - | mov RBa, RAa - | movzx RA, PC_RA - | shr RD, 3 - | add NARGS:RD, 1 - | jmp RBa - | - |->cont_stitch: // Trace stitching. - |.if JIT - | // BASE = base, RC = result, RB = mbase - | mov TRACE:RA, [RB-24] // Save previous trace. - | mov TMP1, TRACE:RA - | mov TMP3, DISPATCH // Need one more register. - | mov DISPATCH, MULTRES - | movzx RA, PC_RA - | lea RA, [BASE+RA*8] // Call base. - | sub DISPATCH, 1 - | jz >2 - |1: // Move results down. - |.if X64 - | mov RBa, [RC] - | mov [RA], RBa - |.else - | mov RB, [RC] - | mov [RA], RB - | mov RB, [RC+4] - | mov [RA+4], RB - |.endif - | add RC, 8 - | add RA, 8 - | sub DISPATCH, 1 - | jnz <1 - |2: - | movzx RC, PC_RA - | movzx RB, PC_RB - | add RC, RB - | lea RC, [BASE+RC*8-8] - |3: - | cmp RC, RA - | ja >9 // More results wanted? - | - | mov DISPATCH, TMP3 - | mov TRACE:RD, TMP1 // Get previous trace. - | movzx RB, word TRACE:RD->traceno - | movzx RD, word TRACE:RD->link - | cmp RD, RB - | je ->cont_nop // Blacklisted. - | test RD, RD - | jne =>BC_JLOOP // Jump to stitched trace. - | - | // Stitch a new trace to the previous trace. - | mov [DISPATCH+DISPATCH_J(exitno)], RB - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov FCARG2, PC - | lea FCARG1, [DISPATCH+GG_DISP2J] - | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa - | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc) - | mov BASE, L:RB->base - | jmp ->cont_nop - | - |9: // Fill up results with nil. - | mov dword [RA+4], LJ_TNIL - | add RA, 8 - | jmp <3 - |.endif - | - |->vm_profhook: // Dispatch target for profiler hook. -#if LJ_HASPROFILE - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov FCARG2, PC // Caveat: FCARG2 == BASE - | mov FCARG1, L:RB - | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc) - | mov BASE, L:RB->base - | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. 
- | sub PC, 4 - | jmp ->cont_nop -#endif - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Called from an exit stub with the exit number on the stack. - |// The 16 bit exit number is stored with two (sign-extended) push imm8. - |->vm_exit_handler: - |.if JIT - |.if X64 - | push r13; push r12 - | push r11; push r10; push r9; push r8 - | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp - | push rbx; push rdx; push rcx; push rax - | movzx RC, byte [rbp-8] // Reconstruct exit number. - | mov RCH, byte [rbp-16] - | mov [rbp-8], r15; mov [rbp-16], r14 - |.else - | push ebp; lea ebp, [esp+12]; push ebp - | push ebx; push edx; push ecx; push eax - | movzx RC, byte [ebp-4] // Reconstruct exit number. - | mov RCH, byte [ebp-8] - | mov [ebp-4], edi; mov [ebp-8], esi - |.endif - | // Caveat: DISPATCH is ebx. - | mov DISPATCH, [ebp] - | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. - | set_vmstate EXIT - | mov [DISPATCH+DISPATCH_J(exitno)], RC - | mov [DISPATCH+DISPATCH_J(parent)], RA - |.if X64 - |.if X64WIN - | sub rsp, 16*8+4*8 // Room for SSE regs + save area. - |.else - | sub rsp, 16*8 // Room for SSE regs. - |.endif - | add rbp, -128 - | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14 - | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12 - | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10 - | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8 - | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6 - | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4 - | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2 - | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0 - |.else - | sub esp, 8*8+16 // Room for SSE regs + args. 
- | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6 - | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4 - | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2 - | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 - |.endif - | // Caveat: RB is ebp. - | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] - | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] - | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa - | mov L:RB->base, BASE - |.if X64WIN - | lea CARG2, [rsp+4*8] - |.elif X64 - | mov CARG2, rsp - |.else - | lea FCARG2, [esp+16] - |.endif - | lea FCARG1, [DISPATCH+GG_DISP2J] - | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 - | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) - | // MULTRES or negated error code returned in eax (RD). - | mov RAa, L:RB->cframe - | and RAa, CFRAME_RAWMASK - |.if X64WIN - | // Reposition stack later. - |.elif X64 - | mov rsp, RAa // Reposition stack to C frame. - |.else - | mov esp, RAa // Reposition stack to C frame. - |.endif - | mov [RAa+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield). - | mov BASE, L:RB->base - | mov PC, [RAa+CFRAME_OFS_PC] // Get SAVE_PC. - |.if X64 - | jmp >1 - |.endif - |.endif - |->vm_exit_interp: - | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. - |.if JIT - |.if X64 - | // Restore additional callee-save registers only used in compiled code. - |.if X64WIN - | lea RAa, [rsp+9*16+4*8] - |1: - | movdqa xmm15, [RAa-9*16] - | movdqa xmm14, [RAa-8*16] - | movdqa xmm13, [RAa-7*16] - | movdqa xmm12, [RAa-6*16] - | movdqa xmm11, [RAa-5*16] - | movdqa xmm10, [RAa-4*16] - | movdqa xmm9, [RAa-3*16] - | movdqa xmm8, [RAa-2*16] - | movdqa xmm7, [RAa-1*16] - | mov rsp, RAa // Reposition stack to C frame. - | movdqa xmm6, [RAa] - | mov r15, CSAVE_3 - | mov r14, CSAVE_4 - |.else - | add rsp, 16 // Reposition stack to C frame. - |1: - |.endif - | mov r13, TMPa - | mov r12, TMPQ - |.endif - | test RD, RD; js >9 // Check for error from exit. 
- | mov L:RB, SAVE_L - | mov MULTRES, RD - | mov LFUNC:KBASE, [BASE-8] - | mov KBASE, LFUNC:KBASE->pc - | mov KBASE, [KBASE+PC2PROTO(k)] - | mov L:RB->base, BASE - | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 - | set_vmstate INTERP - | // Modified copy of ins_next which handles function header dispatch, too. - | mov RC, [PC] - | movzx RA, RCH - | movzx OP, RCL - | add PC, 4 - | shr RC, 16 - | cmp OP, BC_FUNCF // Function header? - | jb >3 - | cmp OP, BC_FUNCC+2 // Fast function? - | jae >4 - |2: - | mov RC, MULTRES // RC/RD holds nres+1. - |3: - |.if X64 - | jmp aword [DISPATCH+OP*8] - |.else - | jmp aword [DISPATCH+OP*4] - |.endif - | - |4: // Check frame below fast function. - | mov RC, [BASE-4] - | test RC, FRAME_TYPE - | jnz <2 // Trace stitching continuation? - | // Otherwise set KBASE for Lua function below fast function. - | movzx RC, byte [RC-3] - | not RCa - | mov LFUNC:KBASE, [BASE+RC*8-8] - | mov KBASE, LFUNC:KBASE->pc - | mov KBASE, [KBASE+PC2PROTO(k)] - | jmp <2 - | - |9: // Rethrow error from the right C frame. - | neg RD - | mov FCARG1, L:RB - | mov FCARG2, RD - | call extern lj_err_throw@8 // (lua_State *L, int errcode) - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// FP value rounding. Called by math.floor/math.ceil fast functions - |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. - |.macro vm_round, name, mode, cond - |->name: - |.if not X64 and cond - | movsd xmm0, qword [esp+4] - | call ->name .. _sse - | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. - | fld qword [esp+4] - | ret - |.endif - | - |->name .. _sse: - | sseconst_abs xmm2, RDa - | sseconst_2p52 xmm3, RDa - | movaps xmm1, xmm0 - | andpd xmm1, xmm2 // |x| - | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|. 
- | jbe >1 - | andnpd xmm2, xmm0 // Isolate sign bit. - |.if mode == 2 // trunc(x)? - | movaps xmm0, xmm1 - | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 - | subsd xmm1, xmm3 - | sseconst_1 xmm3, RDa - | cmpsd xmm0, xmm1, 1 // |x| < result? - | andpd xmm0, xmm3 - | subsd xmm1, xmm0 // If yes, subtract -1. - | orpd xmm1, xmm2 // Merge sign bit back in. - |.else - | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 - | subsd xmm1, xmm3 - | orpd xmm1, xmm2 // Merge sign bit back in. - | .if mode == 1 // ceil(x)? - | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0. - | cmpsd xmm0, xmm1, 6 // x > result? - | .else // floor(x)? - | sseconst_1 xmm2, RDa - | cmpsd xmm0, xmm1, 1 // x < result? - | .endif - | andpd xmm0, xmm2 - | subsd xmm1, xmm0 // If yes, subtract +-1. - |.endif - | movaps xmm0, xmm1 - |1: - | ret - |.endmacro - | - | vm_round vm_floor, 0, 1 - | vm_round vm_ceil, 1, JIT - | vm_round vm_trunc, 2, JIT - | - |// FP modulo x%y. Called by BC_MOD* and vm_arith. - |->vm_mod: - |// Args in xmm0/xmm1, return value in xmm0. - |// Caveat: xmm0-xmm5 and RC (eax) modified! - | movaps xmm5, xmm0 - | divsd xmm0, xmm1 - | sseconst_abs xmm2, RDa - | sseconst_2p52 xmm3, RDa - | movaps xmm4, xmm0 - | andpd xmm4, xmm2 // |x/y| - | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|. - | jbe >1 - | andnpd xmm2, xmm0 // Isolate sign bit. - | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 - | subsd xmm4, xmm3 - | orpd xmm4, xmm2 // Merge sign bit back in. - | sseconst_1 xmm2, RDa - | cmpsd xmm0, xmm4, 1 // x/y < result? - | andpd xmm0, xmm2 - | subsd xmm4, xmm0 // If yes, subtract 1.0. - | movaps xmm0, xmm5 - | mulsd xmm1, xmm4 - | subsd xmm0, xmm1 - | ret - |1: - | mulsd xmm1, xmm0 - | movaps xmm0, xmm5 - | subsd xmm0, xmm1 - | ret - | - |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. - |->vm_powi_sse: - | cmp eax, 1; jle >6 // i<=1? - | // Now 1 < (unsigned)i <= 0x80000000. - |1: // Handle leading zeros. 
- | test eax, 1; jnz >2 - | mulsd xmm0, xmm0 - | shr eax, 1 - | jmp <1 - |2: - | shr eax, 1; jz >5 - | movaps xmm1, xmm0 - |3: // Handle trailing bits. - | mulsd xmm0, xmm0 - | shr eax, 1; jz >4 - | jnc <3 - | mulsd xmm1, xmm0 - | jmp <3 - |4: - | mulsd xmm0, xmm1 - |5: - | ret - |6: - | je <5 // x^1 ==> x - | jb >7 // x^0 ==> 1 - | neg eax - | call <1 - | sseconst_1 xmm1, RDa - | divsd xmm1, xmm0 - | movaps xmm0, xmm1 - | ret - |7: - | sseconst_1 xmm0, RDa - | ret - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |// int lj_vm_cpuid(uint32_t f, uint32_t res[4]) - |->vm_cpuid: - |.if X64 - | mov eax, CARG1d - | .if X64WIN; push rsi; mov rsi, CARG2; .endif - | push rbx - | xor ecx, ecx - | cpuid - | mov [rsi], eax - | mov [rsi+4], ebx - | mov [rsi+8], ecx - | mov [rsi+12], edx - | pop rbx - | .if X64WIN; pop rsi; .endif - | ret - |.else - | pushfd - | pop edx - | mov ecx, edx - | xor edx, 0x00200000 // Toggle ID bit in flags. - | push edx - | popfd - | pushfd - | pop edx - | xor eax, eax // Zero means no features supported. - | cmp ecx, edx - | jz >1 // No ID toggle means no CPUID support. - | mov eax, [esp+4] // Argument 1 is function number. - | push edi - | push ebx - | xor ecx, ecx - | cpuid - | mov edi, [esp+16] // Argument 2 is result area. 
- | mov [edi], eax - | mov [edi+4], ebx - | mov [edi+8], ecx - | mov [edi+12], edx - | pop ebx - | pop edi - |1: - | ret - |.endif - | - |//----------------------------------------------------------------------- - |//-- Assertions --------------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->assert_bad_for_arg_type: -#ifdef LUA_USE_ASSERT - | int3 -#endif - | int3 - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |// Handler for callback functions. Callback slot number in ah/al. - |->vm_ffi_callback: - |.if FFI - |.type CTSTATE, CTState, PC - |.if not X64 - | sub esp, 16 // Leave room for SAVE_ERRF etc. - |.endif - | saveregs_ // ebp/rbp already saved. ebp now holds global_State *. - | lea DISPATCH, [ebp+GG_G2DISP] - | mov CTSTATE, GL:ebp->ctype_state - | movzx eax, ax - | mov CTSTATE->cb.slot, eax - |.if X64 - | mov CTSTATE->cb.gpr[0], CARG1 - | mov CTSTATE->cb.gpr[1], CARG2 - | mov CTSTATE->cb.gpr[2], CARG3 - | mov CTSTATE->cb.gpr[3], CARG4 - | movsd qword CTSTATE->cb.fpr[0], xmm0 - | movsd qword CTSTATE->cb.fpr[1], xmm1 - | movsd qword CTSTATE->cb.fpr[2], xmm2 - | movsd qword CTSTATE->cb.fpr[3], xmm3 - |.if X64WIN - | lea rax, [rsp+CFRAME_SIZE+4*8] - |.else - | lea rax, [rsp+CFRAME_SIZE] - | mov CTSTATE->cb.gpr[4], CARG5 - | mov CTSTATE->cb.gpr[5], CARG6 - | movsd qword CTSTATE->cb.fpr[4], xmm4 - | movsd qword CTSTATE->cb.fpr[5], xmm5 - | movsd qword CTSTATE->cb.fpr[6], xmm6 - | movsd qword CTSTATE->cb.fpr[7], xmm7 - |.endif - | mov CTSTATE->cb.stack, rax - | mov CARG2, rsp - |.else - | lea eax, [esp+CFRAME_SIZE+16] - | mov CTSTATE->cb.gpr[0], FCARG1 - | mov CTSTATE->cb.gpr[1], FCARG2 - | mov CTSTATE->cb.stack, eax - | mov FCARG1, [esp+CFRAME_SIZE+12] // Move around misplaced retaddr/ebp. 
- | mov FCARG2, [esp+CFRAME_SIZE+8] - | mov SAVE_RET, FCARG1 - | mov SAVE_R4, FCARG2 - | mov FCARG2, esp - |.endif - | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok. - | mov FCARG1, CTSTATE - | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf) - | // lua_State * returned in eax (RD). - | set_vmstate INTERP - | mov BASE, L:RD->base - | mov RD, L:RD->top - | sub RD, BASE - | mov LFUNC:RB, [BASE-8] - | shr RD, 3 - | add RD, 1 - | ins_callt - |.endif - | - |->cont_ffi_callback: // Return from FFI callback. - |.if FFI - | mov L:RA, SAVE_L - | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] - | mov aword CTSTATE->L, L:RAa - | mov L:RA->base, BASE - | mov L:RA->top, RB - | mov FCARG1, CTSTATE - | mov FCARG2, RC - | call extern lj_ccallback_leave@8 // (CTState *cts, TValue *o) - |.if X64 - | mov rax, CTSTATE->cb.gpr[0] - | movsd xmm0, qword CTSTATE->cb.fpr[0] - | jmp ->vm_leave_unw - |.else - | mov L:RB, SAVE_L - | mov eax, CTSTATE->cb.gpr[0] - | mov edx, CTSTATE->cb.gpr[1] - | cmp dword CTSTATE->cb.gpr[2], 1 - | jb >7 - | je >6 - | fld qword CTSTATE->cb.fpr[0].d - | jmp >7 - |6: - | fld dword CTSTATE->cb.fpr[0].f - |7: - | mov ecx, L:RB->top - | movzx ecx, word [ecx+6] // Get stack adjustment and copy up. - | mov SAVE_L, ecx // Must be one slot above SAVE_RET - | restoreregs - | pop ecx // Move return addr from SAVE_RET. - | add esp, [esp] // Adjust stack. - | add esp, 16 - | push ecx - | ret - |.endif - |.endif - | - |->vm_ffi_call@4: // Call C function via FFI. - | // Caveat: needs special frame unwinding, see below. - |.if FFI - |.if X64 - | .type CCSTATE, CCallState, rbx - | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 - |.else - | .type CCSTATE, CCallState, ebx - | push ebp; mov ebp, esp; push ebx; mov CCSTATE, FCARG1 - |.endif - | - | // Readjust stack. - |.if X64 - | mov eax, CCSTATE->spadj - | sub rsp, rax - |.else - | sub esp, CCSTATE->spadj - |.if WIN - | mov CCSTATE->spadj, esp - |.endif - |.endif - | - | // Copy stack slots. 
- | movzx ecx, byte CCSTATE->nsp - | sub ecx, 1 - | js >2 - |1: - |.if X64 - | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] - | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax - |.else - | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)] - | mov [esp+ecx*4], eax - |.endif - | sub ecx, 1 - | jns <1 - |2: - | - |.if X64 - | movzx eax, byte CCSTATE->nfpr - | mov CARG1, CCSTATE->gpr[0] - | mov CARG2, CCSTATE->gpr[1] - | mov CARG3, CCSTATE->gpr[2] - | mov CARG4, CCSTATE->gpr[3] - |.if not X64WIN - | mov CARG5, CCSTATE->gpr[4] - | mov CARG6, CCSTATE->gpr[5] - |.endif - | test eax, eax; jz >5 - | movaps xmm0, CCSTATE->fpr[0] - | movaps xmm1, CCSTATE->fpr[1] - | movaps xmm2, CCSTATE->fpr[2] - | movaps xmm3, CCSTATE->fpr[3] - |.if not X64WIN - | cmp eax, 4; jbe >5 - | movaps xmm4, CCSTATE->fpr[4] - | movaps xmm5, CCSTATE->fpr[5] - | movaps xmm6, CCSTATE->fpr[6] - | movaps xmm7, CCSTATE->fpr[7] - |.endif - |5: - |.else - | mov FCARG1, CCSTATE->gpr[0] - | mov FCARG2, CCSTATE->gpr[1] - |.endif - | - | call aword CCSTATE->func - | - |.if X64 - | mov CCSTATE->gpr[0], rax - | movaps CCSTATE->fpr[0], xmm0 - |.if not X64WIN - | mov CCSTATE->gpr[1], rdx - | movaps CCSTATE->fpr[1], xmm1 - |.endif - |.else - | mov CCSTATE->gpr[0], eax - | mov CCSTATE->gpr[1], edx - | cmp byte CCSTATE->resx87, 1 - | jb >7 - | je >6 - | fstp qword CCSTATE->fpr[0].d[0] - | jmp >7 - |6: - | fstp dword CCSTATE->fpr[0].f[0] - |7: - |.if WIN - | sub CCSTATE->spadj, esp - |.endif - |.endif - | - |.if X64 - | mov rbx, [rbp-8]; leave; ret - |.else - | mov ebx, [ebp-4]; leave; ret - |.endif - |.endif - |// Note: vm_ffi_call must be the last function in this object file! - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |// Note: aligning all instructions does not pay off. 
- |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. */ - - |.macro jmp_comp, lt, ge, le, gt, target - ||switch (op) { - ||case BC_ISLT: - | lt target - ||break; - ||case BC_ISGE: - | ge target - ||break; - ||case BC_ISLE: - | le target - ||break; - ||case BC_ISGT: - | gt target - ||break; - ||default: break; /* Shut up GCC. */ - ||} - |.endmacro - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1, RD = src2, JMP with RD = target - | ins_AD - |.if DUALNUM - | checkint RA, >7 - | checkint RD, >8 - | mov RB, dword [BASE+RA*8] - | add PC, 4 - | cmp RB, dword [BASE+RD*8] - | jmp_comp jge, jl, jg, jle, >9 - |6: - | movzx RD, PC_RD - | branchPC RD - |9: - | ins_next - | - |7: // RA is not an integer. - | ja ->vmeta_comp - | // RA is a number. - | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp - | // RA is a number, RD is an integer. - | cvtsi2sd xmm0, dword [BASE+RD*8] - | jmp >2 - | - |8: // RA is an integer, RD is not an integer. - | ja ->vmeta_comp - | // RA is an integer, RD is a number. - | cvtsi2sd xmm1, dword [BASE+RA*8] - | movsd xmm0, qword [BASE+RD*8] - | add PC, 4 - | ucomisd xmm0, xmm1 - | jmp_comp jbe, ja, jb, jae, <9 - | jmp <6 - |.else - | checknum RA, ->vmeta_comp - | checknum RD, ->vmeta_comp - |.endif - |1: - | movsd xmm0, qword [BASE+RD*8] - |2: - | add PC, 4 - | ucomisd xmm0, qword [BASE+RA*8] - |3: - | // Unordered: all of ZF CF PF set, ordered: PF clear. - | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 
- |.if DUALNUM - | jmp_comp jbe, ja, jb, jae, <9 - | jmp <6 - |.else - | jmp_comp jbe, ja, jb, jae, >1 - | movzx RD, PC_RD - | branchPC RD - |1: - | ins_next - |.endif - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | ins_AD // RA = src1, RD = src2, JMP with RD = target - | mov RB, [BASE+RD*8+4] - | add PC, 4 - |.if DUALNUM - | cmp RB, LJ_TISNUM; jne >7 - | checkint RA, >8 - | mov RB, dword [BASE+RD*8] - | cmp RB, dword [BASE+RA*8] - if (vk) { - | jne >9 - } else { - | je >9 - } - | movzx RD, PC_RD - | branchPC RD - |9: - | ins_next - | - |7: // RD is not an integer. - | ja >5 - | // RD is a number. - | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 - | // RD is a number, RA is an integer. - | cvtsi2sd xmm0, dword [BASE+RA*8] - | jmp >2 - | - |8: // RD is an integer, RA is not an integer. - | ja >5 - | // RD is an integer, RA is a number. - | cvtsi2sd xmm0, dword [BASE+RD*8] - | ucomisd xmm0, qword [BASE+RA*8] - | jmp >4 - | - |.else - | cmp RB, LJ_TISNUM; jae >5 - | checknum RA, >5 - |.endif - |1: - | movsd xmm0, qword [BASE+RA*8] - |2: - | ucomisd xmm0, qword [BASE+RD*8] - |4: - iseqne_fp: - if (vk) { - | jp >2 // Unordered means not equal. - | jne >2 - } else { - | jp >2 // Unordered means not equal. - | je >1 - } - iseqne_end: - if (vk) { - |1: // EQ: Branch to the target. - | movzx RD, PC_RD - | branchPC RD - |2: // NE: Fallthrough to next instruction. - |.if not FFI - |3: - |.endif - } else { - |.if not FFI - |3: - |.endif - |2: // NE: Branch to the target. - | movzx RD, PC_RD - | branchPC RD - |1: // EQ: Fallthrough to next instruction. - } - if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV || - op == BC_ISEQN || op == BC_ISNEN)) { - | jmp <9 - } else { - | ins_next - } - | - if (op == BC_ISEQV || op == BC_ISNEV) { - |5: // Either or both types are not numbers. - |.if FFI - | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd - | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd - |.endif - | checktp RA, RB // Compare types. 
- | jne <2 // Not the same type? - | cmp RB, LJ_TISPRI - | jae <1 // Same type and primitive type? - | - | // Same types and not a primitive type. Compare GCobj or pvalue. - | mov RA, [BASE+RA*8] - | mov RD, [BASE+RD*8] - | cmp RA, RD - | je <1 // Same GCobjs or pvalues? - | cmp RB, LJ_TISTABUD - | ja <2 // Different objects and not table/ud? - |.if X64 - | cmp RB, LJ_TUDATA // And not 64 bit lightuserdata. - | jb <2 - |.endif - | - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - | mov TAB:RB, TAB:RA->metatable - | test TAB:RB, TAB:RB - | jz <2 // No metatable? - | test byte TAB:RB->nomm, 1<vmeta_equal // Handle __eq metamethod. - } else { - |.if FFI - |3: - | cmp RB, LJ_TCDATA - if (LJ_DUALNUM && vk) { - | jne <9 - } else { - | jne <2 - } - | jmp ->vmeta_equal_cd - |.endif - } - break; - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | ins_AND // RA = src, RD = str const, JMP with RD = target - | mov RB, [BASE+RA*8+4] - | add PC, 4 - | cmp RB, LJ_TSTR; jne >3 - | mov RA, [BASE+RA*8] - | cmp RA, [KBASE+RD*4] - iseqne_test: - if (vk) { - | jne >2 - } else { - | je >1 - } - goto iseqne_end; - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | ins_AD // RA = src, RD = num const, JMP with RD = target - | mov RB, [BASE+RA*8+4] - | add PC, 4 - |.if DUALNUM - | cmp RB, LJ_TISNUM; jne >7 - | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8 - | mov RB, dword [KBASE+RD*8] - | cmp RB, dword [BASE+RA*8] - if (vk) { - | jne >9 - } else { - | je >9 - } - | movzx RD, PC_RD - | branchPC RD - |9: - | ins_next - | - |7: // RA is not an integer. - | ja >3 - | // RA is a number. - | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 - | // RA is a number, RD is an integer. - | cvtsi2sd xmm0, dword [KBASE+RD*8] - | jmp >2 - | - |8: // RA is an integer, RD is a number. 
- | cvtsi2sd xmm0, dword [BASE+RA*8] - | ucomisd xmm0, qword [KBASE+RD*8] - | jmp >4 - |.else - | cmp RB, LJ_TISNUM; jae >3 - |.endif - |1: - | movsd xmm0, qword [KBASE+RD*8] - |2: - | ucomisd xmm0, qword [BASE+RA*8] - |4: - goto iseqne_fp; - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target - | mov RB, [BASE+RA*8+4] - | add PC, 4 - | cmp RB, RD - if (!LJ_HASFFI) goto iseqne_test; - if (vk) { - | jne >3 - | movzx RD, PC_RD - | branchPC RD - |2: - | ins_next - |3: - | cmp RB, LJ_TCDATA; jne <2 - | jmp ->vmeta_equal_cd - } else { - | je >2 - | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd - | movzx RD, PC_RD - | branchPC RD - |2: - | ins_next - } - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | ins_AD // RA = dst or unused, RD = src, JMP with RD = target - | mov RB, [BASE+RD*8+4] - | add PC, 4 - | cmp RB, LJ_TISTRUECOND - if (op == BC_IST || op == BC_ISTC) { - | jae >1 - } else { - | jb >1 - } - if (op == BC_ISTC || op == BC_ISFC) { - | mov [BASE+RA*8+4], RB - | mov RB, [BASE+RD*8] - | mov [BASE+RA*8], RB - } - | movzx RD, PC_RD - | branchPC RD - |1: // Fallthrough to the next instruction. 
- | ins_next - break; - - case BC_ISTYPE: - | ins_AD // RA = src, RD = -type - | add RD, [BASE+RA*8+4] - | jne ->vmeta_istype - | ins_next - break; - case BC_ISNUM: - | ins_AD // RA = src, RD = -(TISNUM-1) - | checknum RA, ->vmeta_istype - | ins_next - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | ins_AD // RA = dst, RD = src - |.if X64 - | mov RBa, [BASE+RD*8] - | mov [BASE+RA*8], RBa - |.else - | mov RB, [BASE+RD*8+4] - | mov RD, [BASE+RD*8] - | mov [BASE+RA*8+4], RB - | mov [BASE+RA*8], RD - |.endif - | ins_next_ - break; - case BC_NOT: - | ins_AD // RA = dst, RD = src - | xor RB, RB - | checktp RD, LJ_TISTRUECOND - | adc RB, LJ_TTRUE - | mov [BASE+RA*8+4], RB - | ins_next - break; - case BC_UNM: - | ins_AD // RA = dst, RD = src - |.if DUALNUM - | checkint RD, >5 - | mov RB, [BASE+RD*8] - | neg RB - | jo >4 - | mov dword [BASE+RA*8+4], LJ_TISNUM - | mov dword [BASE+RA*8], RB - |9: - | ins_next - |4: - | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31. - | mov dword [BASE+RA*8], 0 - | jmp <9 - |5: - | ja ->vmeta_unm - |.else - | checknum RD, ->vmeta_unm - |.endif - | movsd xmm0, qword [BASE+RD*8] - | sseconst_sign xmm1, RDa - | xorps xmm0, xmm1 - | movsd qword [BASE+RA*8], xmm0 - |.if DUALNUM - | jmp <9 - |.else - | ins_next - |.endif - break; - case BC_LEN: - | ins_AD // RA = dst, RD = src - | checkstr RD, >2 - | mov STR:RD, [BASE+RD*8] - |.if DUALNUM - | mov RD, dword STR:RD->len - |1: - | mov dword [BASE+RA*8+4], LJ_TISNUM - | mov dword [BASE+RA*8], RD - |.else - | xorps xmm0, xmm0 - | cvtsi2sd xmm0, dword STR:RD->len - |1: - | movsd qword [BASE+RA*8], xmm0 - |.endif - | ins_next - |2: - | checktab RD, ->vmeta_len - | mov TAB:FCARG1, [BASE+RD*8] -#if LJ_52 - | mov TAB:RB, TAB:FCARG1->metatable - | cmp TAB:RB, 0 - | jnz >9 - |3: -#endif - |->BC_LEN_Z: - | mov RB, BASE // Save BASE. - | call extern lj_tab_len@4 // (GCtab *t) - | // Length of table returned in eax (RD). 
- |.if DUALNUM - | // Nothing to do. - |.else - | cvtsi2sd xmm0, RD - |.endif - | mov BASE, RB // Restore BASE. - | movzx RA, PC_RA - | jmp <1 -#if LJ_52 - |9: // Check for __len. - | test byte TAB:RB->nomm, 1<vmeta_len // 'no __len' flag NOT set: check. -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro ins_arithpre, sseins, ssereg - | ins_ABC - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | checknum RB, ->vmeta_arith_vn - | .if DUALNUM - | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn - | .endif - | movsd xmm0, qword [BASE+RB*8] - | sseins ssereg, qword [KBASE+RC*8] - || break; - ||case 1: - | checknum RB, ->vmeta_arith_nv - | .if DUALNUM - | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv - | .endif - | movsd xmm0, qword [KBASE+RC*8] - | sseins ssereg, qword [BASE+RB*8] - || break; - ||default: - | checknum RB, ->vmeta_arith_vv - | checknum RC, ->vmeta_arith_vv - | movsd xmm0, qword [BASE+RB*8] - | sseins ssereg, qword [BASE+RC*8] - || break; - ||} - |.endmacro - | - |.macro ins_arithdn, intins - | ins_ABC - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | checkint RB, ->vmeta_arith_vn - | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn - | mov RB, [BASE+RB*8] - | intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno - || break; - ||case 1: - | checkint RB, ->vmeta_arith_nv - | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv - | mov RC, [KBASE+RC*8] - | intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo - || break; - ||default: - | checkint RB, ->vmeta_arith_vv - | checkint RC, ->vmeta_arith_vv - | mov RB, [BASE+RB*8] - | intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo - || break; - ||} - | mov dword [BASE+RA*8+4], LJ_TISNUM - ||if (vk == 1) { - | mov dword [BASE+RA*8], RC - ||} else { - | mov dword [BASE+RA*8], RB - ||} - | ins_next - |.endmacro - | - |.macro ins_arithpost - | movsd qword [BASE+RA*8], xmm0 
- |.endmacro - | - |.macro ins_arith, sseins - | ins_arithpre sseins, xmm0 - | ins_arithpost - | ins_next - |.endmacro - | - |.macro ins_arith, intins, sseins - |.if DUALNUM - | ins_arithdn intins - |.else - | ins_arith, sseins - |.endif - |.endmacro - - | // RA = dst, RB = src1 or num const, RC = src2 or num const - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arith add, addsd - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arith sub, subsd - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith imul, mulsd - break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arith divsd - break; - case BC_MODVN: - | ins_arithpre movsd, xmm1 - |->BC_MODVN_Z: - | call ->vm_mod - | ins_arithpost - | ins_next - break; - case BC_MODNV: case BC_MODVV: - | ins_arithpre movsd, xmm1 - | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - break; - case BC_POW: - | ins_arithpre movsd, xmm1 - | mov RB, BASE - |.if not X64 - | movsd FPARG1, xmm0 - | movsd FPARG3, xmm1 - |.endif - | call extern pow - | movzx RA, PC_RA - | mov BASE, RB - |.if X64 - | ins_arithpost - |.else - | fstp qword [BASE+RA*8] - |.endif - | ins_next - break; - - case BC_CAT: - | ins_ABC // RA = dst, RB = src_start, RC = src_end - |.if X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE - | lea CARG2d, [BASE+RC*8] - | mov CARG3d, RC - | sub CARG3d, RB - |->BC_CAT_Z: - | mov L:RB, L:CARG1d - |.else - | lea RA, [BASE+RC*8] - | sub RC, RB - | mov ARG2, RA - | mov ARG3, RC - |->BC_CAT_Z: - | mov L:RB, SAVE_L - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_meta_cat // (lua_State *L, TValue *top, int left) - | // NULL (finished) or TValue * (metamethod) returned in eax (RC). - | mov BASE, L:RB->base - | test RC, RC - | jnz ->vmeta_binop - | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB]. 
- | movzx RA, PC_RA - |.if X64 - | mov RCa, [BASE+RB*8] - | mov [BASE+RA*8], RCa - |.else - | mov RC, [BASE+RB*8+4] - | mov RB, [BASE+RB*8] - | mov [BASE+RA*8+4], RC - | mov [BASE+RA*8], RB - |.endif - | ins_next - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | ins_AND // RA = dst, RD = str const (~) - | mov RD, [KBASE+RD*4] - | mov dword [BASE+RA*8+4], LJ_TSTR - | mov [BASE+RA*8], RD - | ins_next - break; - case BC_KCDATA: - |.if FFI - | ins_AND // RA = dst, RD = cdata const (~) - | mov RD, [KBASE+RD*4] - | mov dword [BASE+RA*8+4], LJ_TCDATA - | mov [BASE+RA*8], RD - | ins_next - |.endif - break; - case BC_KSHORT: - | ins_AD // RA = dst, RD = signed int16 literal - |.if DUALNUM - | movsx RD, RDW - | mov dword [BASE+RA*8+4], LJ_TISNUM - | mov dword [BASE+RA*8], RD - |.else - | movsx RD, RDW // Sign-extend literal. - | cvtsi2sd xmm0, RD - | movsd qword [BASE+RA*8], xmm0 - |.endif - | ins_next - break; - case BC_KNUM: - | ins_AD // RA = dst, RD = num const - | movsd xmm0, qword [KBASE+RD*8] - | movsd qword [BASE+RA*8], xmm0 - | ins_next - break; - case BC_KPRI: - | ins_AND // RA = dst, RD = primitive type (~) - | mov [BASE+RA*8+4], RD - | ins_next - break; - case BC_KNIL: - | ins_AD // RA = dst_start, RD = dst_end - | lea RA, [BASE+RA*8+12] - | lea RD, [BASE+RD*8+4] - | mov RB, LJ_TNIL - | mov [RA-8], RB // Sets minimum 2 slots. 
- |1: - | mov [RA], RB - | add RA, 8 - | cmp RA, RD - | jbe <1 - | ins_next - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | ins_AD // RA = dst, RD = upvalue # - | mov LFUNC:RB, [BASE-8] - | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)] - | mov RB, UPVAL:RB->v - |.if X64 - | mov RDa, [RB] - | mov [BASE+RA*8], RDa - |.else - | mov RD, [RB+4] - | mov RB, [RB] - | mov [BASE+RA*8+4], RD - | mov [BASE+RA*8], RB - |.endif - | ins_next - break; - case BC_USETV: -#define TV2MARKOFS \ - ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) - | ins_AD // RA = upvalue #, RD = src - | mov LFUNC:RB, [BASE-8] - | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] - | cmp byte UPVAL:RB->closed, 0 - | mov RB, UPVAL:RB->v - | mov RA, [BASE+RD*8] - | mov RD, [BASE+RD*8+4] - | mov [RB], RA - | mov [RB+4], RD - | jz >1 - | // Check barrier for closed upvalue. - | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv) - | jnz >2 - |1: - | ins_next - | - |2: // Upvalue is black. Check if new value is collectable and white. - | sub RD, LJ_TISGCV - | cmp RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v) - | jbe <1 - | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) - | jz <1 - | // Crossed a write barrier. Move the barrier forward. - |.if X64 and not X64WIN - | mov FCARG2, RB - | mov RB, BASE // Save BASE. - |.else - | xchg FCARG2, RB // Save BASE (FCARG2 == BASE). - |.endif - | lea GL:FCARG1, [DISPATCH+GG_DISP2G] - | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) - | mov BASE, RB // Restore BASE. 
- | jmp <1 - break; -#undef TV2MARKOFS - case BC_USETS: - | ins_AND // RA = upvalue #, RD = str const (~) - | mov LFUNC:RB, [BASE-8] - | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] - | mov GCOBJ:RA, [KBASE+RD*4] - | mov RD, UPVAL:RB->v - | mov [RD], GCOBJ:RA - | mov dword [RD+4], LJ_TSTR - | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) - | jnz >2 - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) - | jz <1 - | cmp byte UPVAL:RB->closed, 0 - | jz <1 - | // Crossed a write barrier. Move the barrier forward. - | mov RB, BASE // Save BASE (FCARG2 == BASE). - | mov FCARG2, RD - | lea GL:FCARG1, [DISPATCH+GG_DISP2G] - | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) - | mov BASE, RB // Restore BASE. - | jmp <1 - break; - case BC_USETN: - | ins_AD // RA = upvalue #, RD = num const - | mov LFUNC:RB, [BASE-8] - | movsd xmm0, qword [KBASE+RD*8] - | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] - | mov RA, UPVAL:RB->v - | movsd qword [RA], xmm0 - | ins_next - break; - case BC_USETP: - | ins_AND // RA = upvalue #, RD = primitive type (~) - | mov LFUNC:RB, [BASE-8] - | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] - | mov RA, UPVAL:RB->v - | mov [RA+4], RD - | ins_next - break; - case BC_UCLO: - | ins_AD // RA = level, RD = target - | branchPC RD // Do this first to free RD. - | mov L:RB, SAVE_L - | cmp dword L:RB->openupval, 0 - | je >1 - | mov L:RB->base, BASE - | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE - | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA - | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level) - | mov BASE, L:RB->base - |1: - | ins_next - break; - - case BC_FNEW: - | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) - |.if X64 - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. 
- | mov CARG3d, [BASE-8] - | mov CARG2d, [KBASE+RD*4] // Fetch GCproto *. - | mov CARG1d, L:RB - |.else - | mov LFUNC:RA, [BASE-8] - | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *. - | mov L:RB, SAVE_L - | mov ARG3, LFUNC:RA - | mov ARG2, PROTO:RD - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | call extern lj_func_newL_gc - | // GCfuncL * returned in eax (RC). - | mov BASE, L:RB->base - | movzx RA, PC_RA - | mov [BASE+RA*8], LFUNC:RC - | mov dword [BASE+RA*8+4], LJ_TFUNC - | ins_next - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - | ins_AD // RA = dst, RD = hbits|asize - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] - | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] - | mov SAVE_PC, PC - | jae >5 - |1: - |.if X64 - | mov CARG3d, RD - | and RD, 0x7ff - | shr CARG3d, 11 - |.else - | mov RA, RD - | and RD, 0x7ff - | shr RA, 11 - | mov ARG3, RA - |.endif - | cmp RD, 0x7ff - | je >3 - |2: - |.if X64 - | mov L:CARG1d, L:RB - | mov CARG2d, RD - |.else - | mov ARG1, L:RB - | mov ARG2, RD - |.endif - | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) - | // Table * returned in eax (RC). - | mov BASE, L:RB->base - | movzx RA, PC_RA - | mov [BASE+RA*8], TAB:RC - | mov dword [BASE+RA*8+4], LJ_TTAB - | ins_next - |3: // Turn 0x7ff into 0x801. 
- | mov RD, 0x801 - | jmp <2 - |5: - | mov L:FCARG1, L:RB - | call extern lj_gc_step_fixtop@4 // (lua_State *L) - | movzx RD, PC_RD - | jmp <1 - break; - case BC_TDUP: - | ins_AND // RA = dst, RD = table const (~) (holding template table) - | mov L:RB, SAVE_L - | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] - | mov SAVE_PC, PC - | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] - | mov L:RB->base, BASE - | jae >3 - |2: - | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE - | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA - | call extern lj_tab_dup@8 // (lua_State *L, Table *kt) - | // Table * returned in eax (RC). - | mov BASE, L:RB->base - | movzx RA, PC_RA - | mov [BASE+RA*8], TAB:RC - | mov dword [BASE+RA*8+4], LJ_TTAB - | ins_next - |3: - | mov L:FCARG1, L:RB - | call extern lj_gc_step_fixtop@4 // (lua_State *L) - | movzx RD, PC_RD // Need to reload RD. - | not RDa - | jmp <2 - break; - - case BC_GGET: - | ins_AND // RA = dst, RD = str const (~) - | mov LFUNC:RB, [BASE-8] - | mov TAB:RB, LFUNC:RB->env - | mov STR:RC, [KBASE+RD*4] - | jmp ->BC_TGETS_Z - break; - case BC_GSET: - | ins_AND // RA = src, RD = str const (~) - | mov LFUNC:RB, [BASE-8] - | mov TAB:RB, LFUNC:RB->env - | mov STR:RC, [KBASE+RD*4] - | jmp ->BC_TSETS_Z - break; - - case BC_TGETV: - | ins_ABC // RA = dst, RB = table, RC = key - | checktab RB, ->vmeta_tgetv - | mov TAB:RB, [BASE+RB*8] - | - | // Integer key? - |.if DUALNUM - | checkint RC, >5 - | mov RC, dword [BASE+RC*8] - |.else - | // Convert number to int and back and compare. - | checknum RC, >5 - | movsd xmm0, qword [BASE+RC*8] - | cvttsd2si RC, xmm0 - | cvtsi2sd xmm1, RC - | ucomisd xmm0, xmm1 - | jne ->vmeta_tgetv // Generic numeric key? Use fallback. - |.endif - | cmp RC, TAB:RB->asize // Takes care of unordered, too. - | jae ->vmeta_tgetv // Not in array part? Use fallback. - | shl RC, 3 - | add RC, TAB:RB->array - | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath. - | je >2 - | // Get array slot. 
- |.if X64 - | mov RBa, [RC] - | mov [BASE+RA*8], RBa - |.else - | mov RB, [RC] - | mov RC, [RC+4] - | mov [BASE+RA*8], RB - | mov [BASE+RA*8+4], RC - |.endif - |1: - | ins_next - | - |2: // Check for __index if table value is nil. - | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. - | jz >3 - | mov TAB:RA, TAB:RB->metatable - | test byte TAB:RA->nomm, 1<vmeta_tgetv // 'no __index' flag NOT set: check. - | movzx RA, PC_RA // Restore RA. - |3: - | mov dword [BASE+RA*8+4], LJ_TNIL - | jmp <1 - | - |5: // String key? - | checkstr RC, ->vmeta_tgetv - | mov STR:RC, [BASE+RC*8] - | jmp ->BC_TGETS_Z - break; - case BC_TGETS: - | ins_ABC // RA = dst, RB = table, RC = str const (~) - | not RCa - | mov STR:RC, [KBASE+RC*4] - | checktab RB, ->vmeta_tgets - | mov TAB:RB, [BASE+RB*8] - |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. - | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash - | imul RA, #NODE - | add NODE:RA, TAB:RB->node - |1: - | cmp dword NODE:RA->key.it, LJ_TSTR - | jne >4 - | cmp dword NODE:RA->key.gcr, STR:RC - | jne >4 - | // Ok, key found. Assumes: offsetof(Node, val) == 0 - | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath. - | je >5 // Key found, but nil value? - | movzx RC, PC_RA - | // Get node value. - |.if X64 - | mov RBa, [RA] - | mov [BASE+RC*8], RBa - |.else - | mov RB, [RA] - | mov RA, [RA+4] - | mov [BASE+RC*8], RB - | mov [BASE+RC*8+4], RA - |.endif - |2: - | ins_next - | - |3: - | movzx RC, PC_RA - | mov dword [BASE+RC*8+4], LJ_TNIL - | jmp <2 - | - |4: // Follow hash chain. - | mov NODE:RA, NODE:RA->next - | test NODE:RA, NODE:RA - | jnz <1 - | // End of hash chain: key not found, nil result. - | - |5: // Check for __index if table value is nil. - | mov TAB:RA, TAB:RB->metatable - | test TAB:RA, TAB:RA - | jz <3 // No metatable: done. - | test byte TAB:RA->nomm, 1<vmeta_tgets // Caveat: preserve STR:RC. 
- break; - case BC_TGETB: - | ins_ABC // RA = dst, RB = table, RC = byte literal - | checktab RB, ->vmeta_tgetb - | mov TAB:RB, [BASE+RB*8] - | cmp RC, TAB:RB->asize - | jae ->vmeta_tgetb - | shl RC, 3 - | add RC, TAB:RB->array - | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath. - | je >2 - | // Get array slot. - |.if X64 - | mov RBa, [RC] - | mov [BASE+RA*8], RBa - |.else - | mov RB, [RC] - | mov RC, [RC+4] - | mov [BASE+RA*8], RB - | mov [BASE+RA*8+4], RC - |.endif - |1: - | ins_next - | - |2: // Check for __index if table value is nil. - | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. - | jz >3 - | mov TAB:RA, TAB:RB->metatable - | test byte TAB:RA->nomm, 1<vmeta_tgetb // 'no __index' flag NOT set: check. - | movzx RA, PC_RA // Restore RA. - |3: - | mov dword [BASE+RA*8+4], LJ_TNIL - | jmp <1 - break; - case BC_TGETR: - | ins_ABC // RA = dst, RB = table, RC = key - | mov TAB:RB, [BASE+RB*8] - |.if DUALNUM - | mov RC, dword [BASE+RC*8] - |.else - | cvttsd2si RC, qword [BASE+RC*8] - |.endif - | cmp RC, TAB:RB->asize - | jae ->vmeta_tgetr // Not in array part? Use fallback. - | shl RC, 3 - | add RC, TAB:RB->array - | // Get array slot. - |->BC_TGETR_Z: - |.if X64 - | mov RBa, [RC] - | mov [BASE+RA*8], RBa - |.else - | mov RB, [RC] - | mov RC, [RC+4] - | mov [BASE+RA*8], RB - | mov [BASE+RA*8+4], RC - |.endif - |->BC_TGETR2_Z: - | ins_next - break; - - case BC_TSETV: - | ins_ABC // RA = src, RB = table, RC = key - | checktab RB, ->vmeta_tsetv - | mov TAB:RB, [BASE+RB*8] - | - | // Integer key? - |.if DUALNUM - | checkint RC, >5 - | mov RC, dword [BASE+RC*8] - |.else - | // Convert number to int and back and compare. - | checknum RC, >5 - | movsd xmm0, qword [BASE+RC*8] - | cvttsd2si RC, xmm0 - | cvtsi2sd xmm1, RC - | ucomisd xmm0, xmm1 - | jne ->vmeta_tsetv // Generic numeric key? Use fallback. - |.endif - | cmp RC, TAB:RB->asize // Takes care of unordered, too. 
- | jae ->vmeta_tsetv - | shl RC, 3 - | add RC, TAB:RB->array - | cmp dword [RC+4], LJ_TNIL - | je >3 // Previous value is nil? - |1: - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jnz >7 - |2: // Set array slot. - |.if X64 - | mov RBa, [BASE+RA*8] - | mov [RC], RBa - |.else - | mov RB, [BASE+RA*8+4] - | mov RA, [BASE+RA*8] - | mov [RC+4], RB - | mov [RC], RA - |.endif - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. - | jz <1 - | mov TAB:RA, TAB:RB->metatable - | test byte TAB:RA->nomm, 1<vmeta_tsetv // 'no __newindex' flag NOT set: check. - | movzx RA, PC_RA // Restore RA. - | jmp <1 - | - |5: // String key? - | checkstr RC, ->vmeta_tsetv - | mov STR:RC, [BASE+RC*8] - | jmp ->BC_TSETS_Z - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, RA - | movzx RA, PC_RA // Restore RA. - | jmp <2 - break; - case BC_TSETS: - | ins_ABC // RA = src, RB = table, RC = str const (~) - | not RCa - | mov STR:RC, [KBASE+RC*4] - | checktab RB, ->vmeta_tsets - | mov TAB:RB, [BASE+RB*8] - |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. - | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash - | imul RA, #NODE - | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. - | add NODE:RA, TAB:RB->node - |1: - | cmp dword NODE:RA->key.it, LJ_TSTR - | jne >5 - | cmp dword NODE:RA->key.gcr, STR:RC - | jne >5 - | // Ok, key found. Assumes: offsetof(Node, val) == 0 - | cmp dword [RA+4], LJ_TNIL - | je >4 // Previous value is nil? - |2: - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jnz >7 - |3: // Set node value. - | movzx RC, PC_RA - |.if X64 - | mov RBa, [BASE+RC*8] - | mov [RA], RBa - |.else - | mov RB, [BASE+RC*8+4] - | mov RC, [BASE+RC*8] - | mov [RA+4], RB - | mov [RA], RC - |.endif - | ins_next - | - |4: // Check for __newindex if previous value is nil. 
- | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. - | jz <2 - | mov TMP1, RA // Save RA. - | mov TAB:RA, TAB:RB->metatable - | test byte TAB:RA->nomm, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. - | mov RA, TMP1 // Restore RA. - | jmp <2 - | - |5: // Follow hash chain. - | mov NODE:RA, NODE:RA->next - | test NODE:RA, NODE:RA - | jnz <1 - | // End of hash chain: key not found, add a new one. - | - | // But check for __newindex first. - | mov TAB:RA, TAB:RB->metatable - | test TAB:RA, TAB:RA - | jz >6 // No metatable: continue. - | test byte TAB:RA->nomm, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. - |6: - | mov TMP1, STR:RC - | mov TMP2, LJ_TSTR - | mov TMP3, TAB:RB // Save TAB:RB for us. - |.if X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE - | lea CARG3, TMP1 - | mov CARG2d, TAB:RB - | mov L:RB, L:CARG1d - |.else - | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. - | mov ARG2, TAB:RB - | mov L:RB, SAVE_L - | mov ARG3, RC - | mov ARG1, L:RB - | mov L:RB->base, BASE - |.endif - | mov SAVE_PC, PC - | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) - | // Handles write barrier for the new key. TValue * returned in eax (RC). - | mov BASE, L:RB->base - | mov TAB:RB, TMP3 // Need TAB:RB for barrier. - | mov RA, eax - | jmp <2 // Must check write barrier for value. - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, RC // Destroys STR:RC. - | jmp <3 - break; - case BC_TSETB: - | ins_ABC // RA = src, RB = table, RC = byte literal - | checktab RB, ->vmeta_tsetb - | mov TAB:RB, [BASE+RB*8] - | cmp RC, TAB:RB->asize - | jae ->vmeta_tsetb - | shl RC, 3 - | add RC, TAB:RB->array - | cmp dword [RC+4], LJ_TNIL - | je >3 // Previous value is nil? - |1: - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jnz >7 - |2: // Set array slot. 
- |.if X64 - | mov RAa, [BASE+RA*8] - | mov [RC], RAa - |.else - | mov RB, [BASE+RA*8+4] - | mov RA, [BASE+RA*8] - | mov [RC+4], RB - | mov [RC], RA - |.endif - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. - | jz <1 - | mov TAB:RA, TAB:RB->metatable - | test byte TAB:RA->nomm, 1<vmeta_tsetb // 'no __newindex' flag NOT set: check. - | movzx RA, PC_RA // Restore RA. - | jmp <1 - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, RA - | movzx RA, PC_RA // Restore RA. - | jmp <2 - break; - case BC_TSETR: - | ins_ABC // RA = src, RB = table, RC = key - | mov TAB:RB, [BASE+RB*8] - |.if DUALNUM - | mov RC, dword [BASE+RC*8] - |.else - | cvttsd2si RC, qword [BASE+RC*8] - |.endif - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jnz >7 - |2: - | cmp RC, TAB:RB->asize - | jae ->vmeta_tsetr - | shl RC, 3 - | add RC, TAB:RB->array - | // Set array slot. - |->BC_TSETR_Z: - |.if X64 - | mov RBa, [BASE+RA*8] - | mov [RC], RBa - |.else - | mov RB, [BASE+RA*8+4] - | mov RA, [BASE+RA*8] - | mov [RC+4], RB - | mov [RC], RA - |.endif - | ins_next - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, RA - | movzx RA, PC_RA // Restore RA. - | jmp <2 - break; - - case BC_TSETM: - | ins_AD // RA = base (table at base-1), RD = num const (start index) - | mov TMP1, KBASE // Need one more free register. - | mov KBASE, dword [KBASE+RD*8] // Integer constant is in lo-word. - |1: - | lea RA, [BASE+RA*8] - | mov TAB:RB, [RA-8] // Guaranteed to be a table. - | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) - | jnz >7 - |2: - | mov RD, MULTRES - | sub RD, 1 - | jz >4 // Nothing to copy? - | add RD, KBASE // Compute needed size. - | cmp RD, TAB:RB->asize - | ja >5 // Doesn't fit into array part? 
- | sub RD, KBASE - | shl KBASE, 3 - | add KBASE, TAB:RB->array - |3: // Copy result slots to table. - |.if X64 - | mov RBa, [RA] - | add RA, 8 - | mov [KBASE], RBa - |.else - | mov RB, [RA] - | mov [KBASE], RB - | mov RB, [RA+4] - | add RA, 8 - | mov [KBASE+4], RB - |.endif - | add KBASE, 8 - | sub RD, 1 - | jnz <3 - |4: - | mov KBASE, TMP1 - | ins_next - | - |5: // Need to resize array part. - |.if X64 - | mov L:CARG1d, SAVE_L - | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. - | mov CARG2d, TAB:RB - | mov CARG3d, RD - | mov L:RB, L:CARG1d - |.else - | mov ARG2, TAB:RB - | mov L:RB, SAVE_L - | mov L:RB->base, BASE - | mov ARG3, RD - | mov ARG1, L:RB - |.endif - | mov SAVE_PC, PC - | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - | mov BASE, L:RB->base - | movzx RA, PC_RA // Restore RA. - | jmp <1 // Retry. - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:RB, RD - | jmp <2 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALL: case BC_CALLM: - | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs - if (op == BC_CALLM) { - | add NARGS:RD, MULTRES - } - | cmp dword [BASE+RA*8+4], LJ_TFUNC - | mov LFUNC:RB, [BASE+RA*8] - | jne ->vmeta_call_ra - | lea BASE, [BASE+RA*8+8] - | ins_call - break; - - case BC_CALLMT: - | ins_AD // RA = base, RD = extra_nargs - | add NARGS:RD, MULTRES - | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. - break; - case BC_CALLT: - | ins_AD // RA = base, RD = nargs+1 - | lea RA, [BASE+RA*8+8] - | mov KBASE, BASE // Use KBASE for move + vmeta_call hint. - | mov LFUNC:RB, [RA-8] - | cmp dword [RA-4], LJ_TFUNC - | jne ->vmeta_call - |->BC_CALLT_Z: - | mov PC, [BASE-4] - | test PC, FRAME_TYPE - | jnz >7 - |1: - | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below. - | mov MULTRES, NARGS:RD - | sub NARGS:RD, 1 - | jz >3 - |2: // Move args down. 
- |.if X64 - | mov RBa, [RA] - | add RA, 8 - | mov [KBASE], RBa - |.else - | mov RB, [RA] - | mov [KBASE], RB - | mov RB, [RA+4] - | add RA, 8 - | mov [KBASE+4], RB - |.endif - | add KBASE, 8 - | sub NARGS:RD, 1 - | jnz <2 - | - | mov LFUNC:RB, [BASE-8] - |3: - | mov NARGS:RD, MULTRES - | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function? - | ja >5 - |4: - | ins_callt - | - |5: // Tailcall to a fast function. - | test PC, FRAME_TYPE // Lua frame below? - | jnz <4 - | movzx RA, PC_RA - | not RAa - | mov LFUNC:KBASE, [BASE+RA*8-8] // Need to prepare KBASE. - | mov KBASE, LFUNC:KBASE->pc - | mov KBASE, [KBASE+PC2PROTO(k)] - | jmp <4 - | - |7: // Tailcall from a vararg function. - | sub PC, FRAME_VARG - | test PC, FRAME_TYPEP - | jnz >8 // Vararg frame below? - | sub BASE, PC // Need to relocate BASE/KBASE down. - | mov KBASE, BASE - | mov PC, [BASE-4] - | jmp <1 - |8: - | add PC, FRAME_VARG - | jmp <1 - break; - - case BC_ITERC: - | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) - | lea RA, [BASE+RA*8+8] // fb = base+1 - |.if X64 - | mov RBa, [RA-24] // Copy state. fb[0] = fb[-3]. - | mov RCa, [RA-16] // Copy control var. fb[1] = fb[-2]. - | mov [RA], RBa - | mov [RA+8], RCa - |.else - | mov RB, [RA-24] // Copy state. fb[0] = fb[-3]. - | mov RC, [RA-20] - | mov [RA], RB - | mov [RA+4], RC - | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2]. - | mov RC, [RA-12] - | mov [RA+8], RB - | mov [RA+12], RC - |.endif - | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4] - | mov RC, [RA-28] - | mov [RA-8], LFUNC:RB - | mov [RA-4], RC - | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call. - | mov NARGS:RD, 2+1 - | jne ->vmeta_call - | mov BASE, RA - | ins_call - break; - - case BC_ITERN: - | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | mov TMP1, KBASE // Need two more free registers. 
- | mov TMP2, DISPATCH - | mov TAB:RB, [BASE+RA*8-16] - | mov RC, [BASE+RA*8-8] // Get index from control var. - | mov DISPATCH, TAB:RB->asize - | add PC, 4 - | mov KBASE, TAB:RB->array - |1: // Traverse array part. - | cmp RC, DISPATCH; jae >5 // Index points after array part? - | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4 - |.if DUALNUM - | mov dword [BASE+RA*8+4], LJ_TISNUM - | mov dword [BASE+RA*8], RC - |.else - | cvtsi2sd xmm0, RC - |.endif - | // Copy array slot to returned value. - |.if X64 - | mov RBa, [KBASE+RC*8] - | mov [BASE+RA*8+8], RBa - |.else - | mov RB, [KBASE+RC*8+4] - | mov [BASE+RA*8+12], RB - | mov RB, [KBASE+RC*8] - | mov [BASE+RA*8+8], RB - |.endif - | add RC, 1 - | // Return array index as a numeric key. - |.if DUALNUM - | // See above. - |.else - | movsd qword [BASE+RA*8], xmm0 - |.endif - | mov [BASE+RA*8-8], RC // Update control var. - |2: - | movzx RD, PC_RD // Get target from ITERL. - | branchPC RD - |3: - | mov DISPATCH, TMP2 - | mov KBASE, TMP1 - | ins_next - | - |4: // Skip holes in array part. - | add RC, 1 - | jmp <1 - | - |5: // Traverse hash part. - | sub RC, DISPATCH - |6: - | cmp RC, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1. - | imul KBASE, RC, #NODE - | add NODE:KBASE, TAB:RB->node - | cmp dword NODE:KBASE->val.it, LJ_TNIL; je >7 - | lea DISPATCH, [RC+DISPATCH+1] - | // Copy key and value from hash slot. - |.if X64 - | mov RBa, NODE:KBASE->key - | mov RCa, NODE:KBASE->val - | mov [BASE+RA*8], RBa - | mov [BASE+RA*8+8], RCa - |.else - | mov RB, NODE:KBASE->key.gcr - | mov RC, NODE:KBASE->key.it - | mov [BASE+RA*8], RB - | mov [BASE+RA*8+4], RC - | mov RB, NODE:KBASE->val.gcr - | mov RC, NODE:KBASE->val.it - | mov [BASE+RA*8+8], RB - | mov [BASE+RA*8+12], RC - |.endif - | mov [BASE+RA*8-8], DISPATCH - | jmp <2 - | - |7: // Skip holes in hash part. 
- | add RC, 1 - | jmp <6 - break; - - case BC_ISNEXT: - | ins_AD // RA = base, RD = target (points to ITERN) - | cmp dword [BASE+RA*8-20], LJ_TFUNC; jne >5 - | mov CFUNC:RB, [BASE+RA*8-24] - | cmp dword [BASE+RA*8-12], LJ_TTAB; jne >5 - | cmp dword [BASE+RA*8-4], LJ_TNIL; jne >5 - | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 - | branchPC RD - | mov dword [BASE+RA*8-8], 0 // Initialize control var. - | mov dword [BASE+RA*8-4], 0xfffe7fff - |1: - | ins_next - |5: // Despecialize bytecode if any of the checks fail. - | mov PC_OP, BC_JMP - | branchPC RD - | mov byte [PC], BC_ITERC - | jmp <1 - break; - - case BC_VARG: - | ins_ABC // RA = base, RB = nresults+1, RC = numparams - | mov TMP1, KBASE // Need one more free register. - | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)] - | lea RA, [BASE+RA*8] - | sub KBASE, [BASE-4] - | // Note: KBASE may now be even _above_ BASE if nargs was < numparams. - | test RB, RB - | jz >5 // Copy all varargs? - | lea RB, [RA+RB*8-8] - | cmp KBASE, BASE // No vararg slots? - | jnb >2 - |1: // Copy vararg slots to destination slots. - |.if X64 - | mov RCa, [KBASE-8] - | add KBASE, 8 - | mov [RA], RCa - |.else - | mov RC, [KBASE-8] - | mov [RA], RC - | mov RC, [KBASE-4] - | add KBASE, 8 - | mov [RA+4], RC - |.endif - | add RA, 8 - | cmp RA, RB // All destination slots filled? - | jnb >3 - | cmp KBASE, BASE // No more vararg slots? - | jb <1 - |2: // Fill up remainder with nil. - | mov dword [RA+4], LJ_TNIL - | add RA, 8 - | cmp RA, RB - | jb <2 - |3: - | mov KBASE, TMP1 - | ins_next - | - |5: // Copy all varargs. - | mov MULTRES, 1 // MULTRES = 0+1 - | mov RC, BASE - | sub RC, KBASE - | jbe <3 // No vararg slots? - | mov RB, RC - | shr RB, 3 - | add RB, 1 - | mov MULTRES, RB // MULTRES = #varargs+1 - | mov L:RB, SAVE_L - | add RC, RA - | cmp RC, L:RB->maxstack - | ja >7 // Need to grow stack? - |6: // Copy all vararg slots. 
- |.if X64 - | mov RCa, [KBASE-8] - | add KBASE, 8 - | mov [RA], RCa - |.else - | mov RC, [KBASE-8] - | mov [RA], RC - | mov RC, [KBASE-4] - | add KBASE, 8 - | mov [RA+4], RC - |.endif - | add RA, 8 - | cmp KBASE, BASE // No more vararg slots? - | jb <6 - | jmp <3 - | - |7: // Grow stack for varargs. - | mov L:RB->base, BASE - | mov L:RB->top, RA - | mov SAVE_PC, PC - | sub KBASE, BASE // Need delta, because BASE may change. - | mov FCARG2, MULTRES - | sub FCARG2, 1 - | mov FCARG1, L:RB - | call extern lj_state_growstack@8 // (lua_State *L, int n) - | mov BASE, L:RB->base - | mov RA, L:RB->top - | add KBASE, BASE - | jmp <6 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | ins_AD // RA = results, RD = extra_nresults - | add RD, MULTRES // MULTRES >=1, so RD >=1. - | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. - break; - - case BC_RET: case BC_RET0: case BC_RET1: - | ins_AD // RA = results, RD = nresults+1 - if (op != BC_RET0) { - | shl RA, 3 - } - |1: - | mov PC, [BASE-4] - | mov MULTRES, RD // Save nresults+1. - | test PC, FRAME_TYPE // Check frame type marker. - | jnz >7 // Not returning to a fixarg Lua func? - switch (op) { - case BC_RET: - |->BC_RET_Z: - | mov KBASE, BASE // Use KBASE for result move. - | sub RD, 1 - | jz >3 - |2: // Move results down. - |.if X64 - | mov RBa, [KBASE+RA] - | mov [KBASE-8], RBa - |.else - | mov RB, [KBASE+RA] - | mov [KBASE-8], RB - | mov RB, [KBASE+RA+4] - | mov [KBASE-4], RB - |.endif - | add KBASE, 8 - | sub RD, 1 - | jnz <2 - |3: - | mov RD, MULTRES // Note: MULTRES may be >255. - | movzx RB, PC_RB // So cannot compare with RDL! - |5: - | cmp RB, RD // More results expected? 
- | ja >6 - break; - case BC_RET1: - |.if X64 - | mov RBa, [BASE+RA] - | mov [BASE-8], RBa - |.else - | mov RB, [BASE+RA+4] - | mov [BASE-4], RB - | mov RB, [BASE+RA] - | mov [BASE-8], RB - |.endif - /* fallthrough */ - case BC_RET0: - |5: - | cmp PC_RB, RDL // More results expected? - | ja >6 - default: - break; - } - | movzx RA, PC_RA - | not RAa // Note: ~RA = -(RA+1) - | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 - | mov LFUNC:KBASE, [BASE-8] - | mov KBASE, LFUNC:KBASE->pc - | mov KBASE, [KBASE+PC2PROTO(k)] - | ins_next - | - |6: // Fill up results with nil. - if (op == BC_RET) { - | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base. - | add KBASE, 8 - } else { - | mov dword [BASE+RD*8-12], LJ_TNIL - } - | add RD, 1 - | jmp <5 - | - |7: // Non-standard return case. - | lea RB, [PC-FRAME_VARG] - | test RB, FRAME_TYPEP - | jnz ->vm_return - | // Return from vararg function: relocate BASE down and RA up. - | sub BASE, RB - if (op != BC_RET0) { - | add RA, RB - } - | jmp <1 - break; - - /* -- Loops and branches ------------------------------------------------ */ - - |.define FOR_IDX, [RA]; .define FOR_TIDX, dword [RA+4] - |.define FOR_STOP, [RA+8]; .define FOR_TSTOP, dword [RA+12] - |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20] - |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28] - - case BC_FORL: - |.if JIT - | hotloop RB - |.endif - | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. 
- break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - vk = (op == BC_IFORL || op == BC_JFORL); - | ins_AJ // RA = base, RD = target (after end of loop or start of loop) - | lea RA, [BASE+RA*8] - if (LJ_DUALNUM) { - | cmp FOR_TIDX, LJ_TISNUM; jne >9 - if (!vk) { - | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for - | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for - | mov RB, dword FOR_IDX - | cmp dword FOR_STEP, 0; jl >5 - } else { -#ifdef LUA_USE_ASSERT - | cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type - | cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type -#endif - | mov RB, dword FOR_STEP - | test RB, RB; js >5 - | add RB, dword FOR_IDX; jo >1 - | mov dword FOR_IDX, RB - } - | cmp RB, dword FOR_STOP - | mov FOR_TEXT, LJ_TISNUM - | mov dword FOR_EXT, RB - if (op == BC_FORI) { - | jle >7 - |1: - |6: - | branchPC RD - } else if (op == BC_JFORI) { - | branchPC RD - | movzx RD, PC_RD - | jle =>BC_JLOOP - |1: - |6: - } else if (op == BC_IFORL) { - | jg >7 - |6: - | branchPC RD - |1: - } else { - | jle =>BC_JLOOP - |1: - |6: - } - |7: - | ins_next - | - |5: // Invert check for negative step. - if (vk) { - | add RB, dword FOR_IDX; jo <1 - | mov dword FOR_IDX, RB - } - | cmp RB, dword FOR_STOP - | mov FOR_TEXT, LJ_TISNUM - | mov dword FOR_EXT, RB - if (op == BC_FORI) { - | jge <7 - } else if (op == BC_JFORI) { - | branchPC RD - | movzx RD, PC_RD - | jge =>BC_JLOOP - } else if (op == BC_IFORL) { - | jl <7 - } else { - | jge =>BC_JLOOP - } - | jmp <6 - |9: // Fallback to FP variant. - } else if (!vk) { - | cmp FOR_TIDX, LJ_TISNUM - } - if (!vk) { - | jae ->vmeta_for - | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for - } else { -#ifdef LUA_USE_ASSERT - | cmp FOR_TSTOP, LJ_TISNUM; jae ->assert_bad_for_arg_type - | cmp FOR_TSTEP, LJ_TISNUM; jae ->assert_bad_for_arg_type -#endif - } - | mov RB, FOR_TSTEP // Load type/hiword of for step. 
- if (!vk) { - | cmp RB, LJ_TISNUM; jae ->vmeta_for - } - | movsd xmm0, qword FOR_IDX - | movsd xmm1, qword FOR_STOP - if (vk) { - | addsd xmm0, qword FOR_STEP - | movsd qword FOR_IDX, xmm0 - | test RB, RB; js >3 - } else { - | jl >3 - } - | ucomisd xmm1, xmm0 - |1: - | movsd qword FOR_EXT, xmm0 - if (op == BC_FORI) { - |.if DUALNUM - | jnb <7 - |.else - | jnb >2 - | branchPC RD - |.endif - } else if (op == BC_JFORI) { - | branchPC RD - | movzx RD, PC_RD - | jnb =>BC_JLOOP - } else if (op == BC_IFORL) { - |.if DUALNUM - | jb <7 - |.else - | jb >2 - | branchPC RD - |.endif - } else { - | jnb =>BC_JLOOP - } - |.if DUALNUM - | jmp <6 - |.else - |2: - | ins_next - |.endif - | - |3: // Invert comparison if step is negative. - | ucomisd xmm0, xmm1 - | jmp <1 - break; - - case BC_ITERL: - |.if JIT - | hotloop RB - |.endif - | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | ins_AJ // RA = base, RD = target - | lea RA, [BASE+RA*8] - | mov RB, [RA+4] - | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil. - if (op == BC_JITERL) { - | mov [RA-4], RB - | mov RB, [RA] - | mov [RA-8], RB - | jmp =>BC_JLOOP - } else { - | branchPC RD // Otherwise save control var + branch. - | mov RD, [RA] - | mov [RA-4], RB - | mov [RA-8], RD - } - |1: - | ins_next - break; - - case BC_LOOP: - | ins_A // RA = base, RD = target (loop extent) - | // Note: RA/RD is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop RB - |.endif - | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 
- break; - - case BC_ILOOP: - | ins_A // RA = base, RD = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | ins_AD // RA = base (ignored), RD = traceno - | mov RA, [DISPATCH+DISPATCH_J(trace)] - | mov TRACE:RD, [RA+RD*4] - | mov RDa, TRACE:RD->mcode - | mov L:RB, SAVE_L - | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE - | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB - | // Save additional callee-save registers only used in compiled code. - |.if X64WIN - | mov TMPQ, r12 - | mov TMPa, r13 - | mov CSAVE_4, r14 - | mov CSAVE_3, r15 - | mov RAa, rsp - | sub rsp, 9*16+4*8 - | movdqa [RAa], xmm6 - | movdqa [RAa-1*16], xmm7 - | movdqa [RAa-2*16], xmm8 - | movdqa [RAa-3*16], xmm9 - | movdqa [RAa-4*16], xmm10 - | movdqa [RAa-5*16], xmm11 - | movdqa [RAa-6*16], xmm12 - | movdqa [RAa-7*16], xmm13 - | movdqa [RAa-8*16], xmm14 - | movdqa [RAa-9*16], xmm15 - |.elif X64 - | mov TMPQ, r12 - | mov TMPa, r13 - | sub rsp, 16 - |.endif - | jmp RDa - |.endif - break; - - case BC_JMP: - | ins_AJ // RA = unused, RD = target - | branchPC RD - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - /* - ** Reminder: A function may be called with func/args above L->maxstack, - ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, - ** too. This means all FUNC* ops (including fast functions) must check - ** for stack overflow _before_ adding more slots! - */ - - case BC_FUNCF: - |.if JIT - | hotcall RB - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 - | mov KBASE, [PC-4+PC2PROTO(k)] - | mov L:RB, SAVE_L - | lea RA, [BASE+RA*8] // Top of frame. 
- | cmp RA, L:RB->maxstack - | ja ->vm_growstack_f - | movzx RA, byte [PC-4+PC2PROTO(numparams)] - | cmp NARGS:RD, RA // Check for missing parameters. - | jbe >3 - |2: - if (op == BC_JFUNCF) { - | movzx RD, PC_RD - | jmp =>BC_JLOOP - } else { - | ins_next - } - | - |3: // Clear missing parameters. - | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL - | add NARGS:RD, 1 - | cmp NARGS:RD, RA - | jbe <3 - | jmp <2 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | int3 // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 - | lea RB, [NARGS:RD*8+FRAME_VARG] - | lea RD, [BASE+NARGS:RD*8] - | mov LFUNC:KBASE, [BASE-8] - | mov [RD-4], RB // Store delta + FRAME_VARG. - | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC. - | mov L:RB, SAVE_L - | lea RA, [RD+RA*8] - | cmp RA, L:RB->maxstack - | ja ->vm_growstack_v // Need to grow stack. - | mov RA, BASE - | mov BASE, RD - | movzx RB, byte [PC-4+PC2PROTO(numparams)] - | test RB, RB - | jz >2 - |1: // Copy fixarg slots up to new frame. - | add RA, 8 - | cmp RA, BASE - | jnb >3 // Less args than parameters? - | mov KBASE, [RA-8] - | mov [RD], KBASE - | mov KBASE, [RA-4] - | mov [RD+4], KBASE - | add RD, 8 - | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC). - | sub RB, 1 - | jnz <1 - |2: - if (op == BC_JFUNCV) { - | movzx RD, PC_RD - | jmp =>BC_JLOOP - } else { - | mov KBASE, [PC-4+PC2PROTO(k)] - | ins_next - } - | - |3: // Clear missing parameters. 
- | mov dword [RD+4], LJ_TNIL - | add RD, 8 - | sub RB, 1 - | jnz <3 - | jmp <2 - break; - - case BC_FUNCC: - case BC_FUNCCW: - | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1 - | mov CFUNC:RB, [BASE-8] - | mov KBASEa, CFUNC:RB->f - | mov L:RB, SAVE_L - | lea RD, [BASE+NARGS:RD*8-8] - | mov L:RB->base, BASE - | lea RA, [RD+8*LUA_MINSTACK] - | cmp RA, L:RB->maxstack - | mov L:RB->top, RD - if (op == BC_FUNCC) { - |.if X64 - | mov CARG1d, L:RB // Caveat: CARG1d may be RA. - |.else - | mov ARG1, L:RB - |.endif - } else { - |.if X64 - | mov CARG2, KBASEa - | mov CARG1d, L:RB // Caveat: CARG1d may be RA. - |.else - | mov ARG2, KBASEa - | mov ARG1, L:RB - |.endif - } - | ja ->vm_growstack_c // Need to grow stack. - | set_vmstate C - if (op == BC_FUNCC) { - | call KBASEa // (lua_State *L) - } else { - | // (lua_State *L, lua_CFunction f) - | call aword [DISPATCH+DISPATCH_GL(wrapf)] - } - | // nresults returned in eax (RD). - | mov BASE, L:RB->base - | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB - | set_vmstate INTERP - | lea RA, [BASE+RD*8] - | neg RA - | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 - | mov PC, [BASE-4] // Fetch PC of caller. - | jmp ->vm_returnc - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - dasm_growpc(Dst, BC__MAX); - build_subroutines(ctx); - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. 
*/ -static void emit_asm_debug(BuildCtx *ctx) -{ - int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); -#if LJ_64 -#define SZPTR "8" -#define BSZPTR "3" -#define REG_SP "0x7" -#define REG_RA "0x10" -#else -#define SZPTR "4" -#define BSZPTR "2" -#define REG_SP "0x4" -#define REG_RA "0x8" -#endif - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.long .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.long 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -" SZPTR "\n" - "\t.byte " REG_RA "\n" - "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n" - "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n" - "\t.align " SZPTR "\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.long .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.long .Lframe0\n" -#if LJ_64 - "\t.quad .Lbegin\n" - "\t.quad %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ - "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ - "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ - "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ -#if LJ_NO_UNWIND - "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */ - "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */ -#endif -#else - "\t.long .Lbegin\n" - "\t.long %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ - "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ - "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */ - "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */ - "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */ -#endif - "\t.align " SZPTR "\n" - ".LEFDE0:\n\n", fcofs, CFRAME_SIZE); -#if LJ_HASFFI - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.long .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.long .Lframe0\n" -#if LJ_64 - "\t.quad lj_vm_ffi_call\n" - "\t.quad %d\n" - "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ - "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp 
*/ - "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ - "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ -#else - "\t.long lj_vm_ffi_call\n" - "\t.long %d\n" - "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */ - "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ - "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */ - "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */ -#endif - "\t.align " SZPTR "\n" - ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); -#endif -#if !LJ_NO_UNWIND -#if (defined(__sun__) && defined(__svr4__)) -#if LJ_64 - fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); -#else - fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n"); -#endif -#else - fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); -#endif - fprintf(ctx->fp, - ".Lframe1:\n" - "\t.long .LECIE1-.LSCIE1\n" - ".LSCIE1:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zPR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -" SZPTR "\n" - "\t.byte " REG_RA "\n" - "\t.uleb128 6\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.long lj_err_unwind_dwarf-.\n" - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n" - "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n" - "\t.align " SZPTR "\n" - ".LECIE1:\n\n"); - fprintf(ctx->fp, - ".LSFDE2:\n" - "\t.long .LEFDE2-.LASFDE2\n" - ".LASFDE2:\n" - "\t.long .LASFDE2-.Lframe1\n" - "\t.long .Lbegin-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ -#if LJ_64 - "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ - "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ - "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ - "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ -#else - "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ - "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */ - "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */ - "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */ -#endif - "\t.align " SZPTR 
"\n" - ".LEFDE2:\n\n", fcofs, CFRAME_SIZE); -#if LJ_HASFFI - fprintf(ctx->fp, - ".Lframe2:\n" - "\t.long .LECIE2-.LSCIE2\n" - ".LSCIE2:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -" SZPTR "\n" - "\t.byte " REG_RA "\n" - "\t.uleb128 1\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n" - "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n" - "\t.align " SZPTR "\n" - ".LECIE2:\n\n"); - fprintf(ctx->fp, - ".LSFDE3:\n" - "\t.long .LEFDE3-.LASFDE3\n" - ".LASFDE3:\n" - "\t.long .LASFDE3-.Lframe2\n" - "\t.long lj_vm_ffi_call-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ -#if LJ_64 - "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ - "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ - "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ - "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ -#else - "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */ - "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ - "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */ - "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */ -#endif - "\t.align " SZPTR "\n" - ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); -#endif -#endif - break; -#if !LJ_NO_UNWIND - /* Mental note: never let Apple design an assembler. - ** Or a linker. Or a plastic case. But I digress. 
- */ - case BUILD_machasm: { -#if LJ_HASFFI - int fcsize = 0; -#endif - int i; - fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n"); - fprintf(ctx->fp, - "EH_frame1:\n" - "\t.set L$set$x,LECIEX-LSCIEX\n" - "\t.long L$set$x\n" - "LSCIEX:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.ascii \"zPR\\0\"\n" - "\t.byte 0x1\n" - "\t.byte 128-" SZPTR "\n" - "\t.byte " REG_RA "\n" - "\t.byte 6\n" /* augmentation length */ - "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ -#if LJ_64 - "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n" - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n" -#else - "\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n" - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH-O. */ -#endif - "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n" - "\t.align " BSZPTR "\n" - "LECIEX:\n\n"); - for (i = 0; i < ctx->nsym; i++) { - const char *name = ctx->sym[i].name; - int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs; - if (size == 0) continue; -#if LJ_HASFFI - if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } -#endif - fprintf(ctx->fp, - "%s.eh:\n" - "LSFDE%d:\n" - "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" - "\t.long L$set$%d\n" - "LASFDE%d:\n" - "\t.long LASFDE%d-EH_frame1\n" - "\t.long %s-.\n" - "\t.long %d\n" - "\t.byte 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */ -#if LJ_64 - "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ - "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ - "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */ - "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */ -#else - "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/ - "\t.byte 0x87\n\t.byte 0x3\n" /* offset edi */ - "\t.byte 0x86\n\t.byte 0x4\n" /* offset esi */ - "\t.byte 0x83\n\t.byte 0x5\n" /* offset ebx */ -#endif - "\t.align " BSZPTR "\n" - "LEFDE%d:\n\n", - name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i); - } 
-#if LJ_HASFFI - if (fcsize) { - fprintf(ctx->fp, - "EH_frame2:\n" - "\t.set L$set$y,LECIEY-LSCIEY\n" - "\t.long L$set$y\n" - "LSCIEY:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.ascii \"zR\\0\"\n" - "\t.byte 0x1\n" - "\t.byte 128-" SZPTR "\n" - "\t.byte " REG_RA "\n" - "\t.byte 1\n" /* augmentation length */ -#if LJ_64 - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n" -#else - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH. */ -#endif - "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n" - "\t.align " BSZPTR "\n" - "LECIEY:\n\n"); - fprintf(ctx->fp, - "_lj_vm_ffi_call.eh:\n" - "LSFDEY:\n" - "\t.set L$set$yy,LEFDEY-LASFDEY\n" - "\t.long L$set$yy\n" - "LASFDEY:\n" - "\t.long LASFDEY-EH_frame2\n" - "\t.long _lj_vm_ffi_call-.\n" - "\t.long %d\n" - "\t.byte 0\n" /* augmentation length */ -#if LJ_64 - "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */ - "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ - "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */ - "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ -#else - "\t.byte 0xe\n\t.byte 8\n" /* def_cfa_offset */ - "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/ - "\t.byte 0xd\n\t.byte 0x4\n" /* def_cfa_register ebp */ - "\t.byte 0x83\n\t.byte 0x3\n" /* offset ebx */ -#endif - "\t.align " BSZPTR "\n" - "LEFDEY:\n\n", fcsize); - } -#endif -#if !LJ_64 - fprintf(ctx->fp, - "\t.non_lazy_symbol_pointer\n" - "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" - ".indirect_symbol _lj_err_unwind_dwarf\n" - ".long 0\n\n"); - fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n"); - { - const char *const *xn; - for (xn = ctx->extnames; *xn; xn++) - if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) - fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn); - } -#endif - fprintf(ctx->fp, ".subsections_via_symbols\n"); - } - break; -#endif 
- default: /* Difficult for other modes. */ - break; - } -} - diff --git a/src/xb1build.bat b/src/xb1build.bat deleted file mode 100644 index 847e84a555..0000000000 --- a/src/xb1build.bat +++ /dev/null @@ -1,101 +0,0 @@ -@rem Script to build LuaJIT with the Xbox One SDK. -@rem Donated to the public domain. -@rem -@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler) -@rem Then cd to this directory and run this script. - -@if not defined INCLUDE goto :FAIL -@if not defined DurangoXDK goto :FAIL - -@setlocal -@echo ---- Host compiler ---- -@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /DLUAJIT_ENABLE_GC64 -@set LJLINK=link /nologo -@set LJMT=mt /nologo -@set DASMDIR=..\dynasm -@set DASM=%DASMDIR%\dynasm.lua -@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c - -%LJCOMPILE% host\minilua.c -@if errorlevel 1 goto :BAD -%LJLINK% /out:minilua.exe minilua.obj -@if errorlevel 1 goto :BAD -if exist minilua.exe.manifest^ - %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe - -@rem Error out for 64 bit host compiler -@minilua -@if not errorlevel 8 goto :FAIL - -@set DASMFLAGS=-D WIN -D FFI -D P64 -minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x64.dasc -@if errorlevel 1 goto :BAD - -%LJCOMPILE% /I "." 
/I %DASMDIR% /D_DURANGO host\buildvm*.c -@if errorlevel 1 goto :BAD -%LJLINK% /out:buildvm.exe buildvm*.obj -@if errorlevel 1 goto :BAD -if exist buildvm.exe.manifest^ - %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe - -buildvm -m peobj -o lj_vm.obj -@if errorlevel 1 goto :BAD -buildvm -m bcdef -o lj_bcdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m ffdef -o lj_ffdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m libdef -o lj_libdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m recdef -o lj_recdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m folddef -o lj_folddef.h lj_opt_fold.c -@if errorlevel 1 goto :BAD - -@echo ---- Cross compiler ---- - -@set CWD=%cd% -@call "%DurangoXDK%\xdk\DurangoVars.cmd" XDK -@cd /D "%CWD%" -@shift - -@set LJCOMPILE="cl" /nologo /c /W3 /GF /Gm- /GR- /GS- /Gy /openmp- /D_CRT_SECURE_NO_DEPRECATE /D_LIB /D_UNICODE /D_DURANGO -@set LJLIB="lib" /nologo - -@if "%1"=="debug" ( - @shift - @set LJCOMPILE=%LJCOMPILE% /Zi /MDd /Od - @set LJLINK=%LJLINK% /debug -) else ( - @set LJCOMPILE=%LJCOMPILE% /MD /O2 /DNDEBUG -) - -@if "%1"=="amalg" goto :AMALG -%LJCOMPILE% /DLUA_BUILD_AS_DLL lj_*.c lib_*.c -@if errorlevel 1 goto :BAD -%LJLIB% /OUT:luajit.lib lj_*.obj lib_*.obj -@if errorlevel 1 goto :BAD -@goto :NOAMALG -:AMALG -%LJCOMPILE% /DLUA_BUILD_AS_DLL ljamalg.c -@if errorlevel 1 goto :BAD -%LJLIB% /OUT:luajit.lib ljamalg.obj lj_vm.obj -@if errorlevel 1 goto :BAD -:NOAMALG - -@del *.obj *.manifest minilua.exe buildvm.exe -@echo. -@echo === Successfully built LuaJIT for Xbox One === - -@goto :END -:BAD -@echo. -@echo ******************************************************* -@echo *** Build FAILED -- Please check the error messages *** -@echo ******************************************************* -@goto :END -:FAIL -@echo To run this script you must open a "Visual Studio .NET Command Prompt" -@echo (64 bit host compiler). 
The Xbox One SDK must be installed, too. -:END diff --git a/src/xedkbuild.bat b/src/xedkbuild.bat deleted file mode 100644 index 240ec878da..0000000000 --- a/src/xedkbuild.bat +++ /dev/null @@ -1,92 +0,0 @@ -@rem Script to build LuaJIT with the Xbox 360 SDK. -@rem Donated to the public domain. -@rem -@rem Open a "Visual Studio .NET Command Prompt" (32 bit host compiler) -@rem Then cd to this directory and run this script. - -@if not defined INCLUDE goto :FAIL -@if not defined XEDK goto :FAIL - -@setlocal -@rem ---- Host compiler ---- -@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE -@set LJLINK=link /nologo -@set LJMT=mt /nologo -@set DASMDIR=..\dynasm -@set DASM=%DASMDIR%\dynasm.lua -@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c - -%LJCOMPILE% host\minilua.c -@if errorlevel 1 goto :BAD -%LJLINK% /out:minilua.exe minilua.obj -@if errorlevel 1 goto :BAD -if exist minilua.exe.manifest^ - %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe - -@rem Error out for 64 bit host compiler -@minilua -@if errorlevel 8 goto :FAIL - -@set DASMFLAGS=-D GPR64 -D FRAME32 -D PPE -D SQRT -D DUALNUM -minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_ppc.dasc -@if errorlevel 1 goto :BAD - -%LJCOMPILE% /I "." 
/I %DASMDIR% /D_XBOX_VER=200 /DLUAJIT_TARGET=LUAJIT_ARCH_PPC host\buildvm*.c -@if errorlevel 1 goto :BAD -%LJLINK% /out:buildvm.exe buildvm*.obj -@if errorlevel 1 goto :BAD -if exist buildvm.exe.manifest^ - %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe - -buildvm -m peobj -o lj_vm.obj -@if errorlevel 1 goto :BAD -buildvm -m bcdef -o lj_bcdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m ffdef -o lj_ffdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m libdef -o lj_libdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m recdef -o lj_recdef.h %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB% -@if errorlevel 1 goto :BAD -buildvm -m folddef -o lj_folddef.h lj_opt_fold.c -@if errorlevel 1 goto :BAD - -@rem ---- Cross compiler ---- -@set LJCOMPILE="%XEDK%\bin\win32\cl" /nologo /c /MT /O2 /W3 /GF /Gm- /GR- /GS- /Gy /openmp- /D_CRT_SECURE_NO_DEPRECATE /DNDEBUG /D_XBOX /D_LIB /DLUAJIT_USE_SYSMALLOC -@set LJLIB="%XEDK%\bin\win32\lib" /nologo -@set "INCLUDE=%XEDK%\include\xbox" - -@if "%1" neq "debug" goto :NODEBUG -@shift -@set "LJCOMPILE=%LJCOMPILE% /Zi" -:NODEBUG -@if "%1"=="amalg" goto :AMALG -%LJCOMPILE% /DLUA_BUILD_AS_DLL lj_*.c lib_*.c -@if errorlevel 1 goto :BAD -%LJLIB% /OUT:luajit20.lib lj_*.obj lib_*.obj -@if errorlevel 1 goto :BAD -@goto :NOAMALG -:AMALG -%LJCOMPILE% /DLUA_BUILD_AS_DLL ljamalg.c -@if errorlevel 1 goto :BAD -%LJLIB% /OUT:luajit20.lib ljamalg.obj lj_vm.obj -@if errorlevel 1 goto :BAD -:NOAMALG - -@del *.obj *.manifest minilua.exe buildvm.exe -@echo. -@echo === Successfully built LuaJIT for Xbox 360 === - -@goto :END -:BAD -@echo. -@echo ******************************************************* -@echo *** Build FAILED -- Please check the error messages *** -@echo ******************************************************* -@goto :END -:FAIL -@echo To run this script you must open a "Visual Studio .NET Command Prompt" -@echo (32 bit host compiler). The Xbox 360 SDK must be installed, too. 
-:END diff --git a/test-capi.nix b/test-capi.nix new file mode 100644 index 0000000000..fb5cba2edf --- /dev/null +++ b/test-capi.nix @@ -0,0 +1,21 @@ +{ pkgs, raptorjit, ... }: +with pkgs; + +let lpty = + fetchurl rec { + url = "http://tset.de/downloads/lpty-1.2.2-1.tar.gz"; + sha256 = "071mvz79wi9vr6hvrnb1rv19lqp1bh2fi742zkpv2sm1r9gy5rav"; + }; +in + +stdenv.mkDerivation { + name = "test-capi"; + src = lpty; + phases = "unpackPhase buildPhase testPhase"; + buildInputs = [ luajit raptorjit which ]; + testPhase = '' + ${raptorjit}/bin/raptorjit -e 'require "lpty" print("Successfully loaded a C library.")' \ + > $out + ''; +} + diff --git a/test-libraptorjit.nix b/test-libraptorjit.nix new file mode 100644 index 0000000000..fe16cdb86f --- /dev/null +++ b/test-libraptorjit.nix @@ -0,0 +1,18 @@ +# Test that libraptorjit can be dynamically linked. +{ pkgs, raptorjit, ... }: +with pkgs; with stdenv; + +# Trivial C program to dynamically link with raptorjit. +let csrc = writeScript "test.c" '' + #include + int main(int argc, char **argv) { + printf("dynamically linked executable worked ok!\n"); + return 0; + } +''; in + +runCommand "test-libraptorjit" { nativeBuildInputs = [ gcc raptorjit ]; } '' + gcc -lraptorjit-5.1 -o ./test ${csrc} + ./test | tee $out +'' + diff --git a/test.nix b/test.nix new file mode 100644 index 0000000000..bf4338827c --- /dev/null +++ b/test.nix @@ -0,0 +1,19 @@ +{ pkgs, raptorjit, source, name, args }: + +pkgs.stdenv.mkDerivation { + name = "test-${name}"; + src = source; + buildInputs = [ raptorjit ]; + phases = "unpackPhase buildPhase"; + buildPhase = '' + mkdir $out + cd testsuite/test + echo "Running testsuite with ${args} and output to $out/log.txt" + raptorjit ${args} test.lua 2>&1 > $out/log.txt + result=$? 
+ echo -n "*** TEST RESULTS (${args}): " + tail -1 $out/log.txt + exit $result + ''; +} + diff --git a/testsuite/README b/testsuite/README new file mode 100644 index 0000000000..5258c4df27 --- /dev/null +++ b/testsuite/README @@ -0,0 +1,77 @@ +******************************************** +** THIS IS NOT THE TEST SUITE FOR LUAJIT! ** +******************************************** + +In fact it doesn't even have the steps to build it or run it, +so please don't complain. + +This repo is a place to collect and clean up tests for LuaJIT. +They should eventually be merged into the main LuaJIT repo. + +It's definitely not in the best state and needs a serious +cleanup effort. Sorry. + + +Many issues need to be resolved before the merge can be performed: + +- Choose a portable test runner + Requirement: very few dependencies, possibly Lua/Shell only + +- Minimal test runner library, wherever assert() is not enough + +- Debugging test failures is a lot simpler, when individual tests can still + be run from the LuaJIT command line without any big dependencies + +- Define consistent grouping of all tests + +- Define consistent naming of all tests + +- Split everything into a lot of tiny tests + +- Reduce time taken to run the test suite + Separate tiers, parallelized testing + +- Some tests can only run under certain configurations (e.g. FFI) + +- Some tests need a clean slate to give reproducible results + Most others should be run from the same state for performance reasons + +- Hard to check that the JIT compiler actually generates the intended code + Maybe use a test matching variant of the jit.dump module + +- Portability concerns + +- Avoiding undefined behavior in tests or ignoring it + +- Matrix of architectures + configuration options that need testing + +- Merge tests from other sources, e.g. the various Lua test suites. 
+ +- Tests should go into the LuaJIT git repo, but in separate tarballs + for the releases + + +There are some benchmarks, too: + +- Some of the benchmarks can be used as tests (with low scaling) + by checksumming their output and comparing against known good results + +- Most benchmarks need different scalings to be useful for comparison + on all architectures + + +Note from Mike Pall: + +I've removed all tests of undeterminable origin or that weren't explicitly +contributed with the intention of being part of a public test suite. + +I hereby put all Lua/LuaJIT tests and benchmarks that I wrote under the +public domain. I've removed any copyright headers. + +If I've forgotten an attribution or you want your contributed test to be +removed, please open an issue. + +There are some benchmarks that bear other copyrights, probably public +domain, BSD or MIT licensed. If the status cannot be determined, they +need to be replaced or removed before merging with the LuaJIT repo. + diff --git a/testsuite/bench/PARAM_x86.txt b/testsuite/bench/PARAM_x86.txt new file mode 100644 index 0000000000..87088d7b13 --- /dev/null +++ b/testsuite/bench/PARAM_x86.txt @@ -0,0 +1,29 @@ +array3d 300 +binary-trees 16 +chameneos 1e7 +coroutine-ring 2e7 +euler14-bit 2e7 +fannkuch 11 +fasta 25e6 +k-nucleotide 5e6 FASTA_5000000 +life +mandelbrot 5000 +mandelbrot-bit 5000 +md5 20000 +nbody 5e6 +nsieve 12 +nsieve-bit 12 +nsieve-bit-fp 12 +partialsums 1e7 +pidigits-nogmp 5000 +ray 9 +recursive-ack 10 +recursive-fib 40 +revcomp 5e6 FASTA_5000000 +scimark-fft 50000 +scimark-lu 5000 +scimark-sor 50000 +scimark-sparse 15e4 +series 10000 +spectral-norm 3000 +sum-file 5000 SUMCOL_5000 diff --git a/testsuite/bench/PARAM_x86_CI.txt b/testsuite/bench/PARAM_x86_CI.txt new file mode 100644 index 0000000000..5f8752c23b --- /dev/null +++ b/testsuite/bench/PARAM_x86_CI.txt @@ -0,0 +1,26 @@ +array3d 500 +binary-trees 16 +chameneos 1e7 +coroutine-ring 5e7 +euler14-bit 2e7 +fannkuch 11 +fasta 5e6 +life +mandelbrot 
5000 +mandelbrot-bit 5000 +md5 30000 +nbody 8e6 +nsieve 12 +nsieve-bit 13 +nsieve-bit-fp 12 +partialsums 3e7 +pidigits-nogmp 5000 +ray 9 +recursive-fib 40 +scimark-fft 50000 +scimark-lu 5000 +scimark-sor 50000 +scimark-sparse 15e4 +series 10000 +spectral-norm 3000 +roulette diff --git a/testsuite/bench/SUMCOL_1.txt b/testsuite/bench/SUMCOL_1.txt new file mode 100644 index 0000000000..956aba1447 --- /dev/null +++ b/testsuite/bench/SUMCOL_1.txt @@ -0,0 +1,1000 @@ +276 +498 +-981 +770 +-401 +702 +966 +950 +-853 +-53 +-293 +604 +288 +892 +-697 +204 +96 +408 +880 +-7 +-817 +422 +-261 +-485 +-77 +826 +184 +864 +-751 +626 +812 +-369 +-353 +-371 +488 +-83 +-659 +24 +524 +-21 +840 +-757 +-17 +-973 +-843 +260 +858 +-389 +-521 +-99 +482 +-561 +-213 +630 +766 +932 +112 +-419 +-877 +762 +266 +-837 +170 +834 +746 +764 +922 +-89 +576 +-63 +90 +684 +316 +506 +-959 +708 +70 +252 +-747 +342 +-593 +-895 +-937 +-707 +350 +588 +-201 +-683 +-113 +-511 +-867 +322 +202 +472 +150 +-9 +-643 +28 +336 +86 +-925 +836 +-473 +-451 +-971 +-805 +-619 +84 +-67 +806 +270 +366 +334 +-555 +-557 +-331 +-409 +-553 +-145 +-71 +528 +490 +492 +828 +628 +-961 +536 +-859 +-271 +974 +-671 +-749 +414 +-257 +778 +56 +598 +-437 +-899 +-785 +-987 +32 +-999 +132 +-821 +-209 +402 +-543 +194 +-967 +294 +-943 +-285 +-483 +-97 +660 +-481 +-829 +-309 +-597 +-855 +80 +-355 +192 +-823 +436 +916 +282 +-629 +612 +-329 +-535 +780 +-47 +706 +110 +756 +-857 +-933 +-345 +-523 +718 +-31 +902 +678 +540 +698 +456 +-399 +126 +412 +-563 +-321 +-487 +-641 +-195 +-199 +-955 +772 +570 +18 +-217 +886 +984 +-721 +-995 +46 +-989 +946 +64 +716 +-719 +-869 +-579 +776 +450 +936 +980 +-439 +-977 +-455 +-997 +6 +268 +-269 +-421 +328 +352 +578 +-575 +476 +976 +-57 +-469 +544 +582 +-43 +510 +-939 +-581 +-337 +-203 +-737 +-827 +852 +-279 +-803 +-911 +-865 +548 +48 +-75 +416 +-275 +688 +-255 +-687 +-461 +-233 +420 +912 +-901 +-299 +12 +568 +694 +-411 +-883 +-327 +-361 +-339 +646 +-137 +-905 +670 +686 +-131 +-849 +-825 +256 +228 +-841 +68 +368 
+-909 +242 +298 +118 +10 +222 +954 +-493 +-459 +-445 +608 +-765 +34 +468 +-715 +690 +-185 +-551 +-571 +-241 +292 +92 +768 +-923 +956 +614 +8 +730 +208 +-417 +300 +136 +-59 +-251 +-539 +166 +798 +866 +454 +-391 +-317 +668 +502 +-15 +994 +854 +-189 +666 +446 +-565 +-5 +42 +-227 +-87 +-779 +26 +312 +354 +754 +396 +-515 +220 +872 +654 +88 +-667 +250 +572 +952 +72 +982 +972 +-529 +-471 +-533 +-427 +538 +154 +-457 +-819 +750 +152 +452 +-41 +838 +-489 +418 +-649 +-637 +-197 +74 +394 +-653 +-727 +-435 +-23 +348 +638 +-611 +914 +-357 +-743 +-685 +580 +-247 +-577 +54 +-931 +-3 +558 +-793 +-443 +-759 +162 +-811 +384 +720 +-117 +900 +-519 +-39 +744 +432 +286 +-873 +380 +-167 +-283 +430 +-155 +-755 +206 +100 +364 +-677 +332 +-567 +382 +-605 +-181 +676 +-475 +-845 +910 +546 +14 +398 +616 +-769 +424 +992 +-235 +-239 +774 +478 +-919 +168 +-771 +-773 +-69 +-509 +930 +550 +-463 +178 +-861 +-761 +-795 +234 +-831 +-61 +-979 +-851 +-665 +-709 +896 +742 +-123 +590 +-693 +-887 +-379 +144 +-717 +20 +174 +82 +464 +30 +-969 +-349 +-531 +-799 +-661 +-647 +-623 +878 +148 +-545 +238 +-259 +554 +726 +-37 +-797 +98 +78 +-591 +-975 +962 +120 +906 +-207 +656 +-171 +652 +188 +672 +-133 +-91 +224 +818 +-333 +-839 +-499 +22 +-739 +142 +378 +-403 +-315 +370 +284 +122 +230 +-527 +-127 +442 +534 +160 +722 +262 +-657 +304 +258 +-103 +960 +-495 +-265 +634 +-101 +480 +-363 +308 +76 +-949 +-585 +904 +146 +-703 +164 +850 +246 +732 +-725 +566 +274 +-163 +-935 +-681 +-229 +254 +-733 +-547 +-273 +-903 +736 +-711 +794 +392 +-655 +-549 +808 +-429 +484 +-701 +-617 +804 +36 +-775 +-335 +-927 +714 +-177 +-325 +-413 +-963 +114 +-253 +-789 +-645 +40 +434 +898 +924 +-19 +738 +788 +280 +-121 +594 +-913 +426 +816 +-373 +-45 +340 +-109 +-323 +58 +-249 +940 +-297 +988 +998 +-607 +-745 +-633 +-115 +996 +-893 +696 +400 +848 +500 +-263 +562 +-807 +-105 +-603 +658 +-73 +-863 +448 +680 +-157 +-161 +728 +814 +-477 +-375 +1000 +-631 +-991 +362 +156 +-187 +-705 +-917 +-449 +-741 +556 +440 +-589 +-11 +-359 +-891 +-801 +-153 +-381 
+938 +-173 +-243 +618 +-599 +-497 +486 +128 +790 +460 +-27 +-305 +-205 +-215 +324 +-341 +50 +458 +52 +-621 +874 +386 +560 +-569 +-51 +802 +786 +920 +-425 +466 +444 +-507 +-915 +346 +622 +-679 +784 +-689 +388 +508 +-613 +-313 +-447 +564 +-897 +-211 +-225 +-615 +-367 +186 +894 +-65 +-453 +-245 +602 +496 +-651 +-601 +820 +226 +-695 +-119 +372 +180 +94 +214 +542 +648 +-871 +592 +584 +824 +796 +374 +-945 +-311 +516 +942 +-221 +-433 +200 +-465 +-953 +870 +868 +-879 +518 +356 +-223 +682 +990 +-191 +-541 +-951 +-921 +-319 +-169 +-291 +-289 +792 +876 +306 +-491 +326 +-885 +62 +514 +-929 +318 +-231 +632 +44 +-107 +644 +-267 +-343 +-847 +934 +734 +-505 +-351 +574 +-627 +636 +-93 +-431 +-835 +428 +-183 +-151 +2 +-813 +-595 +958 +-141 +692 +-385 +610 +-179 +376 +948 +198 +-675 +964 +-907 +918 +-165 +-1 +406 +748 +-111 +532 +-55 +-281 +740 +504 +236 +-29 +662 +-713 +-537 +196 +-587 +822 +-135 +700 +-35 +674 +-407 +240 +-673 +-669 +-393 +470 +-525 +-875 +-383 +-625 +296 +-85 +-147 +-277 +800 +-691 +-143 +16 +-983 +-303 +290 +-139 +172 +320 +512 +596 +640 +664 +-791 +-783 +-387 +-735 +-467 +-301 +810 +134 +216 +278 +176 +606 +140 +-787 +978 +586 +890 +882 +-753 +-13 +970 +-941 +-175 +-777 +-809 +-441 +-347 +-377 +390 +-423 +842 +642 +190 +302 +438 +704 +310 +-49 +124 +-781 +-287 +724 +-767 +830 +620 +-295 +244 +-159 +-307 +-397 +66 +-237 +314 +-79 +624 +710 +272 +-365 +928 +856 +138 +-479 +520 +832 +862 +760 +846 +-81 +106 +-513 +-193 +650 +782 +-517 +944 +218 +712 +-663 +-559 +462 +-635 +-25 +182 +530 +844 +330 +-833 +102 +-881 +108 +-947 +-763 +-405 +232 +410 +104 +-729 +-149 +-889 +888 +360 +968 +908 +116 +-815 +-129 +522 +-723 +-993 +860 +-503 +926 +-219 +-415 +60 +158 +-609 +-501 +986 +-699 +-583 +884 +212 +210 +-957 +526 +-985 +552 +344 +-395 +-95 +338 +248 +494 +130 +404 +358 +600 +-639 +-125 +-33 +-965 +752 +474 +-731 +758 +-573 +4 +38 +264 diff --git a/testsuite/bench/TEST_md5sum.txt b/testsuite/bench/TEST_md5sum.txt new file mode 100644 index 0000000000..15aa8a1c8f --- 
/dev/null +++ b/testsuite/bench/TEST_md5sum.txt @@ -0,0 +1,20 @@ +binarytrees 10 7202f4e13df7abc5ad8c07f05fe9d644 +chameneos 1e5 a629ce12f63050c6656bce175258cf8f +cheapconcr 1000 d29799d1e263810a4db7bbf43ca66499 +cheapconcw 1000 d29799d1e263810a4db7bbf43ca66499 +fannkuch 8 51e5e372cbc5471ea8940b20ad782319 +fasta 1e5 78cd327de6f0a5667da0aa9349888279 +knucleotide x 88efb24c1fed533959ed84bb32c88142 = 0 and x < self.nx, "x outside PA") + assert(y >= 0 and y < self.ny, "y outside PA") + assert(z >= 0 and z < self.nz, "z outside PA") + local pos = (z*self.ny + y)*self.nx + x + local image = self.image + if self.packed then + local maxv = self.max_voltage + if p > maxv then self.max_voltage = p*2.0 end + local oldp = image[pos] or 0.0 -- Works with uninitialized table, too + if oldp > maxv then p = p + maxv*2.0 end + image[pos] = p + else + image[pos] = p + end + self.changed = true + self.changed_recently = true +end + +local function array_points(self) + local y, z = 0, 0 + return function(self, x) + x = x + 1 + if x >= self.nx then + x = 0 + y = y + 1 + if y >= self.ny then + y = 0 + z = z + 1 + if z >= self.nz then + return nil, nil, nil + end + end + end + return x, y, z + end, self, 0 +end + +local function array_new(nx, ny, nz, packed) + return { + nx = nx, ny = ny, nz = nz, + packed = packed, max_voltage = 0.0, + changed = false, changed_recently = false, + image = {}, -- Preferably use a fixed-type, pre-sized array here. 
+ set = array_set, + points = array_points, + } +end + +local dim = tonumber(arg and arg[1]) or 300 -- Array dimension dim^3 +local packed = arg and arg[2] == "packed" -- Packed image or flat +local arr = array_new(dim, dim, dim, packed) + +for x,y,z in arr:points() do + arr:set(x, y, z, x*x) +end +assert(arr.image[dim^3-1] == (dim-1)^2) + diff --git a/testsuite/bench/bench.R b/testsuite/bench/bench.R new file mode 100644 index 0000000000..e76382bf15 --- /dev/null +++ b/testsuite/bench/bench.R @@ -0,0 +1,49 @@ +# R subroutines for reading and visualizing benchmark results. + +suppressPackageStartupMessages({ + library(dplyr) + library(ggplot2) +}) + +## R library routines for analyzing benchmark results +bench.read <- function(filename) { + data <- read.csv(filename) + ## baseline is the mean performance of the "A" version + baseline <- data %>% + filter(letter=="A") %>% + group_by(benchmark) %>% + summarize(baseline = mean(cycles)) + ## Add 'relative' performance column: compared to mean from baseline branch + relative <- data %>% + left_join(baseline, by="benchmark") %>% + group_by(benchmark, version) %>% + mutate(relative = first(baseline) / cycles) + return(relative) +} + +## Jitter plot faceted by benchmark +bench.jitterplot <- function(data) { + ggplot(aes(y=relative, x=version, color=version), data=data) + + geom_jitter(shape=1, alpha=0.5) + + scale_y_continuous(breaks=seq(0, 3, 0.1), labels=scales::percent) + + theme(aspect.ratio = 1) + + theme(axis.text.x = element_text(angle=90)) + + ylab("Performance relative to baseline average") + + ggtitle("Comparative performance between RaptorJIT versions") + + facet_wrap(~ benchmark, scales="free_x") +} + +## ECDF plot faceted by benchmark +bench.ecdfplot <- function(data) { + ggplot(aes(x=relative, color=version), data=data) + + stat_ecdf() + + scale_x_continuous(labels=scales::percent) + + scale_y_log10(labels=scales::percent) + + theme(aspect.ratio = 1) + + theme(axis.text.x = element_text(angle=90)) + + 
ylab("Performance relative to baseline average") + + xlab("Percentage of results at or above this performance level") + + ggtitle("Comparative performance between RaptorJIT variants") + + facet_wrap(~ benchmark) +} + diff --git a/testsuite/bench/binary-trees.lua b/testsuite/bench/binary-trees.lua new file mode 100644 index 0000000000..bf0404666f --- /dev/null +++ b/testsuite/bench/binary-trees.lua @@ -0,0 +1,47 @@ + +local function BottomUpTree(item, depth) + if depth > 0 then + local i = item + item + depth = depth - 1 + local left, right = BottomUpTree(i-1, depth), BottomUpTree(i, depth) + return { item, left, right } + else + return { item } + end +end + +local function ItemCheck(tree) + if tree[2] then + return tree[1] + ItemCheck(tree[2]) - ItemCheck(tree[3]) + else + return tree[1] + end +end + +local N = tonumber(arg and arg[1]) or 0 +local mindepth = 4 +local maxdepth = mindepth + 2 +if maxdepth < N then maxdepth = N end + +do + local stretchdepth = maxdepth + 1 + local stretchtree = BottomUpTree(0, stretchdepth) + io.write(string.format("stretch tree of depth %d\t check: %d\n", + stretchdepth, ItemCheck(stretchtree))) +end + +local longlivedtree = BottomUpTree(0, maxdepth) + +for depth=mindepth,maxdepth,2 do + local iterations = 2 ^ (maxdepth - depth + mindepth) + local check = 0 + for i=1,iterations do + check = check + ItemCheck(BottomUpTree(1, depth)) + + ItemCheck(BottomUpTree(-1, depth)) + end + io.write(string.format("%d\t trees of depth %d\t check: %d\n", + iterations*2, depth, check)) +end + +io.write(string.format("long lived tree of depth %d\t check: %d\n", + maxdepth, ItemCheck(longlivedtree))) diff --git a/testsuite/bench/chameneos.lua b/testsuite/bench/chameneos.lua new file mode 100644 index 0000000000..78b64c3f3b --- /dev/null +++ b/testsuite/bench/chameneos.lua @@ -0,0 +1,68 @@ + +local co = coroutine +local create, resume, yield = co.create, co.resume, co.yield + +local N = tonumber(arg and arg[1]) or 10 +local first, second + +-- Meet 
another creature. +local function meet(me) + while second do yield() end -- Wait until meeting place clears. + local other = first + if other then -- Hey, I found a new friend! + first = nil + second = me + else -- Sniff, nobody here (yet). + local n = N - 1 + if n < 0 then return end -- Uh oh, the mall is closed. + N = n + first = me + repeat yield(); other = second until other -- Wait for another creature. + second = nil + yield() -- Be nice and let others meet up. + end + return other +end + +-- Create a very social creature. +local function creature(color) + return create(function() + local me = color + for met=0,1000000000 do + local other = meet(me) + if not other then return met end + if me ~= other then + if me == "blue" then me = other == "red" and "yellow" or "red" + elseif me == "red" then me = other == "blue" and "yellow" or "blue" + else me = other == "blue" and "red" or "blue" end + end + end + end) +end + +-- Trivial round-robin scheduler. +local function schedule(threads) + local resume = resume + local nthreads, meetings = #threads, 0 + repeat + for i=1,nthreads do + local thr = threads[i] + if not thr then return meetings end + local ok, met = resume(thr) + if met then + meetings = meetings + met + threads[i] = nil + end + end + until false +end + +-- A bunch of colorful creatures. 
+local threads = { + creature("blue"), + creature("red"), + creature("yellow"), + creature("blue"), +} + +io.write(schedule(threads), "\n") diff --git a/testsuite/bench/coroutine-ring.lua b/testsuite/bench/coroutine-ring.lua new file mode 100644 index 0000000000..1e8c5ef688 --- /dev/null +++ b/testsuite/bench/coroutine-ring.lua @@ -0,0 +1,42 @@ +-- The Computer Language Benchmarks Game +-- http://shootout.alioth.debian.org/ +-- contributed by Sam Roberts +-- reviewed by Bruno Massa + +local n = tonumber(arg and arg[1]) or 2e7 + +-- fixed size pool +local poolsize = 503 +local threads = {} + +-- cache these to avoid global environment lookups +local create = coroutine.create +local resume = coroutine.resume +local yield = coroutine.yield + +local id = 1 +local token = 0 +local ok + +local body = function(token) + while true do + token = yield(token + 1) + end +end + +-- create all threads +for id = 1, poolsize do + threads[id] = create(body) +end + +-- send the token +repeat + if id == poolsize then + id = 1 + else + id = id + 1 + end + ok, token = resume(threads[id], token) +until token == n + +io.write(id, "\n") diff --git a/testsuite/bench/default.nix b/testsuite/bench/default.nix new file mode 100644 index 0000000000..717fd94a38 --- /dev/null +++ b/testsuite/bench/default.nix @@ -0,0 +1,103 @@ +# Run a large parallel benchmark campaign and generate R/ggplot2 reports. + +{ pkgs ? (import ../../pkgs.nix) {}, + Asrc, Aname ? "A", Aargs ? "", + Bsrc ? null, Bname ? "B", Bargs ? "", + Csrc ? null, Cname ? "C", Cargs ? "", + Dsrc ? null, Dname ? "D", Dargs ? "", + Esrc ? null, Ename ? "E", Eargs ? "", + hardware ? null, + runs ? 30 }: + +with pkgs; +with stdenv; + +# Derivation to run benchmarks and produce a CSV result. 
+let benchmark = letter: name: src: args: run: + let raptorjit = (import src {inherit pkgs; version = name;}).raptorjit; in + mkDerivation { + name = "benchmark-${name}-${toString run}"; + src = pkgs.lib.cleanSource ./.; + # Force consistent hardware + requiredSystemFeatures = if hardware != null then [hardware] else []; + buildInputs = [ raptorjit linuxPackages.perf utillinux ]; + buildPhase = '' + # Run multiple iterations of the benchmarks + echo "Run $run" + mkdir -p result/$run + # Run each individual benchmark + cat PARAM_x86_CI.txt | + (while read benchmark params; do + echo "running $benchmark" + # Execute with performance monitoring & time supervision + # Note: discard stdout due to overwhelming output + timeout -sKILL 60 \ + perf stat -x, -o result/$run/$benchmark.perf \ + raptorjit ${args} -e "math.randomseed(${toString run})" $benchmark.lua $params \ + > /dev/null || \ + rm result/$run/$benchmark.perf + done) + ''; + installPhase = '' + # Copy the raw perf output for reference + cp -r result $out + # Log the exact CPU + lscpu > $out/cpu.txt + # Create a CSV file + # Create the rows based on the perf logs + for result in result/*.perf; do + version=${name} + benchmark=$(basename -s.perf -a $result) + instructions=$(awk -F, -e '$3 ~ "^instructions" { print $1; }' $result) + cycles=$( awk -F, -e '$3 ~ "^cycles" { print $1; }' $result) + echo ${letter},$version,$benchmark,${toString run},$instructions,$cycles >> $out/bench.csv + done + ''; + }; + +# Run a set of benchmarks and aggregate the results into a CSV file. +# Each benchmark run is a separate derivation. This allows nix to +# parallelize and distribute the benchmarking. 
+ benchmarkSet = letter: name: src: args: + let benchmarks = map (benchmark letter name src args) (pkgs.lib.range 1 runs); + in + runCommand "benchmarks-${name}" { buildInputs = benchmarks; } '' + source $stdenv/setup + mkdir -p $out + for dir in ${pkgs.lib.fold (acc: x: "${acc} ${x}") "" benchmarks}; do + cat $dir/bench.csv >> $out/bench.csv + done + ''; + + benchA = (benchmarkSet "A" Aname Asrc Aargs); + benchB = if Bsrc != null then (benchmarkSet "B" Bname Bsrc Bargs) else ""; + benchC = if Csrc != null then (benchmarkSet "C" Cname Csrc Cargs) else ""; + benchD = if Dsrc != null then (benchmarkSet "D" Dname Dsrc Dargs) else ""; + benchE = if Esrc != null then (benchmarkSet "E" Ename Esrc Eargs) else ""; +in + +rec { + benchmarkResults = mkDerivation { + name = "benchmark-results"; + buildInputs = with pkgs.rPackages; [ pkgs.R ggplot2 dplyr ]; + builder = pkgs.writeText "builder.csv" '' + source $stdenv/setup + # Get the CSV file + mkdir -p $out/nix-support + echo "letter,version,benchmark,run,instructions,cycles" > bench.csv + cat ${benchA}/bench.csv >> bench.csv + [ -n "${benchB}" ] && cat ${benchB}/bench.csv >> bench.csv + [ -n "${benchC}" ] && cat ${benchC}/bench.csv >> bench.csv + [ -n "${benchD}" ] && cat ${benchD}/bench.csv >> bench.csv + [ -n "${benchE}" ] && cat ${benchE}/bench.csv >> bench.csv + cp bench.csv $out + echo "file CSV $out/bench.csv" >> $out/nix-support/hydra-build-products + # Generate the report + (cd ${./.}; Rscript ./generate.R $out/bench.csv $out) + for png in $out/*.png; do + echo "file PNG $png" >> $out/nix-support/hydra-build-products + done + ''; + }; +} + diff --git a/testsuite/bench/euler14-bit.lua b/testsuite/bench/euler14-bit.lua new file mode 100644 index 0000000000..537f2bf322 --- /dev/null +++ b/testsuite/bench/euler14-bit.lua @@ -0,0 +1,22 @@ + +local bit = require("bit") +local bnot, bor, band = bit.bnot, bit.bor, bit.band +local shl, shr = bit.lshift, bit.rshift + +local N = tonumber(arg and arg[1]) or 10000000 +local 
cache, m, n = { 1 }, 1, 1 +if arg and arg[2] then cache = nil end +for i=2,N do + local j = i + for len=1,1000000000 do + j = bor(band(shr(j,1), band(j,1)-1), band(shl(j,1)+j+1, bnot(band(j,1)-1))) + if cache then + local x = cache[j]; if x then j = x+len; break end + elseif j == 1 then + j = len+1; break + end + end + if cache then cache[i] = j end + if j > m then m, n = j, i end +end +io.write("Found ", n, " (chain length: ", m, ")\n") diff --git a/testsuite/bench/fannkuch.lua b/testsuite/bench/fannkuch.lua new file mode 100644 index 0000000000..2a4cd4267a --- /dev/null +++ b/testsuite/bench/fannkuch.lua @@ -0,0 +1,50 @@ + +local function fannkuch(n) + local p, q, s, odd, check, maxflips = {}, {}, {}, true, 0, 0 + for i=1,n do p[i] = i; q[i] = i; s[i] = i end + repeat + -- Print max. 30 permutations. + if check < 30 then + if not p[n] then return maxflips end -- Catch n = 0, 1, 2. + io.write(unpack(p)); io.write("\n") + check = check + 1 + end + -- Copy and flip. + local q1 = p[1] -- Cache 1st element. + if p[n] ~= n and q1 ~= 1 then -- Avoid useless work. + for i=2,n do q[i] = p[i] end -- Work on a copy. + local flips = 1 -- Flip ... + while true do + local qq = q[q1] + if qq == 1 then -- ... until 1st element is 1. + if flips > maxflips then maxflips = flips end -- New maximum? + break + end + q[q1] = q1 + if q1 >= 4 then + local i, j = 2, q1 - 1 + repeat q[i], q[j] = q[j], q[i]; i = i + 1; j = j - 1; until i >= j + end + q1 = qq + flips=flips+1 + end + end + -- Permute. + if odd then + p[2], p[1] = p[1], p[2]; odd = false -- Rotate 1<-2. + else + p[2], p[3] = p[3], p[2]; odd = true -- Rotate 1<-2 and 1<-2<-3. + for i=3,n do + local sx = s[i] + if sx ~= 1 then s[i] = sx-1; break end + if i == n then return maxflips end -- Out of permutations. + s[i] = i + -- Rotate 1<-...<-i+1. 
+ local t=p[1]; for j=i+1,1,-1 do p[j],t=t,p[j] end + end + end + until false +end + +local n = tonumber(arg and arg[1]) or 1 +io.write("Pfannkuchen(", n, ") = ", fannkuch(n), "\n") diff --git a/testsuite/bench/fasta.lua b/testsuite/bench/fasta.lua new file mode 100644 index 0000000000..7ce6080411 --- /dev/null +++ b/testsuite/bench/fasta.lua @@ -0,0 +1,95 @@ + +local Last = 42 +local function random(max) + local y = (Last * 3877 + 29573) % 139968 + Last = y + return (max * y) / 139968 +end + +local function make_repeat_fasta(id, desc, s, n) + local write, sub = io.write, string.sub + write(">", id, " ", desc, "\n") + local p, sn, s2 = 1, #s, s..s + for i=60,n,60 do + write(sub(s2, p, p + 59), "\n") + p = p + 60; if p > sn then p = p - sn end + end + local tail = n % 60 + if tail > 0 then write(sub(s2, p, p + tail-1), "\n") end +end + +local function make_random_fasta(id, desc, bs, n) + io.write(">", id, " ", desc, "\n") + loadstring([=[ + local write, char, unpack, n, random = io.write, string.char, unpack, ... + local buf, p = {}, 1 + for i=60,n,60 do + for j=p,p+59 do ]=]..bs..[=[ end + buf[p+60] = 10; p = p + 61 + if p >= 2048 then write(char(unpack(buf, 1, p-1))); p = 1 end + end + local tail = n % 60 + if tail > 0 then + for j=p,p+tail-1 do ]=]..bs..[=[ end + p = p + tail; buf[p] = 10; p = p + 1 + end + write(char(unpack(buf, 1, p-1))) + ]=], desc)(n, random) +end + +local function bisect(c, p, lo, hi) + local n = hi - lo + if n == 0 then return "buf[j] = "..c[hi].."\n" end + local mid = math.floor(n / 2) + return "if r < "..p[lo+mid].." then\n"..bisect(c, p, lo, lo+mid).. + "else\n"..bisect(c, p, lo+mid+1, hi).."end\n" +end + +local function make_bisect(tab) + local c, p, sum = {}, {}, 0 + for i,row in ipairs(tab) do + c[i] = string.byte(row[1]) + sum = sum + row[2] + p[i] = sum + end + return "local r = random(1)\n"..bisect(c, p, 1, #tab) +end + +local alu = + "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG".. + "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA".. 
+ "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT".. + "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA".. + "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG".. + "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC".. + "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA" + +local iub = make_bisect{ + { "a", 0.27 }, + { "c", 0.12 }, + { "g", 0.12 }, + { "t", 0.27 }, + { "B", 0.02 }, + { "D", 0.02 }, + { "H", 0.02 }, + { "K", 0.02 }, + { "M", 0.02 }, + { "N", 0.02 }, + { "R", 0.02 }, + { "S", 0.02 }, + { "V", 0.02 }, + { "W", 0.02 }, + { "Y", 0.02 }, +} + +local homosapiens = make_bisect{ + { "a", 0.3029549426680 }, + { "c", 0.1979883004921 }, + { "g", 0.1975473066391 }, + { "t", 0.3015094502008 }, +} + +local N = tonumber(arg and arg[1]) or 1000 +make_repeat_fasta('ONE', 'Homo sapiens alu', alu, N*2) +make_random_fasta('TWO', 'IUB ambiguity codes', iub, N*3) +make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, N*5) diff --git a/testsuite/bench/generate.R b/testsuite/bench/generate.R new file mode 100755 index 0000000000..fb43f01b1f --- /dev/null +++ b/testsuite/bench/generate.R @@ -0,0 +1,25 @@ +#!/usr/bin/env nix-shell +#!nix-shell -i Rscript -p R rpkgs.dplyr rpkgs.ggplot2 + +# R command-line program for making visualizations from benchmark results. 
+ +suppressWarnings(source("bench.R")) + +args <- commandArgs(trailingOnly=T) +if (length(args) != 2) { + message("Usage: generate.R "); quit(status=1) +} + +filename <- args[[1]] +outdir <- args[[2]] + +data <- bench.read(filename) +if (!dir.exists(outdir)) { dir.create(outdir, recursive=T) } + +ggsave(filename = file.path(outdir,"bench-jitter.png"), + plot = bench.jitterplot(data), + width=12, height=12) + +ggsave(filename = file.path(outdir,"bench-ecdf.png"), + plot = bench.ecdfplot(data), + width=12, height=12) diff --git a/testsuite/bench/k-nucleotide.lua b/testsuite/bench/k-nucleotide.lua new file mode 100644 index 0000000000..0bfb41bec5 --- /dev/null +++ b/testsuite/bench/k-nucleotide.lua @@ -0,0 +1,58 @@ + +local function kfrequency(seq, freq, k, frame) + local sub = string.sub + local k1 = k - 1 + for i=frame,#seq-k1,k do + local c = sub(seq, i, i+k1) + freq[c] = (freq[c] or 0) + 1 + end +end + +local function count(seq, frag) + local k = #frag + local freq = {} + for frame=1,k do kfrequency(seq, freq, k, frame) end + io.write(freq[frag] or 0, "\t", frag, "\n") +end + +local function frequency(seq, k) + local freq = {} + for frame=1,k do kfrequency(seq, freq, k, frame) end + local sfreq, sn, sum = {}, 1, 0 + for c,v in pairs(freq) do sfreq[sn] = c; sn = sn + 1; sum = sum + v end + table.sort(sfreq, function(a, b) + local fa, fb = freq[a], freq[b] + return fa == fb and a > b or fa > fb + end) + for _,c in ipairs(sfreq) do + io.write(string.format("%s %0.3f\n", c, (freq[c]*100)/sum)) + end + io.write("\n") +end + +local function readseq() + local sub = string.sub + for line in io.lines() do + if sub(line, 1, 1) == ">" and sub(line, 2, 6) == "THREE" then break end + end + local lines, ln = {}, 0 + for line in io.lines() do + local c = sub(line, 1, 1) + if c == ">" then + break + elseif c ~= ";" then + ln = ln + 1 + lines[ln] = line + end + end + return string.upper(table.concat(lines, "", 1, ln)) +end + +local seq = readseq() +frequency(seq, 1) 
+frequency(seq, 2) +count(seq, "GGT") +count(seq, "GGTA") +count(seq, "GGTATT") +count(seq, "GGTATTTTAATT") +count(seq, "GGTATTTTAATTTATAGT") diff --git a/testsuite/bench/life.lua b/testsuite/bench/life.lua new file mode 100644 index 0000000000..4b7029dac6 --- /dev/null +++ b/testsuite/bench/life.lua @@ -0,0 +1,111 @@ +-- life.lua +-- original by Dave Bollinger posted to lua-l +-- modified to use ANSI terminal escape sequences +-- modified to use for instead of while + +local write=io.write + +ALIVE="¥" DEAD="þ" +ALIVE="O" DEAD="-" + +function delay() -- NOTE: SYSTEM-DEPENDENT, adjust as necessary + for i=1,10000 do end + -- local i=os.clock()+1 while(os.clock() 0 do + local xm1,x,xp1,xi=self.w-1,self.w,1,self.w + while xi > 0 do + local sum = self[ym1][xm1] + self[ym1][x] + self[ym1][xp1] + + self[y][xm1] + self[y][xp1] + + self[yp1][xm1] + self[yp1][x] + self[yp1][xp1] + next[y][x] = ((sum==2) and self[y][x]) or ((sum==3) and 1) or 0 + xm1,x,xp1,xi = x,xp1,xp1+1,xi-1 + end + ym1,y,yp1,yi = y,yp1,yp1+1,yi-1 + end +end + +-- output the array to screen +function _CELLS:draw() + local out="" -- accumulate to reduce flicker + for y=1,self.h do + for x=1,self.w do + out=out..(((self[y][x]>0) and ALIVE) or DEAD) + end + out=out.."\n" + end + write(out) +end + +-- constructor +function CELLS(w,h) + local c = ARRAY2D(w,h) + c.spawn = _CELLS.spawn + c.evolve = _CELLS.evolve + c.draw = _CELLS.draw + return c +end + +-- +-- shapes suitable for use with spawn() above +-- +HEART = { 1,0,1,1,0,1,1,1,1; w=3,h=3 } +GLIDER = { 0,0,1,1,0,1,0,1,1; w=3,h=3 } +EXPLODE = { 0,1,0,1,1,1,1,0,1,0,1,0; w=3,h=4 } +FISH = { 0,1,1,1,1,1,0,0,0,1,0,0,0,0,1,1,0,0,1,0; w=5,h=4 } +BUTTERFLY = { 1,0,0,0,1,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1; w=5,h=5 } + +-- the main routine +function LIFE(w,h) + -- create two arrays + local thisgen = CELLS(w,h) + local nextgen = CELLS(w,h) + + -- create some life + -- about 1000 generations of fun, then a glider steady-state + thisgen:spawn(GLIDER,5,4) + 
thisgen:spawn(EXPLODE,25,10) + thisgen:spawn(FISH,4,12) + + -- run until break + local gen=1 + write("\027[2J") -- ANSI clear screen + while 1 do + thisgen:evolve(nextgen) + thisgen,nextgen = nextgen,thisgen + write("\027[H") -- ANSI home cursor + thisgen:draw() + write("Life - generation ",gen,"\n") + gen=gen+1 + if gen>10000 then break end + --delay() -- no delay + end +end + +LIFE(40,20) diff --git a/testsuite/bench/mandelbrot-bit.lua b/testsuite/bench/mandelbrot-bit.lua new file mode 100644 index 0000000000..91d96975c8 --- /dev/null +++ b/testsuite/bench/mandelbrot-bit.lua @@ -0,0 +1,33 @@ + +local bit = require("bit") +local bor, band = bit.bor, bit.band +local shl, shr, rol = bit.lshift, bit.rshift, bit.rol +local write, char, unpack = io.write, string.char, unpack +local N = tonumber(arg and arg[1]) or 100 +local M, buf = 2/N, {} +write("P4\n", N, " ", N, "\n") +for y=0,N-1 do + local Ci, b, p = y*M-1, -16777216, 0 + local Ciq = Ci*Ci + for x=0,N-1,2 do + local Cr, Cr2 = x*M-1.5, (x+1)*M-1.5 + local Zr, Zi, Zrq, Ziq = Cr, Ci, Cr*Cr, Ciq + local Zr2, Zi2, Zrq2, Ziq2 = Cr2, Ci, Cr2*Cr2, Ciq + b = rol(b, 2) + for i=1,49 do + Zi = Zr*Zi*2 + Ci; Zi2 = Zr2*Zi2*2 + Ci + Zr = Zrq-Ziq + Cr; Zr2 = Zrq2-Ziq2 + Cr2 + Ziq = Zi*Zi; Ziq2 = Zi2*Zi2 + Zrq = Zr*Zr; Zrq2 = Zr2*Zr2 + if band(b, 2) ~= 0 and Zrq+Ziq > 4.0 then b = band(b, -3) end + if band(b, 1) ~= 0 and Zrq2+Ziq2 > 4.0 then b = band(b, -2) end + if band(b, 3) == 0 then break end + end + if b >= 0 then p = p + 1; buf[p] = b; b = -16777216; end + end + if b ~= -16777216 then + if band(N, 1) ~= 0 then b = shr(b, 1) end + p = p + 1; buf[p] = shl(b, 8-band(N, 7)) + end + write(char(unpack(buf, 1, p))) +end diff --git a/testsuite/bench/mandelbrot.lua b/testsuite/bench/mandelbrot.lua new file mode 100644 index 0000000000..0ef595a2fe --- /dev/null +++ b/testsuite/bench/mandelbrot.lua @@ -0,0 +1,23 @@ + +local write, char, unpack = io.write, string.char, unpack +local N = tonumber(arg and arg[1]) or 100 +local M, ba, bb, 
buf = 2/N, 2^(N%8+1)-1, 2^(8-N%8), {} +write("P4\n", N, " ", N, "\n") +for y=0,N-1 do + local Ci, b, p = y*M-1, 1, 0 + for x=0,N-1 do + local Cr = x*M-1.5 + local Zr, Zi, Zrq, Ziq = Cr, Ci, Cr*Cr, Ci*Ci + b = b + b + for i=1,49 do + Zi = Zr*Zi*2 + Ci + Zr = Zrq-Ziq + Cr + Ziq = Zi*Zi + Zrq = Zr*Zr + if Zrq+Ziq > 4.0 then b = b + 1; break; end + end + if b >= 256 then p = p + 1; buf[p] = 511 - b; b = 1; end + end + if b ~= 1 then p = p + 1; buf[p] = (ba-b)*bb; end + write(char(unpack(buf, 1, p))) +end diff --git a/testsuite/bench/md5.lua b/testsuite/bench/md5.lua new file mode 100644 index 0000000000..fdf6b4a7c7 --- /dev/null +++ b/testsuite/bench/md5.lua @@ -0,0 +1,183 @@ + +local bit = require("bit") +local tobit, tohex, bnot = bit.tobit or bit.cast, bit.tohex, bit.bnot +local bor, band, bxor = bit.bor, bit.band, bit.bxor +local lshift, rshift, rol, bswap = bit.lshift, bit.rshift, bit.rol, bit.bswap +local byte, char, sub, rep = string.byte, string.char, string.sub, string.rep + +if not rol then -- Replacement function if rotates are missing. + local bor, shl, shr = bit.bor, bit.lshift, bit.rshift + function rol(a, b) return bor(shl(a, b), shr(a, 32-b)) end +end + +if not bswap then -- Replacement function if bswap is missing. + local bor, band, shl, shr = bit.bor, bit.band, bit.lshift, bit.rshift + function bswap(a) + return bor(shr(a, 24), band(shr(a, 8), 0xff00), + shl(band(a, 0xff00), 8), shl(a, 24)); + end +end + +if not tohex then -- (Unreliable) replacement function if tohex is missing. 
+ function tohex(a) + return string.sub(string.format("%08x", a), -8) + end +end + +local function tr_f(a, b, c, d, x, s) + return rol(bxor(d, band(b, bxor(c, d))) + a + x, s) + b +end + +local function tr_g(a, b, c, d, x, s) + return rol(bxor(c, band(d, bxor(b, c))) + a + x, s) + b +end + +local function tr_h(a, b, c, d, x, s) + return rol(bxor(b, c, d) + a + x, s) + b +end + +local function tr_i(a, b, c, d, x, s) + return rol(bxor(c, bor(b, bnot(d))) + a + x, s) + b +end + +local function transform(x, a1, b1, c1, d1) + local a, b, c, d = a1, b1, c1, d1 + + a = tr_f(a, b, c, d, x[ 1] + 0xd76aa478, 7) + d = tr_f(d, a, b, c, x[ 2] + 0xe8c7b756, 12) + c = tr_f(c, d, a, b, x[ 3] + 0x242070db, 17) + b = tr_f(b, c, d, a, x[ 4] + 0xc1bdceee, 22) + a = tr_f(a, b, c, d, x[ 5] + 0xf57c0faf, 7) + d = tr_f(d, a, b, c, x[ 6] + 0x4787c62a, 12) + c = tr_f(c, d, a, b, x[ 7] + 0xa8304613, 17) + b = tr_f(b, c, d, a, x[ 8] + 0xfd469501, 22) + a = tr_f(a, b, c, d, x[ 9] + 0x698098d8, 7) + d = tr_f(d, a, b, c, x[10] + 0x8b44f7af, 12) + c = tr_f(c, d, a, b, x[11] + 0xffff5bb1, 17) + b = tr_f(b, c, d, a, x[12] + 0x895cd7be, 22) + a = tr_f(a, b, c, d, x[13] + 0x6b901122, 7) + d = tr_f(d, a, b, c, x[14] + 0xfd987193, 12) + c = tr_f(c, d, a, b, x[15] + 0xa679438e, 17) + b = tr_f(b, c, d, a, x[16] + 0x49b40821, 22) + + a = tr_g(a, b, c, d, x[ 2] + 0xf61e2562, 5) + d = tr_g(d, a, b, c, x[ 7] + 0xc040b340, 9) + c = tr_g(c, d, a, b, x[12] + 0x265e5a51, 14) + b = tr_g(b, c, d, a, x[ 1] + 0xe9b6c7aa, 20) + a = tr_g(a, b, c, d, x[ 6] + 0xd62f105d, 5) + d = tr_g(d, a, b, c, x[11] + 0x02441453, 9) + c = tr_g(c, d, a, b, x[16] + 0xd8a1e681, 14) + b = tr_g(b, c, d, a, x[ 5] + 0xe7d3fbc8, 20) + a = tr_g(a, b, c, d, x[10] + 0x21e1cde6, 5) + d = tr_g(d, a, b, c, x[15] + 0xc33707d6, 9) + c = tr_g(c, d, a, b, x[ 4] + 0xf4d50d87, 14) + b = tr_g(b, c, d, a, x[ 9] + 0x455a14ed, 20) + a = tr_g(a, b, c, d, x[14] + 0xa9e3e905, 5) + d = tr_g(d, a, b, c, x[ 3] + 0xfcefa3f8, 9) + c = tr_g(c, d, a, b, x[ 8] + 
0x676f02d9, 14) + b = tr_g(b, c, d, a, x[13] + 0x8d2a4c8a, 20) + + a = tr_h(a, b, c, d, x[ 6] + 0xfffa3942, 4) + d = tr_h(d, a, b, c, x[ 9] + 0x8771f681, 11) + c = tr_h(c, d, a, b, x[12] + 0x6d9d6122, 16) + b = tr_h(b, c, d, a, x[15] + 0xfde5380c, 23) + a = tr_h(a, b, c, d, x[ 2] + 0xa4beea44, 4) + d = tr_h(d, a, b, c, x[ 5] + 0x4bdecfa9, 11) + c = tr_h(c, d, a, b, x[ 8] + 0xf6bb4b60, 16) + b = tr_h(b, c, d, a, x[11] + 0xbebfbc70, 23) + a = tr_h(a, b, c, d, x[14] + 0x289b7ec6, 4) + d = tr_h(d, a, b, c, x[ 1] + 0xeaa127fa, 11) + c = tr_h(c, d, a, b, x[ 4] + 0xd4ef3085, 16) + b = tr_h(b, c, d, a, x[ 7] + 0x04881d05, 23) + a = tr_h(a, b, c, d, x[10] + 0xd9d4d039, 4) + d = tr_h(d, a, b, c, x[13] + 0xe6db99e5, 11) + c = tr_h(c, d, a, b, x[16] + 0x1fa27cf8, 16) + b = tr_h(b, c, d, a, x[ 3] + 0xc4ac5665, 23) + + a = tr_i(a, b, c, d, x[ 1] + 0xf4292244, 6) + d = tr_i(d, a, b, c, x[ 8] + 0x432aff97, 10) + c = tr_i(c, d, a, b, x[15] + 0xab9423a7, 15) + b = tr_i(b, c, d, a, x[ 6] + 0xfc93a039, 21) + a = tr_i(a, b, c, d, x[13] + 0x655b59c3, 6) + d = tr_i(d, a, b, c, x[ 4] + 0x8f0ccc92, 10) + c = tr_i(c, d, a, b, x[11] + 0xffeff47d, 15) + b = tr_i(b, c, d, a, x[ 2] + 0x85845dd1, 21) + a = tr_i(a, b, c, d, x[ 9] + 0x6fa87e4f, 6) + d = tr_i(d, a, b, c, x[16] + 0xfe2ce6e0, 10) + c = tr_i(c, d, a, b, x[ 7] + 0xa3014314, 15) + b = tr_i(b, c, d, a, x[14] + 0x4e0811a1, 21) + a = tr_i(a, b, c, d, x[ 5] + 0xf7537e82, 6) + d = tr_i(d, a, b, c, x[12] + 0xbd3af235, 10) + c = tr_i(c, d, a, b, x[ 3] + 0x2ad7d2bb, 15) + b = tr_i(b, c, d, a, x[10] + 0xeb86d391, 21) + + return tobit(a+a1), tobit(b+b1), tobit(c+c1), tobit(d+d1) +end + +-- Note: this is copying the original string and NOT particularly fast. +-- A library for struct unpacking would make this task much easier. 
+local function md5(msg) + local len = #msg + msg = msg.."\128"..rep("\0", 63 - band(len + 8, 63)) + ..char(band(lshift(len, 3), 255), band(rshift(len, 5), 255), + band(rshift(len, 13), 255), band(rshift(len, 21), 255)) + .."\0\0\0\0" + local a, b, c, d = 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 + local x, k = {}, 1 + for i=1,#msg,4 do + local m0, m1, m2, m3 = byte(msg, i, i+3) + x[k] = bor(m0, lshift(m1, 8), lshift(m2, 16), lshift(m3, 24)) + if k == 16 then + a, b, c, d = transform(x, a, b, c, d) + k = 1 + else + k = k + 1 + end + end + return tohex(bswap(a))..tohex(bswap(b))..tohex(bswap(c))..tohex(bswap(d)) +end + +assert(md5('') == 'd41d8cd98f00b204e9800998ecf8427e') +assert(md5('a') == '0cc175b9c0f1b6a831c399e269772661') +assert(md5('abc') == '900150983cd24fb0d6963f7d28e17f72') +assert(md5('message digest') == 'f96b697d7cb7938d525a2f31aaf161d0') +assert(md5('abcdefghijklmnopqrstuvwxyz') == 'c3fcd3d76192e4007dfb496cca67e13b') +assert(md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789') == + 'd174ab98d277d9f5a5611c2c9f419d9f') +assert(md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890') == + '57edf4a22be3c955ac49da2e2107b67a') + +local N = tonumber(arg and arg[1]) or 10000 + + -- Credits: William Shakespeare, Romeo and Juliet +local txt = [[Rebellious subjects, enemies to peace, +Profaners of this neighbour-stained steel,-- +Will they not hear? What, ho! you men, you beasts, +That quench the fire of your pernicious rage +With purple fountains issuing from your veins, +On pain of torture, from those bloody hands +Throw your mistemper'd weapons to the ground, +And hear the sentence of your moved prince. 
+Three civil brawls, bred of an airy word, +By thee, old Capulet, and Montague, +Have thrice disturb'd the quiet of our streets, +And made Verona's ancient citizens +Cast by their grave beseeming ornaments, +To wield old partisans, in hands as old, +Canker'd with peace, to part your canker'd hate: +If ever you disturb our streets again, +Your lives shall pay the forfeit of the peace. +For this time, all the rest depart away: +You Capulet; shall go along with me: +And, Montague, come you this afternoon, +To know our further pleasure in this case, +To old Free-town, our common judgment-place. +Once more, on pain of death, all men depart.]] + txt = txt..txt..txt..txt + txt = txt..txt..txt..txt + +for i=1,N do + res = md5(txt) +end +assert(res == 'a831e91e0f70eddcb70dc61c6f82f6cd') + diff --git a/testsuite/bench/meteor.lua b/testsuite/bench/meteor.lua new file mode 100644 index 0000000000..80588ab532 --- /dev/null +++ b/testsuite/bench/meteor.lua @@ -0,0 +1,220 @@ + +-- Generate a decision tree based solver for the meteor puzzle. +local function generatesolver(countinit) + local pairs, ipairs, format = pairs, ipairs, string.format + local byte, min, sort = string.byte, math.min, table.sort + + -- Cached position to distance lookup. + local dist = setmetatable({}, { __index = function(t, xy) + local x = xy%10; local y = (xy-x)/10 + if (x+y)%2 == 1 then y = y + 1; x = 10 - x end + local d = xy + 256*x*x + 1024*y*y; t[xy] = d; return d + end}) + + -- Lookup table to validate a cell and to find its successor. + local ok = {} + for i=0,150 do ok[i] = false end + for i=99,0,-1 do + local x = i%10 + if ((i-x)/10+x)%2 == 0 then + ok[i] = i + (ok[i+1] and 1 or (ok[i+2] and 2 or 3)) + end + end + + -- Temporary board state for the island checks. + local islands, slide = {}, {20,22,24,26,28,31,33,35,37,39} + local bbc, bb = 0, {} + for i=0,19 do bb[i] = false; bb[i+80] = false end + for i=20,79 do bb[i] = ok[i] end + + -- Recursive flood fill algorithm. 
+ local function fill(bb, p) + bbc = bbc + 1 + local n = p+2; if bb[n] then bb[n] = false; fill(bb, n) end + n = p-2; if bb[n] then bb[n] = false; fill(bb, n) end + n = p-9; if bb[n] then bb[n] = false; fill(bb, n) end + n = p-11; if bb[n] then bb[n] = false; fill(bb, n) end + n = p+9; if bb[n] then bb[n] = false; fill(bb, n) end + n = p+11; if bb[n] then bb[n] = false; fill(bb, n) end + end + + -- Generate pruned, sliding decision trees. + local dtrees = {{}, {}, {}, {}, {}, {}, {}, {}, {}, {}} + local rot = { nil, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} } + for k=0,9 do + -- Generate 10 initial pieces from line noise. :-) + local t = { 60, 62, byte("@BMBIK@KT@GPIKR@IKIKT@GK@KM@BG", k*3+1, k*3+3) } + rot[1] = t + for i,xy in ipairs(t) do + local x = xy%10; local y = (xy-x-60)/10 + -- Add 11 more variations by rotating and flipping. + for j=2,12 do + if j == 7 then y = -y else x,y = (x+3*y)/2, (y-x)/2 end + rot[j][i] = x+10*y + end + end + for r,v in ipairs(rot) do + -- Exploit symmetry and leave out half of the orientations of one piece. + -- The selected piece gives the best reduction of the solution space. + if k ~= 3 or r%2 == 0 then + -- Normalize to origin, add distance, sort by distance from origin. + local m = min(v[1], v[2], v[3], v[4], v[5]) + for i=1,5 do v[i] = dist[v[i]-m] end + sort(v) + local v2, v3, v4, v5 = v[2]%256, v[3]%256, v[4]%256, v[5]%256 + -- Slide the piece across 2 rows, prune the tree, check for islands. + for j,p in ipairs(slide) do + bb[p] = false + if ok[p+v2] and ok[p+v3] and ok[p+v4] and ok[p+v5] then -- Prune. + for i=p+1,79 do bb[i] = ok[i] end -- Clear remaining board. + bb[p+v2] = false; bb[p+v3] = false -- Add piece. + bb[p+v4] = false; bb[p+v5] = false + bbc = j -- Flood fill and count the filled positions. + if bb[71] then bb[71] = false; fill(bb, 71) end -- Lower left. + if bb[79] then bb[79] = false; fill(bb, 79) end -- Lower right. 
+ local di = 0 + if bbc < 22 then bbc = 26 + elseif bbc < 26 then -- Island found, locate it, fill from above. + for i=p+2,79 do if bb[i] then di = i-p; break end end + for i=p-9,p-1 do if ok[i] then fill(bb, i) bbc = bbc - 1 end end + end + if bbc == 26 then -- Prune boards with static islands. + local tb = dtrees[j] -- Build decision tree in distance order. + local ta = tb[v2]; if not ta then ta = {}; tb[v2] = ta end + tb = ta[v3]; if not tb then tb = {}; ta[v3] = tb end + ta = tb[v4]; if not ta then ta = {}; tb[v4] = ta; islands[ta] = di + elseif islands[ta] ~= di then islands[ta] = 0 end + ta[v5] = di*10+k -- Leaves hold island check and piece number. + end + end + end + end + end + end + + local s = "local u0,u1,u2,u3,u4,u5,u6,u7,u8,u9" -- Piece use flags. + for p=0,99 do if ok[p] then s = s..",b"..p end end -- Board cells. + s = s.."\n"..[[ +local countinit = ... +local count = countinit +local bmin, bmax, pcs = 9, 0, {} +local smin, smax +local write, reverse = io.write, string.reverse + +-- Print min/max boards. +local function printboard(s) + local flip = true + for x in string.gmatch(string.gsub(s, ".", "%1 "), "..........") do + write(x, flip and "\n " or "\n") + flip = not flip + end + write("\n") +end + +-- Print result. +local function printresult() + write(countinit-count, " solutions found\n\n") + printboard(smin) + printboard(smax) +end + +-- Generate piece lookup array from the order of use. +local function genp() + local p = pcs + p[u0] = "0" p[u1] = "1" p[u2] = "2" p[u3] = "3" p[u4] = "4" + p[u5] = "5" p[u6] = "6" p[u7] = "7" p[u8] = "8" p[u9] = "9" + return p +end + +-- Goal function. +local function f91(k) + if k ~= 10 then return end + count = count - 2 -- Need to count the symmetric solution, too. + repeat + -- Quick precheck before constructing the string. 
+ local b0, b99 = b0, b99 + if b0 <= bmin then bmin = b0 elseif b0 >= bmax then bmax = b0 + elseif b99 <= bmin then bmin = b99 elseif b99 >= bmax then bmax = b99 + else break end + -- Translate the filled board to a string. + local p = genp() + local s = p[b0] ]] + for p=2,99 do if ok[p] then s = s.."..p[b"..p.."]" end end + s = s..[[ + -- Remember min/max boards, dito for the symmetric board. + if not smin then smin = s; smax = s + elseif s < smin then smin = s elseif s > smax then smax = s end + s = reverse(s) + if s < smin then smin = s elseif s > smax then smax = s end + until true + if count <= 0 then error() end -- Early abort if max count given. +end +local f93 = f91 +]] + + -- Recursively convert the decision tree to Lua code. + local function codetree(tree, d, p, pn) + local found, s = false, "" + d = d + 1 + for a,t in pairs(tree) do + local b = p+a + if b < 100 then -- Prune the tree at the lower border. + local pp = b ~= pn and pn or ok[b] -- Find maximum successor function. + if d >= 5 then -- Try to place the last cell of a piece and advance. + found = true + local u = t%10 + local di = (t-u)/10 + if di ~= 0 and d == 5 then + di = di + p; if pp == di then pp = ok[di] end + s = format("%sif b%d and not u%d and not b%d then b%d=k u%d=k f%d(k) u%d=N b%d=N end\n", + s, di, u, b, b, u, pp, u, b) + else + s = format("%sif not u%d and not b%d then b%d=k u%d=k f%d(k) u%d=N b%d=N end\n", + s, u, b, b, u, pp, u, b) + end + else -- Try to place an intermediate cell. + local di = d ~= 4 and 0 or islands[t] + if di == 0 then + local st = codetree(t, d, p, pp) + if st then + found = true + s = format("%sif not b%d then b%d=k\n%sb%d=N end\n", s, b, b, st, b) + end + else -- Combine island checks. 
+ di = di + p; if pp == di then pp = ok[di] end + local st = codetree(t, 6, p, pp) + if st then + found = true + s = format("%sif b%d and not b%d then b%d=k\n%sb%d=N end\n", s, di, b, b, st, b) + end + end + end + end + end + return found and s + end + + -- Embed the decision tree into a function hierarchy. + local j = 5 + for p=88,0,-1 do + local pn = ok[p] + if pn then + s = format("%slocal function f%d(k)\nlocal N if b%d then return f%d(k) end k=k+1 b%d=k\n%sb%d=N end\n", + s, p, p, pn, p, codetree(dtrees[j], 1, p, pn), p) + j = j - 1; if j == 0 then j = 10 end + end + end + + -- Compile and return solver function and result getter. + return loadstring(s.."return f0, printresult\n", "solver")(countinit) +end + +-- Generate the solver function hierarchy. +local solver, printresult = generatesolver(tonumber(arg and arg[1]) or 10000) + +-- The optimizer for LuaJIT 1.1.x is not helpful here, so turn it off. +if jit and jit.opt and jit.version_num < 10200 then jit.opt.start(0) end + +-- Run the solver protected to get partial results (max count or ctrl-c). 
+pcall(solver, 0) +printresult() diff --git a/testsuite/bench/nbody.lua b/testsuite/bench/nbody.lua new file mode 100644 index 0000000000..e0ff8f7712 --- /dev/null +++ b/testsuite/bench/nbody.lua @@ -0,0 +1,119 @@ + +local sqrt = math.sqrt + +local PI = 3.141592653589793 +local SOLAR_MASS = 4 * PI * PI +local DAYS_PER_YEAR = 365.24 +local bodies = { + { -- Sun + x = 0, + y = 0, + z = 0, + vx = 0, + vy = 0, + vz = 0, + mass = SOLAR_MASS + }, + { -- Jupiter + x = 4.84143144246472090e+00, + y = -1.16032004402742839e+00, + z = -1.03622044471123109e-01, + vx = 1.66007664274403694e-03 * DAYS_PER_YEAR, + vy = 7.69901118419740425e-03 * DAYS_PER_YEAR, + vz = -6.90460016972063023e-05 * DAYS_PER_YEAR, + mass = 9.54791938424326609e-04 * SOLAR_MASS + }, + { -- Saturn + x = 8.34336671824457987e+00, + y = 4.12479856412430479e+00, + z = -4.03523417114321381e-01, + vx = -2.76742510726862411e-03 * DAYS_PER_YEAR, + vy = 4.99852801234917238e-03 * DAYS_PER_YEAR, + vz = 2.30417297573763929e-05 * DAYS_PER_YEAR, + mass = 2.85885980666130812e-04 * SOLAR_MASS + }, + { -- Uranus + x = 1.28943695621391310e+01, + y = -1.51111514016986312e+01, + z = -2.23307578892655734e-01, + vx = 2.96460137564761618e-03 * DAYS_PER_YEAR, + vy = 2.37847173959480950e-03 * DAYS_PER_YEAR, + vz = -2.96589568540237556e-05 * DAYS_PER_YEAR, + mass = 4.36624404335156298e-05 * SOLAR_MASS + }, + { -- Neptune + x = 1.53796971148509165e+01, + y = -2.59193146099879641e+01, + z = 1.79258772950371181e-01, + vx = 2.68067772490389322e-03 * DAYS_PER_YEAR, + vy = 1.62824170038242295e-03 * DAYS_PER_YEAR, + vz = -9.51592254519715870e-05 * DAYS_PER_YEAR, + mass = 5.15138902046611451e-05 * SOLAR_MASS + } +} + +local function advance(bodies, nbody, dt) + for i=1,nbody do + local bi = bodies[i] + local bix, biy, biz, bimass = bi.x, bi.y, bi.z, bi.mass + local bivx, bivy, bivz = bi.vx, bi.vy, bi.vz + for j=i+1,nbody do + local bj = bodies[j] + local dx, dy, dz = bix-bj.x, biy-bj.y, biz-bj.z + local mag = sqrt(dx*dx + dy*dy + dz*dz) + 
mag = dt / (mag * mag * mag) + local bm = bj.mass*mag + bivx = bivx - (dx * bm) + bivy = bivy - (dy * bm) + bivz = bivz - (dz * bm) + bm = bimass*mag + bj.vx = bj.vx + (dx * bm) + bj.vy = bj.vy + (dy * bm) + bj.vz = bj.vz + (dz * bm) + end + bi.vx = bivx + bi.vy = bivy + bi.vz = bivz + bi.x = bix + dt * bivx + bi.y = biy + dt * bivy + bi.z = biz + dt * bivz + end +end + +local function energy(bodies, nbody) + local e = 0 + for i=1,nbody do + local bi = bodies[i] + local vx, vy, vz, bim = bi.vx, bi.vy, bi.vz, bi.mass + e = e + (0.5 * bim * (vx*vx + vy*vy + vz*vz)) + for j=i+1,nbody do + local bj = bodies[j] + local dx, dy, dz = bi.x-bj.x, bi.y-bj.y, bi.z-bj.z + local distance = sqrt(dx*dx + dy*dy + dz*dz) + e = e - ((bim * bj.mass) / distance) + end + end + return e +end + +local function offsetMomentum(b, nbody) + local px, py, pz = 0, 0, 0 + for i=1,nbody do + local bi = b[i] + local bim = bi.mass + px = px + (bi.vx * bim) + py = py + (bi.vy * bim) + pz = pz + (bi.vz * bim) + end + b[1].vx = -px / SOLAR_MASS + b[1].vy = -py / SOLAR_MASS + b[1].vz = -pz / SOLAR_MASS +end + +local N = tonumber(arg and arg[1]) or 1000 +local nbody = #bodies + +offsetMomentum(bodies, nbody) +io.write( string.format("%0.9f",energy(bodies, nbody)), "\n") +for i=1,N do advance(bodies, nbody, 0.01) end +io.write( string.format("%0.9f",energy(bodies, nbody)), "\n") diff --git a/testsuite/bench/nsieve-bit-fp.lua b/testsuite/bench/nsieve-bit-fp.lua new file mode 100644 index 0000000000..3971ec1f1e --- /dev/null +++ b/testsuite/bench/nsieve-bit-fp.lua @@ -0,0 +1,37 @@ + +local floor, ceil = math.floor, math.ceil + +local precision = 50 -- Maximum precision of lua_Number (minus safety margin). +local onebits = (2^precision)-1 + +local function nsieve(p, m) + local cm = ceil(m/precision) + do local onebits = onebits; for i=0,cm do p[i] = onebits end end + local count, idx, bit = 0, 2, 2 + for i=2,m do + local r = p[idx] / bit + if r - floor(r) >= 0.5 then -- Bit set? 
+ local kidx, kbit = idx, bit + for k=i+i,m,i do + kidx = kidx + i + while kidx >= cm do kidx = kidx - cm; kbit = kbit + kbit end + local x = p[kidx] + local r = x / kbit + if r - floor(r) >= 0.5 then p[kidx] = x - kbit*0.5 end -- Clear bit. + end + count = count + 1 + end + idx = idx + 1 + if idx >= cm then idx = 0; bit = bit + bit end + end + return count +end + +local N = tonumber(arg and arg[1]) or 1 +if N < 2 then N = 2 end +local primes = {} + +for i=0,2 do + local m = (2^(N-i))*10000 + io.write(string.format("Primes up to %8d %8d\n", m, nsieve(primes, m))) +end diff --git a/testsuite/bench/nsieve-bit.lua b/testsuite/bench/nsieve-bit.lua new file mode 100644 index 0000000000..820a372647 --- /dev/null +++ b/testsuite/bench/nsieve-bit.lua @@ -0,0 +1,27 @@ + +local bit = require("bit") +local band, bxor, rshift, rol = bit.band, bit.bxor, bit.rshift, bit.rol + +local function nsieve(p, m) + local count = 0 + for i=0,rshift(m, 5) do p[i] = -1 end + for i=2,m do + if band(rshift(p[rshift(i, 5)], i), 1) ~= 0 then + count = count + 1 + for j=i+i,m,i do + local jx = rshift(j, 5) + p[jx] = band(p[jx], rol(-2, j)) + end + end + end + return count +end + +local N = tonumber(arg and arg[1]) or 1 +if N < 2 then N = 2 end +local primes = {} + +for i=0,2 do + local m = (2^(N-i))*10000 + io.write(string.format("Primes up to %8d %8d\n", m, nsieve(primes, m))) +end diff --git a/testsuite/bench/nsieve.lua b/testsuite/bench/nsieve.lua new file mode 100644 index 0000000000..6de0524f95 --- /dev/null +++ b/testsuite/bench/nsieve.lua @@ -0,0 +1,21 @@ + +local function nsieve(p, m) + for i=2,m do p[i] = true end + local count = 0 + for i=2,m do + if p[i] then + for k=i+i,m,i do p[k] = false end + count = count + 1 + end + end + return count +end + +local N = tonumber(arg and arg[1]) or 1 +if N < 2 then N = 2 end +local primes = {} + +for i=0,2 do + local m = (2^(N-i))*10000 + io.write(string.format("Primes up to %8d %8d\n", m, nsieve(primes, m))) +end diff --git 
a/testsuite/bench/partialsums.lua b/testsuite/bench/partialsums.lua new file mode 100644 index 0000000000..46bb9da35f --- /dev/null +++ b/testsuite/bench/partialsums.lua @@ -0,0 +1,29 @@ + +local n = tonumber(arg[1]) +local function pr(fmt, x) io.write(string.format(fmt, x)) end + +local a1, a2, a3, a4, a5, a6, a7, a8, a9, alt = 1, 0, 0, 0, 0, 0, 0, 0, 0, 1 +local sqrt, sin, cos = math.sqrt, math.sin, math.cos +for k=1,n do + local k2, sk, ck = k*k, sin(k), cos(k) + local k3 = k2*k + a1 = a1 + (2/3)^k + a2 = a2 + 1/sqrt(k) + a3 = a3 + 1/(k2+k) + a4 = a4 + 1/(k3*sk*sk) + a5 = a5 + 1/(k3*ck*ck) + a6 = a6 + 1/k + a7 = a7 + 1/k2 + a8 = a8 + alt/k + a9 = a9 + alt/(k+k-1) + alt = -alt +end +pr("%.9f\t(2/3)^k\n", a1) +pr("%.9f\tk^-0.5\n", a2) +pr("%.9f\t1/k(k+1)\n", a3) +pr("%.9f\tFlint Hills\n", a4) +pr("%.9f\tCookson Hills\n", a5) +pr("%.9f\tHarmonic\n", a6) +pr("%.9f\tRiemann Zeta\n", a7) +pr("%.9f\tAlternating Harmonic\n", a8) +pr("%.9f\tGregory\n", a9) diff --git a/testsuite/bench/pidigits-nogmp.lua b/testsuite/bench/pidigits-nogmp.lua new file mode 100644 index 0000000000..63a1cb0ee8 --- /dev/null +++ b/testsuite/bench/pidigits-nogmp.lua @@ -0,0 +1,100 @@ + +-- Start of dynamically compiled chunk. +local chunk = [=[ + +-- Factory function for multi-precision number (mpn) operations. +local function fmm(fa, fb) + return loadstring([[ + return function(y, a, ka, b, kb) + local carry, n = 0, #a ]]..(fb == 0 and "" or [[ + local na, nb = n, #b -- Need to adjust lengths. 1 element suffices here. + if na > nb then b[na] = 0 elseif na < nb then a[nb] = 0; n = nb end + ]])..[[ + for i=1,n do -- Sum up all elements and propagate carry. + local x = a[i] ]]..(fa == 2 and "*ka" or "").. + (fb == 2 and "+b[i]*kb" or (fb == 1 and "+b[i]" or ""))..[[ + carry + if x < RADIX and x >= 0 then carry = 0; y[i] = x -- Check for overflow. + else local d = x % RADIX; carry = (x-d) / RADIX; y[i] = d end + end + y[n+1] = nil -- Truncate target. 1 element suffices here. 
+ if carry == 0 then while n > 0 and y[n] == 0 do y[n] = nil end + elseif carry == -1 then y[n] = y[n] - RADIX else y[n+1] = carry end + ]]..(fb == 0 and "" or [[ -- Undo length adjustment. + if na > nb then b[na] = nil elseif na < nb and y ~= a then a[nb] = nil end + ]])..[[ + return y + end]])() +end + +-- Generate needed mpn functions. +local mm_kk, mm_k1, mm_k0, mm_11 = fmm(2, 2), fmm(2, 1), fmm(2, 0), fmm(1, 1) + +-- Choose the most efficient mpn function for y = a*ka + b*kb at run-time. +local function mm(y, a, ka, b, kb) + local f = mm_kk + if kb == 0 or #b == 0 then if ka == 1 then return a else f = mm_k0 end + elseif kb == 1 then if ka == 1 then f = mm_11 else f = mm_k1 end end + return f(y, a, ka, b, kb) +end + +-- Compose matrix with numbers on the right. +local function compose_r(aq,ar,as,at, bq,br,bs,bt) + mm(ar, ar,bq, at,br) mm(at, at,bt, ar,bs) + mm(as, as,bt, aq,bs) mm(aq, aq,bq, nil,0) +end + +-- Compose matrix with numbers on the left. +local function compose_l(aq,ar,as,at, bq,br,bs,bt) + mm(ar, ar,bt, aq,br) mm(at, at,bt, as,br) + mm(as, as,bq, at,bs) mm(aq, aq,bq, nil,0) +end + +-- Extract one digit. +local u, v, jj = {}, {}, 0 +local function extract(q,r,s,t, j) + local u = j == jj + 1 and mm(u, u,1, q,1) or mm(u, q,j, r,1); jj = j + local v = mm(v, t,1, s,j) + local nu, nv, y = #u, #v + if nu == nv then + if nu == 1 then y = u[1] / v[1] + else y = (u[nu]*RADIX + u[nu-1]) / (v[nv]*RADIX + v[nv-1]) end + elseif nu == nv+1 then y = (u[nu]*RADIX + u[nv]) / v[nv] + else return 0 end + return math.floor(y) +end + +-- Coroutine which yields successive digits of PI. +return coroutine.wrap(function() + local q, r, s, t, k = {1}, {}, {}, {1}, 1 + repeat + local y = extract(q,r,s,t, 3) + if y == extract(q,r,s,t, 4) then + coroutine.yield(y) + compose_r(q,r,s,t, 10, -10*y, 0, 1) + else + compose_l(q,r,s,t, k, 4*k+2, 0, 2*k+1) + k = k + 1 + end + until false +end) + +]=] -- End of dynamically compiled chunk. 
+ +local N = tonumber(arg and arg[1]) or 27 +local RADIX = N < 6500 and 2^36 or 2^32 -- Avoid overflow. + +-- Substitute radix and compile chunk. +local pidigit = loadstring(string.gsub(chunk, "RADIX", tostring(RADIX)))() + +-- Print lines with 10 digits. +for i=10,N,10 do + for j=1,10 do io.write(pidigit()) end + io.write("\t:", i, "\n") +end + +-- Print remaining digits (if any). +local n10 = N % 10 +if n10 ~= 0 then + for i=1,n10 do io.write(pidigit()) end + io.write(string.rep(" ", 10-n10), "\t:", N, "\n") +end diff --git a/testsuite/bench/ray.lua b/testsuite/bench/ray.lua new file mode 100644 index 0000000000..2acc24c0bf --- /dev/null +++ b/testsuite/bench/ray.lua @@ -0,0 +1,135 @@ +local sqrt = math.sqrt +local huge = math.huge + +local delta = 1 +while delta * delta + 1 ~= 1 do + delta = delta * 0.5 +end + +local function length(x, y, z) return sqrt(x*x + y*y + z*z) end +local function vlen(v) return length(v[1], v[2], v[3]) end +local function mul(c, x, y, z) return c*x, c*y, c*z end +local function unitise(x, y, z) return mul(1/length(x, y, z), x, y, z) end +local function dot(x1, y1, z1, x2, y2, z2) + return x1*x2 + y1*y2 + z1*z2 +end + +local function vsub(a, b) return a[1] - b[1], a[2] - b[2], a[3] - b[3] end +local function vdot(a, b) return dot(a[1], a[2], a[3], b[1], b[2], b[3]) end + + +local sphere = {} +function sphere:new(centre, radius) + self.__index = self + return setmetatable({centre=centre, radius=radius}, self) +end + +local function sphere_distance(self, origin, dir) + local vx, vy, vz = vsub(self.centre, origin) + local b = dot(vx, vy, vz, dir[1], dir[2], dir[3]) + local r = self.radius + local disc = r*r + b*b - vx*vx-vy*vy-vz*vz + if disc < 0 then return huge end + local d = sqrt(disc) + local t2 = b + d + if t2 < 0 then return huge end + local t1 = b - d + return t1 > 0 and t1 or t2 +end + +function sphere:intersect(origin, dir, best) + local lambda = sphere_distance(self, origin, dir) + if lambda < best[1] then + local c = 
self.centre + best[1] = lambda + local b2 = best[2] + b2[1], b2[2], b2[3] = + unitise( + origin[1] - c[1] + lambda * dir[1], + origin[2] - c[2] + lambda * dir[2], + origin[3] - c[3] + lambda * dir[3]) + end +end + +local group = {} +function group:new(bound) + self.__index = self + return setmetatable({bound=bound, children={}}, self) +end + +function group:add(s) + self.children[#self.children+1] = s +end + +function group:intersect(origin, dir, best) + local lambda = sphere_distance(self.bound, origin, dir) + if lambda < best[1] then + for _, c in ipairs(self.children) do + c:intersect(origin, dir, best) + end + end +end + +local hit = { 0, 0, 0 } +local ilight +local best = { huge, { 0, 0, 0 } } + +local function ray_trace(light, camera, dir, scene) + best[1] = huge + scene:intersect(camera, dir, best) + local b1 = best[1] + if b1 == huge then return 0 end + local b2 = best[2] + local g = vdot(b2, light) + if g >= 0 then return 0 end + hit[1] = camera[1] + b1*dir[1] + delta*b2[1] + hit[2] = camera[2] + b1*dir[2] + delta*b2[2] + hit[3] = camera[3] + b1*dir[3] + delta*b2[3] + best[1] = huge + scene:intersect(hit, ilight, best) + if best[1] == huge then + return -g + else + return 0 + end +end + +local function create(level, centre, radius) + local s = sphere:new(centre, radius) + if level == 1 then return s end + local gr = group:new(sphere:new(centre, 3*radius)) + gr:add(s) + local rn = 3*radius/sqrt(12) + for dz = -1,1,2 do + for dx = -1,1,2 do + gr:add(create(level-1, { centre[1] + rn*dx, centre[2] + rn, centre[3] + rn*dz }, radius*0.5)) + end + end + return gr +end + + +local level, n, ss = tonumber(arg[1]) or 9, tonumber(arg[2]) or 256, 4 +local iss = 1/ss +local gf = 255/(ss*ss) + +io.write(("P5\n%d %d\n255\n"):format(n, n)) +local light = { unitise(-1, -3, 2) } +ilight = { -light[1], -light[2], -light[3] } +local camera = { 0, 0, -4 } +local dir = { 0, 0, 0 } + +local scene = create(level, {0, -1, 0}, 1) + +for y = n/2-1, -n/2, -1 do + for x = -n/2, n/2-1 
do + local g = 0 + for d = y, y+.99, iss do + for e = x, x+.99, iss do + dir[1], dir[2], dir[3] = unitise(e, d, n) + g = g + ray_trace(light, camera, dir, scene) + end + end + io.write(string.char(math.floor(0.5 + g*gf))) + end +end diff --git a/testsuite/bench/recursive-ack.lua b/testsuite/bench/recursive-ack.lua new file mode 100644 index 0000000000..fad30589bc --- /dev/null +++ b/testsuite/bench/recursive-ack.lua @@ -0,0 +1,8 @@ +local function Ack(m, n) + if m == 0 then return n+1 end + if n == 0 then return Ack(m-1, 1) end + return Ack(m-1, (Ack(m, n-1))) -- The parentheses are deliberate. +end + +local N = tonumber(arg and arg[1]) or 10 +io.write("Ack(3,", N ,"): ", Ack(3,N), "\n") diff --git a/testsuite/bench/recursive-fib.lua b/testsuite/bench/recursive-fib.lua new file mode 100644 index 0000000000..ef9950decb --- /dev/null +++ b/testsuite/bench/recursive-fib.lua @@ -0,0 +1,7 @@ +local function fib(n) + if n < 2 then return 1 end + return fib(n-2) + fib(n-1) +end + +local n = tonumber(arg[1]) or 10 +io.write(string.format("Fib(%d): %d\n", n, fib(n))) diff --git a/testsuite/bench/revcomp.lua b/testsuite/bench/revcomp.lua new file mode 100644 index 0000000000..34fe347bf9 --- /dev/null +++ b/testsuite/bench/revcomp.lua @@ -0,0 +1,37 @@ + +local sub = string.sub +iubc = setmetatable({ + A="T", C="G", B="V", D="H", K="M", R="Y", + a="T", c="G", b="V", d="H", k="M", r="Y", + T="A", G="C", V="B", H="D", M="K", Y="R", U="A", + t="A", g="C", v="B", h="D", m="K", y="R", u="A", + N="N", S="S", W="W", n="N", s="S", w="W", +}, { __index = function(t, s) + local r = t[sub(s, 2)]..t[sub(s, 1, 1)]; t[s] = r; return r end }) + +local wcode = [=[ +return function(t, n) + if n == 1 then return end + local iubc, sub, write = iubc, string.sub, io.write + local s = table.concat(t, "", 1, n-1) + for i=#s-59,1,-60 do + write(]=] +for i=59,3,-4 do wcode = wcode.."iubc[sub(s, i+"..(i-3)..", i+"..i..")], " end +wcode = wcode..[=["\n") + end + local r = #s % 60 + if r ~= 0 then + for 
i=r,1,-4 do write(iubc[sub(s, i-3 < 1 and 1 or i-3, i)]) end + write("\n") + end +end +]=] +local writerev = loadstring(wcode)() + +local t, n = {}, 1 +for line in io.lines() do + local c = sub(line, 1, 1) + if c == ">" then writerev(t, n); io.write(line, "\n"); n = 1 + elseif c ~= ";" then t[n] = line; n = n + 1 end +end +writerev(t, n) diff --git a/testsuite/bench/roulette.lua b/testsuite/bench/roulette.lua new file mode 100644 index 0000000000..84afe8fdd6 --- /dev/null +++ b/testsuite/bench/roulette.lua @@ -0,0 +1,21 @@ +-- Russian Roulette simulator +-- This benchmark includes randomness from an external source that can +-- produce non-deterministic performance. +-- See https://github.com/LuaJIT/LuaJIT/issues/218 + +-- (Let the test harness determine the random seed) +-- math.randomseed(os.time()) + +local population = 200e6 +local live = 0 +local die = 0 + +for i = 1, population do + if math.random(6) == 6 then + die = die + 1 + else + live = live + 1 + end +end + +print(("Survived %d/%d (%.3f%%)"):format(live, population, live*100/(live+die))) diff --git a/testsuite/bench/scimark-2010-12-20.lua b/testsuite/bench/scimark-2010-12-20.lua new file mode 100644 index 0000000000..353acb7cbc --- /dev/null +++ b/testsuite/bench/scimark-2010-12-20.lua @@ -0,0 +1,400 @@ +------------------------------------------------------------------------------ +-- Lua SciMark (2010-12-20). +-- +-- A literal translation of SciMark 2.0a, written in Java and C. +-- Credits go to the original authors Roldan Pozo and Bruce Miller. +-- See: http://math.nist.gov/scimark2/ +------------------------------------------------------------------------------ + +local SCIMARK_VERSION = "2010-12-10" +local SCIMARK_COPYRIGHT = "Copyright (C) 2006-2010 Mike Pall" + +local MIN_TIME = 2.0 +local RANDOM_SEED = 101009 -- Must be odd. 
+local SIZE_SELECT = "small" + +local benchmarks = { + "FFT", "SOR", "MC", "SPARSE", "LU", + small = { + FFT = { 1024 }, + SOR = { 100 }, + MC = { }, + SPARSE = { 1000, 5000 }, + LU = { 100 }, + }, + large = { + FFT = { 1048576 }, + SOR = { 1000 }, + MC = { }, + SPARSE = { 100000, 1000000 }, + LU = { 1000 }, + }, +} + +local abs, log, sin, floor = math.abs, math.log, math.sin, math.floor +local pi, clock = math.pi, os.clock +local format = string.format + +------------------------------------------------------------------------------ +-- Select array type: Lua tables or native (FFI) arrays +------------------------------------------------------------------------------ + +local darray, iarray + +local function array_init() + if jit and jit.status and jit.status() then + local ok, ffi = pcall(require, "ffi") + if ok then + darray = ffi.typeof("double[?]") + iarray = ffi.typeof("int[?]") + return + end + end + function darray(n) return {} end + iarray = darray +end + +------------------------------------------------------------------------------ +-- This is a Lagged Fibonacci Pseudo-random Number Generator with +-- j, k, M = 5, 17, 31. Pretty weak, but same as C/Java SciMark. +------------------------------------------------------------------------------ + +local rand, rand_init + +if jit and jit.status and jit.status() then + -- LJ2 has bit operations and zero-based arrays (internally). + local bit = require("bit") + local band, sar = bit.band, bit.arshift + function rand_init(seed) + local Rm, Rj, Ri = iarray(17), 16, 11 + for i=0,16 do Rm[i] = 0 end + for i=16,0,-1 do + seed = band(seed*9069, 0x7fffffff) + Rm[i] = seed + end + function rand() + local i = band(Ri+1, sar(Ri-16, 31)) + local j = band(Rj+1, sar(Rj-16, 31)) + Ri, Rj = i, j + local k = band(Rm[i] - Rm[j], 0x7fffffff) + Rm[j] = k + return k * (1.0/2147483647.0) + end + end +else + -- Better for standard Lua with one-based arrays and without bit operations. 
+ function rand_init(seed) + local Rm, Rj = {}, 1 + for i=1,17 do Rm[i] = 0 end + for i=17,1,-1 do + seed = (seed*9069) % (2^31) + Rm[i] = seed + end + function rand() + local j, m = Rj, Rm + local h = j - 5 + if h < 1 then h = h + 17 end + local k = m[h] - m[j] + if k < 0 then k = k + 2147483647 end + m[j] = k + if j < 17 then Rj = j + 1 else Rj = 1 end + return k * (1.0/2147483647.0) + end + end +end + +local function random_vector(n) + local v = darray(n+1) + for x=1,n do v[x] = rand() end + return v +end + +local function random_matrix(m, n) + local a = {} + for y=1,m do + local v = darray(n+1) + a[y] = v + for x=1,n do v[x] = rand() end + end + return a +end + +------------------------------------------------------------------------------ +-- FFT: Fast Fourier Transform. +------------------------------------------------------------------------------ + +local function fft_bitreverse(v, n) + local j = 0 + for i=0,2*n-4,2 do + if i < j then + v[i+1], v[i+2], v[j+1], v[j+2] = v[j+1], v[j+2], v[i+1], v[i+2] + end + local k = n + while k <= j do j = j - k; k = k / 2 end + j = j + k + end +end + +local function fft_transform(v, n, dir) + if n <= 1 then return end + fft_bitreverse(v, n) + local dual = 1 + repeat + local dual2 = 2*dual + for i=1,2*n-1,2*dual2 do + local j = i+dual2 + local ir, ii = v[i], v[i+1] + local jr, ji = v[j], v[j+1] + v[j], v[j+1] = ir - jr, ii - ji + v[i], v[i+1] = ir + jr, ii + ji + end + local theta = dir * pi / dual + local s, s2 = sin(theta), 2.0 * sin(theta * 0.5)^2 + local wr, wi = 1.0, 0.0 + for a=3,dual2-1,2 do + wr, wi = wr - s*wi - s2*wr, wi + s*wr - s2*wi + for i=a,a+2*(n-dual2),2*dual2 do + local j = i+dual2 + local jr, ji = v[j], v[j+1] + local dr, di = wr*jr - wi*ji, wr*ji + wi*jr + local ir, ii = v[i], v[i+1] + v[j], v[j+1] = ir - dr, ii - di + v[i], v[i+1] = ir + dr, ii + di + end + end + dual = dual2 + until dual >= n +end + +function benchmarks.FFT(n) + local l2n = log(n)/log(2) + if l2n % 1 ~= 0 then + 
io.stderr:write("Error: FFT data length is not a power of 2\n") + os.exit(1) + end + local v = random_vector(n*2) + return function(cycles) + local norm = 1.0 / n + for p=1,cycles do + fft_transform(v, n, -1) + fft_transform(v, n, 1) + for i=1,n*2 do v[i] = v[i] * norm end + end + return ((5*n-2)*l2n + 2*(n+1)) * cycles + end +end + +------------------------------------------------------------------------------ +-- SOR: Jacobi Successive Over-Relaxation. +------------------------------------------------------------------------------ + +local function sor_run(mat, m, n, cycles, omega) + local om4, om1 = omega*0.25, 1.0-omega + m = m - 1 + n = n - 1 + for i=1,cycles do + for y=2,m do + local v, vp, vn = mat[y], mat[y-1], mat[y+1] + for x=2,n do + v[x] = om4*((vp[x]+vn[x])+(v[x-1]+v[x+1])) + om1*v[x] + end + end + end +end + +function benchmarks.SOR(n) + local mat = random_matrix(n, n) + return function(cycles) + sor_run(mat, n, n, cycles, 1.25) + return (n-1)*(n-1)*cycles*6 + end +end + +------------------------------------------------------------------------------ +-- MC: Monte Carlo Integration. +------------------------------------------------------------------------------ + +local function mc_integrate(cycles) + local under_curve = 0 + local rand = rand + for i=1,cycles do + local x = rand() + local y = rand() + if x*x + y*y <= 1.0 then under_curve = under_curve + 1 end + end + return (under_curve/cycles) * 4 +end + +function benchmarks.MC() + return function(cycles) + local res = mc_integrate(cycles) + assert(math.sqrt(cycles)*math.abs(res-math.pi) < 5.0, "bad MC result") + return cycles * 4 -- Way off, but same as SciMark in C/Java. + end +end + +------------------------------------------------------------------------------ +-- Sparse Matrix Multiplication. 
+------------------------------------------------------------------------------ + +local function sparse_mult(n, cycles, vy, val, row, col, vx) + for p=1,cycles do + for r=1,n do + local sum = 0 + for i=row[r],row[r+1]-1 do sum = sum + vx[col[i]] * val[i] end + vy[r] = sum + end + end +end + +function benchmarks.SPARSE(n, nz) + local nr = floor(nz/n) + local anz = nr*n + local vx = random_vector(n) + local val = random_vector(anz) + local vy, col, row = darray(n+1), iarray(nz+1), iarray(n+2) + row[1] = 1 + for r=1,n do + local step = floor(r/nr) + if step < 1 then step = 1 end + local rr = row[r] + row[r+1] = rr+nr + for i=0,nr-1 do col[rr+i] = 1+i*step end + end + return function(cycles) + sparse_mult(n, cycles, vy, val, row, col, vx) + return anz*cycles*2 + end +end + +------------------------------------------------------------------------------ +-- LU: Dense Matrix Factorization. +------------------------------------------------------------------------------ + +local function lu_factor(a, pivot, m, n) + local min_m_n = m < n and m or n + for j=1,min_m_n do + local jp, t = j, abs(a[j][j]) + for i=j+1,m do + local ab = abs(a[i][j]) + if ab > t then + jp = i + t = ab + end + end + pivot[j] = jp + if a[jp][j] == 0 then error("zero pivot") end + if jp ~= j then a[j], a[jp] = a[jp], a[j] end + if j < m then + local recp = 1.0 / a[j][j] + for k=j+1,m do + local v = a[k] + v[j] = v[j] * recp + end + end + if j < min_m_n then + for i=j+1,m do + local vi, vj = a[i], a[j] + local eij = vi[j] + for k=j+1,n do vi[k] = vi[k] - eij * vj[k] end + end + end + end +end + +local function matrix_alloc(m, n) + local a = {} + for y=1,m do a[y] = darray(n+1) end + return a +end + +local function matrix_copy(dst, src, m, n) + for y=1,m do + local vd, vs = dst[y], src[y] + for x=1,n do vd[x] = vs[x] end + end +end + +function benchmarks.LU(n) + local mat = random_matrix(n, n) + local tmp = matrix_alloc(n, n) + local pivot = iarray(n+1) + return function(cycles) + for i=1,cycles do + 
matrix_copy(tmp, mat, n, n) + lu_factor(tmp, pivot, n, n) + end + return 2.0/3.0*n*n*n*cycles + end +end + +------------------------------------------------------------------------------ +-- Main program. +------------------------------------------------------------------------------ + +local function printf(...) + io.write(format(...)) +end + +local function fmtparams(p1, p2) + if p2 then return format("[%d, %d]", p1, p2) + elseif p1 then return format("[%d]", p1) end + return "" +end + +local function measure(min_time, name, ...) + array_init() + rand_init(RANDOM_SEED) + local run = benchmarks[name](...) + local cycles = 1 + repeat + local tm = clock() + local flops = run(cycles, ...) + tm = clock() - tm + if tm >= min_time then + local res = flops / tm * 1.0e-6 + local p1, p2 = ... + printf("%-7s %8.2f %s\n", name, res, fmtparams(...)) + return res + end + cycles = cycles * 2 + until false +end + +printf("Lua SciMark %s based on SciMark 2.0a. %s.\n\n", + SCIMARK_VERSION, SCIMARK_COPYRIGHT) + +while arg and arg[1] do + local a = table.remove(arg, 1) + if a == "-noffi" then + package.preload.ffi = nil + elseif a == "-small" then + SIZE_SELECT = "small" + elseif a == "-large" then + SIZE_SELECT = "large" + elseif benchmarks[a] then + local p = benchmarks[SIZE_SELECT][a] + measure(MIN_TIME, a, tonumber(arg[1]) or p[1], tonumber(arg[2]) or p[2]) + return + else + printf("Usage: scimark [-noffi] [-small|-large] [BENCH params...]\n\n") + printf("BENCH -small -large\n") + printf("---------------------------------------\n") + for _,name in ipairs(benchmarks) do + printf("%-7s %-13s %s\n", name, + fmtparams(unpack(benchmarks.small[name])), + fmtparams(unpack(benchmarks.large[name]))) + end + printf("\n") + os.exit(1) + end +end + +local params = benchmarks[SIZE_SELECT] +local sum = 0 +for _,name in ipairs(benchmarks) do + sum = sum + measure(MIN_TIME, name, unpack(params[name])) +end +printf("\nSciMark %8.2f [%s problem sizes]\n", sum / #benchmarks, SIZE_SELECT) 
+io.flush() + diff --git a/testsuite/bench/scimark-fft.lua b/testsuite/bench/scimark-fft.lua new file mode 100644 index 0000000000..c05bb69a6b --- /dev/null +++ b/testsuite/bench/scimark-fft.lua @@ -0,0 +1 @@ +require("scimark_lib").FFT(1024)(tonumber(arg and arg[1]) or 50000) diff --git a/testsuite/bench/scimark-lu.lua b/testsuite/bench/scimark-lu.lua new file mode 100644 index 0000000000..7636d994c6 --- /dev/null +++ b/testsuite/bench/scimark-lu.lua @@ -0,0 +1 @@ +require("scimark_lib").LU(100)(tonumber(arg and arg[1]) or 5000) diff --git a/testsuite/bench/scimark-sor.lua b/testsuite/bench/scimark-sor.lua new file mode 100644 index 0000000000..e537e9867f --- /dev/null +++ b/testsuite/bench/scimark-sor.lua @@ -0,0 +1 @@ +require("scimark_lib").SOR(100)(tonumber(arg and arg[1]) or 50000) diff --git a/testsuite/bench/scimark-sparse.lua b/testsuite/bench/scimark-sparse.lua new file mode 100644 index 0000000000..01a2258df9 --- /dev/null +++ b/testsuite/bench/scimark-sparse.lua @@ -0,0 +1 @@ +require("scimark_lib").SPARSE(1000, 5000)(tonumber(arg and arg[1]) or 150000) diff --git a/testsuite/bench/scimark_lib.lua b/testsuite/bench/scimark_lib.lua new file mode 100644 index 0000000000..aeffd75a62 --- /dev/null +++ b/testsuite/bench/scimark_lib.lua @@ -0,0 +1,297 @@ +------------------------------------------------------------------------------ +-- Lua SciMark (2010-03-15). +-- +-- A literal translation of SciMark 2.0a, written in Java and C. +-- Credits go to the original authors Roldan Pozo and Bruce Miller. +-- See: http://math.nist.gov/scimark2/ +------------------------------------------------------------------------------ + + +local SCIMARK_VERSION = "2010-03-15" + +local RANDOM_SEED = 101009 -- Must be odd. 
+ +local abs, log, sin, floor = math.abs, math.log, math.sin, math.floor +local pi, clock = math.pi, os.clock + +local benchmarks = {} + +------------------------------------------------------------------------------ +-- This is a Lagged Fibonacci Pseudo-random Number Generator with +-- j, k, M = 5, 17, 31. Pretty weak, but same as C/Java SciMark. +------------------------------------------------------------------------------ + +local rand, rand_init + +if jit and jit.status and jit.status() then + -- LJ2 has bit operations and zero-based arrays (internally). + local bit = require("bit") + local band, sar = bit.band, bit.arshift + local Rm, Rj, Ri = {}, 0, 0 + for i=0,16 do Rm[i] = 0 end + function rand_init(seed) + Rj, Ri = 16, 11 + for i=16,0,-1 do + seed = band(seed*9069, 0x7fffffff) + Rm[i] = seed + end + end + function rand() + local i = band(Ri+1, sar(Ri-16, 31)) + local j = band(Rj+1, sar(Rj-16, 31)) + Ri, Rj = i, j + local k = band(Rm[i] - Rm[j], 0x7fffffff) + Rm[j] = k + return k * (1.0/2147483647.0) + end +else + -- Better for standard Lua with one-based arrays and without bit operations. + local Rm, Rj = {}, 1 + for i=1,17 do Rm[i] = 0 end + function rand_init(seed) + Rj = 1 + for i=17,1,-1 do + seed = (seed*9069) % (2^31) + Rm[i] = seed + end + end + function rand() + local j, m = Rj, Rm + local h = j - 5 + if h < 1 then h = h + 17 end + local k = m[h] - m[j] + if k < 0 then k = k + 2147483647 end + m[j] = k + if j < 17 then Rj = j + 1 else Rj = 1 end + return k * (1.0/2147483647.0) + end +end + +local function random_vector(n) + local v = {} + for x=1,n do v[x] = rand() end + return v +end + +local function random_matrix(m, n) + local a = {} + for y=1,m do + local v = {} + a[y] = v + for x=1,n do v[x] = rand() end + end + return a +end + +------------------------------------------------------------------------------ +-- FFT: Fast Fourier Transform. 
+------------------------------------------------------------------------------ + +local function fft_bitreverse(v, n) + local j = 0 + for i=0,2*n-4,2 do + if i < j then + v[i+1], v[i+2], v[j+1], v[j+2] = v[j+1], v[j+2], v[i+1], v[i+2] + end + local k = n + while k <= j do j = j - k; k = k / 2 end + j = j + k + end +end + +local function fft_transform(v, n, dir) + if n <= 1 then return end + fft_bitreverse(v, n) + local dual = 1 + repeat + local dual2 = 2*dual + for i=1,2*n-1,2*dual2 do + local j = i+dual2 + local ir, ii = v[i], v[i+1] + local jr, ji = v[j], v[j+1] + v[j], v[j+1] = ir - jr, ii - ji + v[i], v[i+1] = ir + jr, ii + ji + end + local theta = dir * pi / dual + local s, s2 = sin(theta), 2.0 * sin(theta * 0.5)^2 + local wr, wi = 1.0, 0.0 + for a=3,dual2-1,2 do + wr, wi = wr - s*wi - s2*wr, wi + s*wr - s2*wi + for i=a,a+2*(n-dual2),2*dual2 do + local j = i+dual2 + local jr, ji = v[j], v[j+1] + local dr, di = wr*jr - wi*ji, wr*ji + wi*jr + local ir, ii = v[i], v[i+1] + v[j], v[j+1] = ir - dr, ii - di + v[i], v[i+1] = ir + dr, ii + di + end + end + dual = dual2 + until dual >= n +end + +function benchmarks.FFT(n) + local l2n = log(n)/log(2) + if l2n % 1 ~= 0 then + io.stderr:write("Error: FFT data length is not a power of 2\n") + os.exit(1) + end + local v = random_vector(n*2) + return function(cycles) + local norm = 1.0 / n + for p=1,cycles do + fft_transform(v, n, -1) + fft_transform(v, n, 1) + for i=1,n*2 do v[i] = v[i] * norm end + end + return ((5*n-2)*l2n + 2*(n+1)) * cycles + end +end + +------------------------------------------------------------------------------ +-- SOR: Jacobi Successive Over-Relaxation. 
+------------------------------------------------------------------------------ + +local function sor_run(mat, m, n, cycles, omega) + local om4, om1 = omega*0.25, 1.0-omega + m = m - 1 + n = n - 1 + for i=1,cycles do + for y=2,m do + local v, vp, vn = mat[y], mat[y-1], mat[y+1] + for x=2,n do + v[x] = om4*((vp[x]+vn[x])+(v[x-1]+v[x+1])) + om1*v[x] + end + end + end +end + +function benchmarks.SOR(n) + local mat = random_matrix(n, n) + return function(cycles) + sor_run(mat, n, n, cycles, 1.25) + return (n-1)*(n-1)*cycles*6 + end +end + +------------------------------------------------------------------------------ +-- MC: Monte Carlo Integration. +------------------------------------------------------------------------------ + +local function mc_integrate(cycles) + local under_curve = 0 + local rand = rand + for i=1,cycles do + local x = rand() + local y = rand() + if x*x + y*y <= 1.0 then under_curve = under_curve + 1 end + end + return (under_curve/cycles) * 4 +end + +function benchmarks.MC() + return function(cycles) + local res = mc_integrate(cycles) + assert(math.sqrt(cycles)*math.abs(res-math.pi) < 5.0, "bad MC result") + return cycles * 4 -- Way off, but same as SciMark in C/Java. + end +end + +------------------------------------------------------------------------------ +-- Sparse Matrix Multiplication. 
+------------------------------------------------------------------------------ + +local function sparse_mult(n, cycles, vy, val, row, col, vx) + for p=1,cycles do + for r=1,n do + local sum = 0 + for i=row[r],row[r+1]-1 do sum = sum + vx[col[i]] * val[i] end + vy[r] = sum + end + end +end + +function benchmarks.SPARSE(n, nz) + local nr = floor(nz/n) + local anz = nr*n + local vx = random_vector(n) + local val = random_vector(anz) + local vy, col, row = {}, {}, {} + row[1] = 1 + for r=1,n do + local step = floor(r/nr) + if step < 1 then step = 1 end + local rr = row[r] + row[r+1] = rr+nr + for i=0,nr-1 do col[rr+i] = 1+i*step end + end + return function(cycles) + sparse_mult(n, cycles, vy, val, row, col, vx) + return anz*cycles*2 + end +end + +------------------------------------------------------------------------------ +-- LU: Dense Matrix Factorization. +------------------------------------------------------------------------------ + +local function lu_factor(a, pivot, m, n) + local min_m_n = m < n and m or n + for j=1,min_m_n do + local jp, t = j, abs(a[j][j]) + for i=j+1,m do + local ab = abs(a[i][j]) + if ab > t then + jp = i + t = ab + end + end + pivot[j] = jp + if a[jp][j] == 0 then error("zero pivot") end + if jp ~= j then a[j], a[jp] = a[jp], a[j] end + if j < m then + local recp = 1.0 / a[j][j] + for k=j+1,m do + local v = a[k] + v[j] = v[j] * recp + end + end + if j < min_m_n then + for i=j+1,m do + local vi, vj = a[i], a[j] + local eij = vi[j] + for k=j+1,n do vi[k] = vi[k] - eij * vj[k] end + end + end + end +end + +local function matrix_alloc(m, n) + local a = {} + for y=1,m do a[y] = {} end + return a +end + +local function matrix_copy(dst, src, m, n) + for y=1,m do + local vd, vs = dst[y], src[y] + for x=1,n do vd[x] = vs[x] end + end +end + +function benchmarks.LU(n) + local mat = random_matrix(n, n) + local tmp = matrix_alloc(n, n) + local pivot = {} + return function(cycles) + for i=1,cycles do + matrix_copy(tmp, mat, n, n) + lu_factor(tmp, 
pivot, n, n) + end + return 2.0/3.0*n*n*n*cycles + end +end + +rand_init(RANDOM_SEED) + +return benchmarks diff --git a/testsuite/bench/series.lua b/testsuite/bench/series.lua new file mode 100644 index 0000000000..f766cb3247 --- /dev/null +++ b/testsuite/bench/series.lua @@ -0,0 +1,34 @@ + +local function integrate(x0, x1, nsteps, omegan, f) + local x, dx = x0, (x1-x0)/nsteps + local rvalue = ((x0+1)^x0 * f(omegan*x0)) / 2 + for i=3,nsteps do + x = x + dx + rvalue = rvalue + (x+1)^x * f(omegan*x) + end + return (rvalue + ((x1+1)^x1 * f(omegan*x1)) / 2) * dx +end + +local function series(n) + local sin, cos = math.sin, math.cos + local omega = math.pi + local t = {} + + t[1] = integrate(0, 2, 1000, 0, function() return 1 end) / 2 + t[2] = 0 + + for i=2,n do + t[2*i-1] = integrate(0, 2, 1000, omega*i, cos) + t[2*i] = integrate(0, 2, 1000, omega*i, sin) + end + + return t +end + +local n = tonumber(arg and arg[1]) or 10000 +local tm = os.clock() +local t = series(n) +tm = os.clock() - tm +assert(math.abs(t[1]-2.87295) < 0.00001) +io.write(string.format("size %d, %.2f s, %.1f iterations/s\n", + n, tm, (2*n-1)/tm)) diff --git a/testsuite/bench/spectral-norm.lua b/testsuite/bench/spectral-norm.lua new file mode 100644 index 0000000000..ecc8011208 --- /dev/null +++ b/testsuite/bench/spectral-norm.lua @@ -0,0 +1,40 @@ + +local function A(i, j) + local ij = i+j-1 + return 1.0 / (ij * (ij-1) * 0.5 + i) +end + +local function Av(x, y, N) + for i=1,N do + local a = 0 + for j=1,N do a = a + x[j] * A(i, j) end + y[i] = a + end +end + +local function Atv(x, y, N) + for i=1,N do + local a = 0 + for j=1,N do a = a + x[j] * A(j, i) end + y[i] = a + end +end + +local function AtAv(x, y, t, N) + Av(x, t, N) + Atv(t, y, N) +end + +local N = tonumber(arg and arg[1]) or 100 +local u, v, t = {}, {}, {} +for i=1,N do u[i] = 1 end + +for i=1,10 do AtAv(u, v, t, N) AtAv(v, u, t, N) end + +local vBv, vv = 0, 0 +for i=1,N do + local ui, vi = u[i], v[i] + vBv = vBv + ui*vi + vv = vv + vi*vi 
+end +io.write(string.format("%0.9f\n", math.sqrt(vBv / vv))) diff --git a/testsuite/bench/sum-file.lua b/testsuite/bench/sum-file.lua new file mode 100644 index 0000000000..c9e618fdf5 --- /dev/null +++ b/testsuite/bench/sum-file.lua @@ -0,0 +1,6 @@ + +local sum = 0 +for line in io.lines() do + sum = sum + line +end +io.write(sum, "\n") diff --git a/testsuite/test/README.md b/testsuite/test/README.md new file mode 100644 index 0000000000..ff16ac8ef8 --- /dev/null +++ b/testsuite/test/README.md @@ -0,0 +1,110 @@ +This directory contains the LuaJIT test suite, or at least something which +will evolve into the LuaJIT test suite. Large chunks of the suite can also +be run with any other Lua 5.1 or 5.2 interpreter. + +## Running the test suite ## + +To run the default test suite, run `test.lua` using the Lua interpreter you +wish to test, for example: + + $ ~/luajit-2.0/src/luajit test.lua + +If the test suite passes, the final line printed to stdout will be +`NNN passed`, and the exit code of the process will be zero. If any tests +fail, the exit code will be non-zero. If the failures caused catastrophic +termination of the entire process (such as a segmentation fault or assertion +failure), the last line of output will be number and name of the test which +caused the catastrophe. If the failures weren't catastrophic, the penultimate +line of output will be `NNN passed, MMM failed`, and the last line will say +how to re-run just the failing tests. + +Various flags and options can be passed to `test.lua` to control which tests +are run, and in which order. Run `lua test.lua --help` for details. + +## Structure of the test suite ## + +The test suite consists of a directory tree. Within said tree there are various +`.lua` files, and within every `.lua` file there are one or more tests. 
Every +directory in the tree contains a file called `index`, which enumerates the +members of the directory which contribute to the test suite (this is done to +avoid an external dependency for directory iteration, and to allow metadata to +be specified at the file/directory level). Every `.lua` file is structured as: + + << local definitions >> + << test 1 >> + ... + << test N >> + +Where `<< local definitions >>` consists of Lua code to act as a common prefix +for every test in the file, and each `<< test >>` looks like: + + do --- << name >> << metadata >> + << code >> + end + +Where `<< name >>` is (almost) free-form, and `<< code >>` is Lua code which +performs some actions and probably calls `assert` a lot. The `<< metadata >>` +fragment can be used to specify the conditions under which the test should +or should not run, to adjust the environment in which the test is run, and to +allow key/value pairs to be specified in a standard place/format. + +Some common pieces of metadata are: + * `+luajit>=2.1` - The test requires LuaJIT 2.1 or later to run. + * `+lua<5.2` - The test requires Lua 5.1 or earlier to run (all versions of + LuaJIT report themselves as 5.1). + * `+ffi` - The test requires the `ffi` library to be present. + * `+bit` - The test requires the `bit` library to be present. + * `+jit` - The test requires JIT compilation be available and turned on. + * `+slow` - The test is too slow to run as part of the default suite, and + thus requires `+slow` to be specified on the command line. + * `!private_G` - The test modifies globals, and thus needs to be run with a + private (shallow) copy of `_G`. + +Lua code which is common across all (or some) tests in a single file can be +written at the top of the file as part of `<< local definitions >>`. Code +which is common across multiple files lives in the `common` directory, and +is pulled into applicable tests by means of `local x = require"common.x"`. 
+ +It is intended that most `.lua` files in the test suite can be exercised +without the test runner by just passing them to a Lua interpreter. In such +cases, metadata is ignored, the tests are executed from top to bottom, and +any failure results in later tests not running. Also note that the test +runner converts every test into a separate function, which causes references +to local definitions to become upvalue accesses rather than local variable +accesses - in some cases this can cause differences in behaviour. + +## Extending the test suite ## + +First of all, decide where your new test(s) should live. This might be within +an existing `.lua` file, or might involve creating new files and/or directories. +If new files are created, remember to add them to the `index` file of the +enclosing directory. If new directories are created, remember to create an +`index` file in said directory, and add the new directory to the `index` file +in the parent directory. + +Once you've decided in which file the test(s) should live, you're ready to add +your test(s) to said file. Each test should be wrapped in a `do`/`end` block, +and given some kind of name (after the `do` keyword, as in `do --- << name >>`). +The test should call `assert` to confirm whether the thing under test is +behaving, or otherwise raise an error if the thing under test is misbehaving. +Your test(s) should not write to stdout or stderr, nor should they mutate +global state. After your test(s) are written, you should be able to determine +which features they require, and add the appropriate metadata. + +## Completing the tidy-up of the test suite ## + +Some files/directories in this directory need some thought: + + * `common/ffi_util.inc` - Needs renaming and being made `require`-able. + * `lib/ffi` - Tests need converting to structure described in this document. + * `lib/table/misc.lua` - Tests need organising and converting to structure + described in this document. 
+ * `misc` - Tests need organising and converting to structure described in + this document. + * `src` - C/C++ source which needs to be compiled into a dynamic library and + loaded for certain tests. Need to figure out a good way of handling + C/C++ source. + * `sysdep` - Need to figure out a good way of handling these. + * `unportable` - Need to figure out a good way of handling these. + +After that, consult the README file by Mike in the directory above this one. diff --git a/testsuite/test/bc/constov.lua b/testsuite/test/bc/constov.lua new file mode 100644 index 0000000000..5827840b46 --- /dev/null +++ b/testsuite/test/bc/constov.lua @@ -0,0 +1,16 @@ + +do --- float + local t = { "local x\n" } + for i=2,65537 do t[i] = "x="..i..".5\n" end + assert(loadstring(table.concat(t)) ~= nil) + t[65538] = "x=65538.5" + assert(loadstring(table.concat(t)) == nil) +end + +do --- int + local t = { "local x\n" } + for i=2,65537 do t[i] = "x='"..i.."'\n" end + assert(loadstring(table.concat(t)) ~= nil) + t[65538] = "x='65538'" + assert(loadstring(table.concat(t)) == nil) +end diff --git a/testsuite/test/bc/index b/testsuite/test/bc/index new file mode 100644 index 0000000000..dead10f555 --- /dev/null +++ b/testsuite/test/bc/index @@ -0,0 +1 @@ +constov.lua +slow diff --git a/testsuite/test/common/expect_error.lua b/testsuite/test/common/expect_error.lua new file mode 100644 index 0000000000..e155090eda --- /dev/null +++ b/testsuite/test/common/expect_error.lua @@ -0,0 +1,16 @@ +return function(f, msg) + local ok, err = pcall(f) + if ok then error("error check unexpectedly succeeded", 2) end + if msg then + if type(err) ~= "string" then + error("error check failed with "..tostring(err), 2) + end + local line, err2 = string.match(err, ":(%d*): (.*)") + if err2 ~= msg then + if err2:gsub(" got no value", " got nil") == msg then + return + end + error("error check failed with "..err, 2) + end + end +end diff --git a/testsuite/test/common/ffi_util.inc 
b/testsuite/test/common/ffi_util.inc new file mode 100644 index 0000000000..1eee8dd933 --- /dev/null +++ b/testsuite/test/common/ffi_util.inc @@ -0,0 +1,41 @@ +-- This should be turned into a proper module and not use globals. +-- Or combined into a generiv test utility module. With FFI +-- functionality turned off, if the FFI module is not built-in. + +local ffi = require("ffi") + +function checkfail(t, f) + f = f or ffi.typeof + for i=1,1e9 do + local tp = t[i] + if not tp then break end + assert(pcall(f, tp) == false, tp) + end +end + +function checktypes(t) + for i=1,1e9,3 do + local tp = t[i+2] + if not tp then break end + local id = ffi.typeof(tp) + assert(ffi.sizeof(id) == t[i], tp) + assert(ffi.alignof(id) == t[i+1], tp) + end +end + +function fails(f, ...) + if pcall(f, ...) ~= false then error("failure expected", 2) end +end + +local incroot = os.getenv("INCROOT") or "/usr/include" +local cdefs = os.getenv("CDEFS") or "" + +function include(name) + local flags = ffi.abi("32bit") and "-m32" or "-m64" + if string.sub(name, 1, 1) ~= "/" then name = incroot.."/"..name end + local fp = assert(io.popen("cc -E -P "..flags.." "..cdefs.." "..name)) + local s = fp:read("*a") + fp:close() + ffi.cdef(s) +end + diff --git a/testsuite/test/common/test_runner_canary.lua b/testsuite/test/common/test_runner_canary.lua new file mode 100644 index 0000000000..fc9cadc637 --- /dev/null +++ b/testsuite/test/common/test_runner_canary.lua @@ -0,0 +1 @@ +return "canary is alive" diff --git a/testsuite/test/computations.lua b/testsuite/test/computations.lua new file mode 100644 index 0000000000..4fce7fcd4a --- /dev/null +++ b/testsuite/test/computations.lua @@ -0,0 +1,113 @@ +do --- ack + local function Ack(m, n) + if m == 0 then return n+1 end + if n == 0 then return Ack(m-1, 1) end + return Ack(m-1, (Ack(m, n-1))) -- The parentheses are deliberate. 
+ end + + assert(Ack(3,5) == 253) +end + +do --- ack notail + local function Ack(m, n) + if m == 0 then return n+1 end + if n == 0 then return Ack(m-1, 1) end + return (Ack(m-1, (Ack(m, n-1)))) -- The parentheses are deliberate. + end + + assert(Ack(3,5) == 253) +end + +do --- fac + local function fac(n) + local x = 1 + for i=2,n do + x = x * i + end + return x + end + + assert(fac(10) == 3628800) +end + +do --- ffib + local function ffib(n) + if n <= 2 then return n,1 end + if n % 2 == 1 then + local a,b = ffib((n-1)/2) + local aa = a*a + return aa+a*(b+b), aa+b*b + else + local a,b = ffib(n/2-1) + local ab = a+b + return ab*ab+a*a, (ab+b)*a + end + end + + local function fib(n) + return (ffib(n)) + end + + assert(fib(40) == 165580141) + assert(fib(39) == 102334155) + assert(fib(77) == 8944394323791464) +end + +do --- fib + local function fib(n) + if n < 2 then return 1 end + return fib(n-2) + fib(n-1) + end + + assert(fib(27) == 317811) +end + +do --- nsieve + local function nsieve(m) + local isPrime = {} + for i=2,m do isPrime[i] = true end + local count = 0 + for i=2,m do + if isPrime[i] then + for k=i+i,m,i do isPrime[k] = false end + count = count + 1 + end + end + return count + end + + assert(nsieve(100) == 25) + assert(nsieve(12345) == 1474) +end + +do --- recsum + local function sum(n) + if n == 1 then return 1 end + return n + sum(n-1) + end + + for i=1, 100 do + assert(sum(i) == i*(i+1)/2) + end +end + +do --- recsump + local abs = math.abs + local function sum(n) + if n == 1 then return 1 end + return abs(n + sum(n-1)) + end + + for i=1, 100 do + assert(sum(i) == i*(i+1)/2) + end +end + +do --- tak + local function tak(x, y, z) + if y >= x then return z end + return tak(tak(x-1, y, z), tak(y-1, z, x), (tak(z-1, x, y))) + end + + assert(tak(21, 14, 7) == 14) +end diff --git a/testsuite/test/index b/testsuite/test/index new file mode 100644 index 0000000000..d9424feea3 --- /dev/null +++ b/testsuite/test/index @@ -0,0 +1,7 @@ +lang +lib +compat5.2 +bc 
+luajit>=2 +computations.lua +trace +jit +opt +jit +raptorjit diff --git a/testsuite/test/lang/andor.lua b/testsuite/test/lang/andor.lua new file mode 100644 index 0000000000..55b2c756e5 --- /dev/null +++ b/testsuite/test/lang/andor.lua @@ -0,0 +1,61 @@ +do --- smoke + local x = ((1 or false) and true) or false + assert(x == true) +end + +do --- allcases + local basiccases = { + {"nil", nil}, + {"false", false}, + {"true", true}, + {"10", 10}, + } + + local mem = {basiccases} -- for memoization + + local function allcases (n) + if mem[n] then return mem[n] end + local res = {} + -- include all smaller cases + for _, v in ipairs(allcases(n - 1)) do + res[#res + 1] = v + end + for i = 1, n - 1 do + for _, v1 in ipairs(allcases(i)) do + for _, v2 in ipairs(allcases(n - i)) do + res[#res + 1] = { + "(" .. v1[1] .. " and " .. v2[1] .. ")", + v1[2] and v2[2] + } + res[#res + 1] = { + "(" .. v1[1] .. " or " .. v2[1] .. ")", + v1[2] or v2[2] + } + end + end + end + mem[n] = res -- memoize + return res + end + + for _, v in pairs(allcases(4)) do + local res = (loadstring or load)("return " .. 
v[1])() + if res ~= v[2] then + error(string.format("bad conditional eval\n%s\nexpected: %s\ngot: %s", + v[1], tostring(v[2]), tostring(res))) + end + end +end + +do --- tracefib + -- 0001 KSHORT 1 2 + -- 0002 ISGE 0 1 + -- 0003 JMP 1 => 0006 + -- 0004 KSHORT 1 1 + -- 0005 JMP 1 => 0013 + -- ^^^ must be 2 + -- fix in jmp_patchtestreg + local function fib(n) return (n < 2) and 1 or fib(n-1)+fib(n-2) end + assert(fib(5) == 8) + assert(fib(10) == 89) +end diff --git a/testsuite/test/lang/assignment.lua b/testsuite/test/lang/assignment.lua new file mode 100644 index 0000000000..e9745ef667 --- /dev/null +++ b/testsuite/test/lang/assignment.lua @@ -0,0 +1,46 @@ +local assert = assert + +do --- local + local a, b, c + a, b, c = 0, 1 + assert(a == 0) + assert(b == 1) + assert(c == nil) + a, b = a+1, b+1, a+b + assert(a == 1) + assert(b == 2) + a, b, c = 0 + assert(a == 0) + assert(b == nil) + assert(c == nil) +end + +do --- global !private_G + a, b, c = 0, 1 + assert(a == 0) + assert(b == 1) + assert(c == nil) + a, b = a+1, b+1, a+b + assert(a == 1) + assert(b == 2) + a, b, c = 0 + assert(a == 0) + assert(b == nil) + assert(c == nil) +end + +do --- local lhs in key on lhs + local a = {} + local i = 3 + i, a[i] = i+1, 20 + assert(i == 4) + assert(a[3] == 20) +end + +do --- global lhs in key on lhs !private_G + a = {} + i = 3 + i, a[i] = i+1, 20 + assert(i == 4) + assert(a[3] == 20) +end diff --git a/testsuite/test/lang/compare.lua b/testsuite/test/lang/compare.lua new file mode 100644 index 0000000000..09c5488d15 --- /dev/null +++ b/testsuite/test/lang/compare.lua @@ -0,0 +1,323 @@ +local function lt(x, y) + if x < y then return true else return false end +end + +local function le(x, y) + if x <= y then return true else return false end +end + +local function gt(x, y) + if x > y then return true else return false end +end + +local function ge(x, y) + if x >= y then return true else return false end +end + +local function eq(x, y) + if x == y then return true else return 
false end +end + +local function ne(x, y) + if x ~= y then return true else return false end +end + + +local function ltx1(x) + if x < 1 then return true else return false end +end + +local function lex1(x) + if x <= 1 then return true else return false end +end + +local function gtx1(x) + if x > 1 then return true else return false end +end + +local function gex1(x) + if x >= 1 then return true else return false end +end + +local function eqx1(x) + if x == 1 then return true else return false end +end + +local function nex1(x) + if x ~= 1 then return true else return false end +end + + +local function lt1x(x) + if 1 < x then return true else return false end +end + +local function le1x(x) + if 1 <= x then return true else return false end +end + +local function gt1x(x) + if 1 > x then return true else return false end +end + +local function ge1x(x) + if 1 >= x then return true else return false end +end + +local function eq1x(x) + if 1 == x then return true else return false end +end + +local function ne1x(x) + if 1 ~= x then return true else return false end +end + +local function check(a, b) + if a ~= b then + error("check failed with "..tostring(a).." 
~= "..tostring(b), 2) + end +end + +do --- 1,2 + local x,y = 1,2 + + check(x<y, true) + check(x<=y, true) + check(x>y, false) + check(x>=y, false) + check(x==y, false) + check(x~=y, true) + + check(1<y, true) + check(1<=y, true) + check(1>y, false) + check(1>=y, false) + check(1==y, false) + check(1~=y, true) + + check(x<2, true) + check(x<=2, true) + check(x>2, false) + check(x>=2, false) + check(x==2, false) + check(x~=2, true) + + check(lt(x,y), true) + check(le(x,y), true) + check(gt(x,y), false) + check(ge(x,y), false) + check(eq(y,x), false) + check(ne(y,x), true) +end + +do --- 2,1 + local x,y = 2,1 + + check(x<y, false) + check(x<=y, false) + check(x>y, true) + check(x>=y, true) + check(x==y, false) + check(x~=y, true) + + check(2<y, false) + check(2<=y, false) + check(2>y, true) + check(2>=y, true) + check(2==y, false) + check(2~=y, true) + + check(x<1, false) + check(x<=1, false) + check(x>1, true) + check(x>=1, true) + check(x==1, false) + check(x~=1, true) + + check(lt(x,y), false) + check(le(x,y), false) + check(gt(x,y), true) + check(ge(x,y), true) + check(eq(y,x), false) + check(ne(y,x), true) +end + +do --- 1,1 + local x,y = 1,1 + + check(x<y, false) + check(x<=y, true) + check(x>y, false) + check(x>=y, true) + check(x==y, true) + check(x~=y, false) + + check(1<y, false) + check(1<=y, true) + check(1>y, false) + check(1>=y, true) + check(1==y, true) + check(1~=y, false) + + check(x<1, false) + check(x<=1, true) + check(x>1, false) + check(x>=1, true) + check(x==1, true) + check(x~=1, false) + + check(lt(x,y), false) + check(le(x,y), true) + check(gt(x,y), false) + check(ge(x,y), true) + check(eq(y,x), true) + check(ne(y,x), false) +end + +do --- 2 + check(lt1x(2), true) + check(le1x(2), true) + check(gt1x(2), false) + check(ge1x(2), false) + check(eq1x(2), false) + check(ne1x(2), true) + + check(ltx1(2), false) + check(lex1(2), false) + check(gtx1(2), true) + check(gex1(2), true) + check(eqx1(2), false) + check(nex1(2), true) +end + +do --- 1 + check(lt1x(1), false) + check(le1x(1), true) + check(gt1x(1), false) + check(ge1x(1), true) + check(eq1x(1), true) + check(ne1x(1), false) + + check(ltx1(1), false) + check(lex1(1), true) + check(gtx1(1), false) + check(gex1(1), true) + 
check(eqx1(1), true) + check(nex1(1), false) +end + +do --- 0 + check(lt1x(0), false) + check(le1x(0), false) + check(gt1x(0), true) + check(ge1x(0), true) + check(eq1x(0), false) + check(ne1x(0), true) + + check(ltx1(0), true) + check(lex1(0), true) + check(gtx1(0), false) + check(gex1(0), false) + check(eqx1(0), false) + check(nex1(0), true) +end + +do --- pcall + assert(not pcall(function() + local a, b = 10.5, nil + return a < b + end)) +end + +do --- bit +bit + for i=1,100 do + assert(bit.tobit(i+0x7fffffff) < 0) + end + for i=1,100 do + assert(bit.tobit(i+0x7fffffff) <= 0) + end +end + +do --- string 1 255 + local a = "\255\255\255\255" + local b = "\1\1\1\1" + + assert(a > b) + assert(a > b) + assert(a >= b) + assert(b <= a) +end + +do --- String comparisons: + local function str_cmp(a, b, lt, gt, le, ge) + assert(a<b == lt) + assert(a>b == gt) + assert(a<=b == le) + assert(a>=b == ge) + assert((not (a<b)) == (not lt)) + assert((not (a>b)) == (not gt)) + assert((not (a<=b)) == (not le)) + assert((not (a>=b)) == (not ge)) + end + + local function str_lo(a, b) + str_cmp(a, b, true, false, true, false) + end + + local function str_eq(a, b) + str_cmp(a, b, false, false, true, true) + end + + local function str_hi(a, b) + str_cmp(a, b, false, true, false, true) + end + + str_lo("a", "b") + str_eq("a", "a") + str_hi("b", "a") + + str_lo("a", "aa") + str_hi("aa", "a") + + str_lo("a", "a\0") + str_hi("a\0", "a") +end + +do --- obj_eq/ne + local function obj_eq(a, b) + assert(a==b == true) + assert(a~=b == false) + end + + local function obj_ne(a, b) + assert(a==b == false) + assert(a~=b == true) + end + + obj_eq(nil, nil) + obj_ne(nil, false) + obj_ne(nil, true) + + obj_ne(false, nil) + obj_eq(false, false) + obj_ne(false, true) + + obj_ne(true, nil) + obj_ne(true, false) + obj_eq(true, true) + + obj_eq(1, 1) + obj_ne(1, 2) + obj_ne(2, 1) + + obj_eq("a", "a") + obj_ne("a", "b") + obj_ne("a", 1) + obj_ne(1, "a") + + local t, t2 = {}, {} + obj_eq(t, t) + obj_ne(t, t2) + obj_ne(t, 1) + obj_ne(t, "") +end diff --git 
a/testsuite/test/lang/compare_nan.lua b/testsuite/test/lang/compare_nan.lua new file mode 100644 index 0000000000..878f39a7f5 --- /dev/null +++ b/testsuite/test/lang/compare_nan.lua @@ -0,0 +1,99 @@ + +local function check(a, b) + if a ~= b then + error("check failed with "..tostring(a).." ~= "..tostring(b), 2) + end +end + +local nan, one = 0/0, 1 + +do --- nan nan + check(nan<nan, false) + check(nan<=nan, false) + check(nan>nan, false) + check(nan>=nan, false) + check(nan==nan, false) + check(nan~=nan, true) +end + +do --- nan one + check(nan<one, false) + check(nan<=one, false) + check(nan>one, false) + check(nan>=one, false) + check(nan==one, false) + check(nan~=one, true) +end + +do --- one nan + check(one<nan, false) + check(one<=nan, false) + check(one>nan, false) + check(one>=nan, false) + check(one==nan, false) + check(one~=nan, true) +end + +do --- nan 1 + check(nan<1, false) + check(nan<=1, false) + check(nan>1, false) + check(nan>=1, false) + check(nan==1, false) + check(nan~=1, true) +end + +do --- 1 nan + check(1<nan, false) + check(1<=nan, false) + check(1>nan, false) + check(1>=nan, false) + check(1==nan, false) + check(1~=nan, true) +end + +do --- not nan nan + check(not (nan<nan), true) + check(not (nan<=nan), true) + check(not (nan>nan), true) + check(not (nan>=nan), true) + check(not (nan==nan), true) + check(not (nan~=nan), false) +end + +do --- not nan one + check(not (nan<one), true) + check(not (nan<=one), true) + check(not (nan>one), true) + check(not (nan>=one), true) + check(not (nan==one), true) + check(not (nan~=one), false) +end + +do --- not one nan + check(not (one<nan), true) + check(not (one<=nan), true) + check(not (one>nan), true) + check(not (one>=nan), true) + check(not (one==nan), true) + check(not (one~=nan), false) +end + +do --- not nan 1 + check(not (nan<1), true) + check(not (nan<=1), true) + check(not (nan>1), true) + check(not (nan>=1), true) + check(not (nan==1), true) + check(not (nan~=1), false) +end + +do --- not 1 nan + check(not (1<nan), true) + check(not (1<=nan), true) + check(not (1>nan), true) + check(not (1>=nan), true) + check(not (1==nan), true) + check(not (1~=nan), false) +end + diff --git a/testsuite/test/lang/concat.lua b/testsuite/test/lang/concat.lua new file mode 100644 index 0000000000..04d665b2b7 --- /dev/null +++ b/testsuite/test/lang/concat.lua @@ -0,0 +1,112 @@ +do --- Constant folding + local y + for i=1,100 do y = 
"a".."b" end + assert(y == "ab") + for i=1,100 do y = "ab"..(1).."cd"..(1.5) end + assert(y == "ab1cd1.5") +end + +do --- Fuse conversions to strings + local y + local x = "a" + for i=1,100 do y = x..i end + assert(y == "a100") + x = "a" + for i=1.5,100.5 do y = x..i end + assert(y == "a100.5") +end + +do --- Fuse string construction + local y + local x = "abc" + for i=1,100 do y = "x"..string.sub(x, 2) end + assert(y == "xbc") +end + +do --- CSE, sink + local y + local x = "a" + for i=1,100 do y = x.."b" end + assert(y == "ab") +end + +do --- CSE, two buffers in parallel, no sink + local y, z + local x1, x2 = "xx", "yy" + for i=1,100 do y = x1.."a"..x1; z = x1.."a"..x2 end + assert(y == "xxaxx") + assert(z == "xxayy") + x1 = "xx" + for i=1,100 do y = x1.."a"..x1; z = x1.."b"..x1 end + assert(y == "xxaxx") + assert(z == "xxbxx") +end + +do --- Append, CSE + local y, z + local x = "a" + for i=1,100 do + y = x.."b" + y = y.."c" + end + assert(y == "abc") + x = "a" + for i=1,100 do + y = x.."b" + z = y.."c" + end + assert(y == "ab") + assert(z == "abc") + x = "a" + for i=1,100 do + y = x.."b" + z = y..i + end + assert(y == "ab") + assert(z == "ab100") +end + +do --- Append, FOLD + local a, b = "x" + for i=1,100 do b = (a.."y").."" end + assert(b == "xy") +end + +do --- Append to buffer, sink + local x = "a" + for i=1,100 do x = x.."b" end + assert(x == "a"..string.rep("b", 100)) + x = "a" + for i=1,100 do x = x.."bc" end + assert(x == "a"..string.rep("bc", 100)) +end + +do --- Append to two buffers in parallel, no append, no sink + local y, z = "xx", "yy" + for i=1,100 do y = y.."a"; z = z.."b" end + assert(y == "xx"..string.rep("a", 100)) + assert(z == "yy"..string.rep("b", 100)) +end + +do --- Sink into side-exit + local x = "a" + local z + for i=1,200 do + local y = x.."b" + if i > 100 then + z = y..i + end + end + assert(z == "ab200") +end + +do --- Very long strings + for i, s in ipairs{"a", "bc", "def"} do + for n = 1, 20 do + s = s .. 
s + end + assert(#s == 2^20*i) + assert(s:sub(1, 6) == s:sub(7, 12)) + assert(s:sub(1, 6) == s:sub(-6, -1)) + end +end diff --git a/testsuite/test/lang/constant/index b/testsuite/test/lang/constant/index new file mode 100644 index 0000000000..e738357d79 --- /dev/null +++ b/testsuite/test/lang/constant/index @@ -0,0 +1,2 @@ +number.lua +table.lua diff --git a/testsuite/test/lang/constant/number.lua b/testsuite/test/lang/constant/number.lua new file mode 100644 index 0000000000..fb67356e7f --- /dev/null +++ b/testsuite/test/lang/constant/number.lua @@ -0,0 +1,12 @@ +do --- exp + assert(1e5 == 100000) + assert(1e+5 == 100000) + assert(1e-5 == 0.00001) +end + +do --- hex exp +hexfloat !lex + assert(0xe+9 == 23) + assert(0xep9 == 7168) + assert(0xep+9 == 7168) + assert(0xep-9 == 0.02734375) +end diff --git a/testsuite/test/lang/constant/table.lua b/testsuite/test/lang/constant/table.lua new file mode 100644 index 0000000000..899d0f671c --- /dev/null +++ b/testsuite/test/lang/constant/table.lua @@ -0,0 +1,15 @@ + +do --- tnew + local a = nil + local b = {} + local t = {[true] = a, [false] = b or 1} + assert(t[true] == nil) + assert(t[false] == b) +end + +do --- tdup + local b = {} + local t = {[true] = nil, [false] = b or 1} + assert(t[true] == nil) + assert(t[false] == b) +end diff --git a/testsuite/test/lang/coroutine.lua b/testsuite/test/lang/coroutine.lua new file mode 100644 index 0000000000..405135c968 --- /dev/null +++ b/testsuite/test/lang/coroutine.lua @@ -0,0 +1,8 @@ +do --- traceback + local co = coroutine.create(function() + local x = nil + local y = x.x + end) + assert(coroutine.resume(co) == false) + debug.traceback(co) +end diff --git a/testsuite/test/lang/for.lua b/testsuite/test/lang/for.lua new file mode 100644 index 0000000000..4982b32b95 --- /dev/null +++ b/testsuite/test/lang/for.lua @@ -0,0 +1,45 @@ +do --- direction + local a,b,c = 10,1,-1 + for i=1,20 do + if c == -1 then + a,b,c = 1,10,1 + else + a,b,c = 10,1,-1 + end + local x = 0 + for i=a,b,c 
do for j=1,10 do end x=x+1 end + assert(x == 10) + end +end + +do --- coerce to integer at 13 + local n = 1 + local x = 0 + for i=1,20 do + for j=n,100 do x = x + 1 end + if i == 13 then n = "2" end + end + assert(x == 1993) +end + +do --- coerce to integer at 10 + local n = 1 + local x = 0 + for i=1,20 do + for j=n,100 do x = x + 1 end + if i == 10 then n = "2" end + end + assert(x == 1990) +end + +do --- cannot coerce to integer at 10 + local function f() + local n = 1 + local x = 0 + for i=1,20 do + for j=n,100 do x = x + 1 end + if i == 10 then n = "x" end + end + end + assert(not pcall(f)) +end diff --git a/testsuite/test/lang/gc.lua b/testsuite/test/lang/gc.lua new file mode 100644 index 0000000000..35e6a1f336 --- /dev/null +++ b/testsuite/test/lang/gc.lua @@ -0,0 +1,42 @@ +do --- rechain + local k + + collectgarbage() + + local t = {} + t.ac = 1 + + t.nn = 1 + t.mm = 1 + t.nn = nil + t.mm = nil + + k = "a".."i" + t[k] = 2 + + t.ad = 3 + + t[k] = nil + k = nil + + collectgarbage() + + k = "a".."f" + t[k] = 4 + + t.ak = 5 + + assert(t[k] == 4) +end + +do --- TSETM gc + local function f() + collectgarbage() + return "a", "b" + end + for i = 1, 10 do + local t = {f()} + assert(t[1] == "a") + assert(t[2] == "b") + end +end diff --git a/testsuite/test/lang/goto.lua b/testsuite/test/lang/goto.lua new file mode 100644 index 0000000000..1563a234ae --- /dev/null +++ b/testsuite/test/lang/goto.lua @@ -0,0 +1,149 @@ +local loadstring = loadstring or load + +local function expect(src, msg) + local ok, err = loadstring(src) + if msg then + assert(not ok and string.find(err, msg), err) + else + assert(ok, err) + end +end + +do --- Basic goto and label semantics. + -- Error: duplicate label. + expect("::a:: ::a::", "'a'") + expect("::a:: ::b:: do ::b:: end ::a::", "'a'") + + -- Error: undefined label. 
+ expect("goto a", "'a'") + expect("goto a; ::b::", "'a'") + expect("do ::a:: end; goto a", "'a'") + expect("goto a; do ::a:: end", "'a'") + expect("break", "break") + expect("if x then break end", "break") + + -- Error: goto into variable scope. + expect("goto a; local x; ::a:: local y", "'x'") + expect("do local v,w; goto a; end; local x; ::a:: local y", "'x'") + expect("repeat goto a; local x; ::a:: until x", "'x'") + + ::a:: do goto a; ::a:: end -- Forward jump, not an infinite loop. +end + +do --- Goto is not a keyword. -compat5.2 !lex !private_G + goto = 1 +end + +do --- Goto is a keyword. +compat5.2 + expect("goto = 1", "") +end + +do --- Trailing label is considered to be out of scope. + local x = 11 + do + goto a + goto a + local y = 22 + x = y + ::a:: + ::b:: + end + assert(x == 11) +end + +do --- Trailing labels and empty statements are considered to be out of scope. +compat5.2 !lex + local x = 11 + do + goto a + goto a + local y = 22 + x = y + ::a:: ;; + ::b:: ;; + end + assert(x == 11) +end + +do --- Simple loop with cross-jumping. + local x = 1 + while true do + goto b + ::a:: if x < 100 then goto c end + goto d + ::b:: x = x + 1; goto a + ::c:: + end + ::d:: + assert(x == 100) +end + +do --- Backwards goto must close upval. + local t = {} + local i = 1 + ::a:: + local x + t[i] = function() return x end + x = i + i = i + 1 + if i <= 2 then goto a end + assert(t[1]() == 1) + assert(t[2]() == 2) +end + +do --- Break must close upval, even if closure is parsed after break. + local foo + repeat + local x + ::a:: + if x then break end + function foo() return x end + x = true + goto a + until false + assert(foo() == true) +end + +do --- Label prevents joining to KNIL. -lua==5.2 + local k = 0 + local x + ::foo:: + local y + assert(y == nil) + y = true + k = k + 1 + if k < 2 then goto foo end +end + +do --- Break resolved from the right scope. 
+ local function p(lvl) + lvl = lvl or 1 + while true do + lvl = lvl + 1 + if lvl == nil then break end + local idx = 1 + while true do + if key == nil then break end + idx = idx + 1 + end + end + end +end + +do --- Do not join twice with UCLO. + while true do + do + local x + local function f() return x end + end + break + end + + while true do + do + local x + local function f() return x end + end + goto foo + end + ::foo:: +end diff --git a/testsuite/test/lang/index b/testsuite/test/lang/index new file mode 100644 index 0000000000..88e2edfac7 --- /dev/null +++ b/testsuite/test/lang/index @@ -0,0 +1,18 @@ +andor.lua +assignment.lua +compare.lua +compare_nan.lua +constant +for.lua +length.lua +modulo.lua +concat.lua +self.lua +table.lua +upvalue +coroutine.lua +tail_recursion.lua +vararg_jit.lua +gc.lua +goto.lua +goto +meta diff --git a/testsuite/test/lang/length.lua b/testsuite/test/lang/length.lua new file mode 100644 index 0000000000..67c68ae70a --- /dev/null +++ b/testsuite/test/lang/length.lua @@ -0,0 +1,23 @@ + +do --- length increasing and decreasing in loop + local t = {} + for i=1,100 do t[#t+1] = i end + assert(#t == 100) + for i=1,100 do t[#t] = nil end + assert(#t == 0) +end + +do --- length increasing in loop with existing element + local t = {} + t[90] = 999 + for i=1,100 do t[#t+1] = i end + assert(#t > 100 and t[#t] == 100) +end + +do --- length decreasing in loop with erased element + local t = {} + for i=1,100 do t[i] = i end + t[10] = nil + for i=1,99 do t[#t] = nil end + assert(#t == 0) +end diff --git a/testsuite/test/lang/meta/arith.lua b/testsuite/test/lang/meta/arith.lua new file mode 100644 index 0000000000..17de4c8cde --- /dev/null +++ b/testsuite/test/lang/meta/arith.lua @@ -0,0 +1,118 @@ +local function create(arith, v1, v2) + local meta = { + __add=function(a,b) return arith("add", a, b) end, + __sub=function(a,b) return arith("sub", a, b) end, + __mul=function(a,b) return arith("mul", a, b) end, + __div=function(a,b) return 
arith("div", a, b) end, + __mod=function(a,b) return arith("mod", a, b) end, + __pow=function(a,b) return arith("pow", a, b) end, + __unm=function(a,b) return arith("unm", a, b) end, + } + return setmetatable({v1}, meta), setmetatable({v2}, meta) +end + +do --- op + local a, b = create(function(op,a,b) return op end) + assert(a+b == "add") + assert(a-b == "sub") + assert(a*b == "mul") + assert(a/b == "div") + assert(a%b == "mod") + assert(a^b == "pow") + assert(-a == "unm") +end + +do --- lhs + local a, b = create(function(op,a,b) return a[1] end, "foo", 42) + assert(a+b == "foo") + assert(a-b == "foo") + assert(a*b == "foo") + assert(a/b == "foo") + assert(a%b == "foo") + assert(a^b == "foo") + assert(-a == "foo") +end + +do --- rhs + local a, b = create(function(op,a,b) return b[1] end, 42, "foo") + assert(a+b == "foo") + assert(a-b == "foo") + assert(a*b == "foo") + assert(a/b == "foo") + assert(a%b == "foo") + assert(a^b == "foo") + assert(-a == 42) +end + +do --- meta only lhs + local a, b = create(function(op,a,b) return a[1]+b end, 39), 3 + assert(a+b == 42) + assert(a-b == 42) + assert(a*b == 42) + assert(a/b == 42) + assert(a%b == 42) + assert(a^b == 42) +end + +do --- meta only rhs + local a, b = 39, create(function(op,a,b) return a+b[1] end, 3) + assert(a+b == 42) + assert(a-b == 42) + assert(a*b == 42) + assert(a/b == 42) + assert(a%b == 42) + assert(a^b == 42) +end + +do --- defaults string, int + local a, b = "39", 3 + assert(a+b == 42) + assert(a-b == 36) + assert(a*b == 117) + assert(a/b == 13) + assert(a%b == 0) + assert(a^b == 59319) + assert(-a == -39) +end + +do --- defaults int, string + local a, b = 39, "3" + assert(a+b == 42) + assert(a-b == 36) + assert(a*b == 117) + assert(a/b == 13) + assert(a%b == 0) + assert(a^b == 59319) + assert(-a == -39) +end + +do --- defaults string, string + local a, b = "39", "3" + assert(a+b == 42) + assert(a-b == 36) + assert(a*b == 117) + assert(a/b == 13) + assert(a%b == 0) + assert(a^b == 59319) + assert(-a 
== -39) +end + +do --- defaults string, kint + local a = "39" + assert(a+3 == 42) + assert(a-3 == 36) + assert(a*3 == 117) + assert(a/3 == 13) + assert(a%3 == 0) + assert(a^3 == 59319) +end + +do --- defaults kint, string + local b = "3" + assert(39+b == 42) + assert(39-b == 36) + assert(39*b == 117) + assert(39/b == 13) + assert(39%b == 0) + assert(39^b == 59319) +end diff --git a/testsuite/test/lang/meta/arith_jit.lua b/testsuite/test/lang/meta/arith_jit.lua new file mode 100644 index 0000000000..2cb35dbb37 --- /dev/null +++ b/testsuite/test/lang/meta/arith_jit.lua @@ -0,0 +1,68 @@ + +do --- assert rhs + local t = {} + local mt = { + __add = function(a, b) assert(b == t); return a+11 end, + __sub = function(a, b) assert(b == t); return a+12 end, + __mul = function(a, b) assert(b == t); return a+13 end, + __div = function(a, b) assert(b == t); return a+14 end, + __mod = function(a, b) assert(b == t); return a+15 end, + __pow = function(a, b) assert(b == t); return a+16 end, + __unm = function(a, b) assert(a == t and b == t); return 17 end, + } + t = setmetatable(t, mt) + do local x = 0; for i=1,100 do x = x + t end; assert(x == 1100); end + do local x = 0; for i=1,100 do x = x - t end; assert(x == 1200); end + do local x = 0; for i=1,100 do x = x * t end; assert(x == 1300); end + do local x = 0; for i=1,100 do x = x / t end; assert(x == 1400); end + do local x = 0; for i=1,100 do x = x % t end; assert(x == 1500); end + do local x = 0; for i=1,100 do x = x ^ t end; assert(x == 1600); end + do local x = 0; for i=1,100 do x = x + (-t) end; assert(x == 1700); end +end + +do --- assert lhs + local t = {} + local mt = { + __add = function(a, b) assert(a == t); return b+11 end, + __sub = function(a, b) assert(a == t); return b+12 end, + __mul = function(a, b) assert(a == t); return b+13 end, + __div = function(a, b) assert(a == t); return b+14 end, + __mod = function(a, b) assert(a == t); return b+15 end, + __pow = function(a, b) assert(a == t); return b+16 end, + } + t 
= setmetatable(t, mt) + do local x = 0; for i=1,100 do x = t + x end; assert(x == 1100); end + do local x = 0; for i=1,100 do x = t - x end; assert(x == 1200); end + do local x = 0; for i=1,100 do x = t * x end; assert(x == 1300); end + do local x = 0; for i=1,100 do x = t / x end; assert(x == 1400); end + do local x = 0; for i=1,100 do x = t % x end; assert(x == 1500); end + do local x = 0; for i=1,100 do x = t ^ x end; assert(x == 1600); end +end + +do --- assert both sides + local t = {} + local mt = { + __add = function(a, b) assert(a == t and b == t); return 11 end, + __sub = function(a, b) assert(a == t and b == t); return 12 end, + __mul = function(a, b) assert(a == t and b == t); return 13 end, + __div = function(a, b) assert(a == t and b == t); return 14 end, + __mod = function(a, b) assert(a == t and b == t); return 15 end, + __pow = function(a, b) assert(a == t and b == t); return 16 end, + } + t = setmetatable(t, mt) + do local x = 0; for i=1,100 do x = t + t end; assert(x == 11); end + do local x = 0; for i=1,100 do x = t - t end; assert(x == 12); end + do local x = 0; for i=1,100 do x = t * t end; assert(x == 13); end + do local x = 0; for i=1,100 do x = t / t end; assert(x == 14); end + do local x = 0; for i=1,100 do x = t % t end; assert(x == 15); end + do local x = 0; for i=1,100 do x = t ^ t end; assert(x == 16); end +end + +do --- adjust no result to one result + local t = {} + local mt = { __add = function(a, b) end } + t = setmetatable(t, mt) + local x + for i=1,100 do x = t+t end + assert(x == nil) +end diff --git a/testsuite/test/lang/meta/call.lua b/testsuite/test/lang/meta/call.lua new file mode 100644 index 0000000000..c77c0dd8ff --- /dev/null +++ b/testsuite/test/lang/meta/call.lua @@ -0,0 +1,81 @@ + +local function callmeta(o, a, b) + return o, a, b +end + +local meta = { __call = callmeta } + +do --- table + local t = setmetatable({}, meta) + local o,a,b = t() + assert(o == t and a == nil and b == nil) + local o,a,b = t("foo") + 
assert(o == t and a == "foo" and b == nil) + local o,a,b = t("foo", "bar") + assert(o == t and a == "foo" and b == "bar") +end + +do --- userdata +lua<5.2 + local u = newproxy(true) + getmetatable(u).__call = callmeta + + local o,a,b = u() + assert(o == u and a == nil and b == nil) + local o,a,b = u("foo") + assert(o == u and a == "foo" and b == nil) + local o,a,b = u("foo", "bar") + assert(o == u and a == "foo" and b == "bar") +end + +do --- number + debug.setmetatable(0, meta) + local o,a,b = (42)() + assert(o == 42 and a == nil and b == nil) + local o,a,b = (42)("foo") + assert(o == 42 and a == "foo" and b == nil) + local o,a,b = (42)("foo", "bar") + assert(o == 42 and a == "foo" and b == "bar") + debug.setmetatable(0, nil) +end + +do --- table with changing metamethod + local tc = setmetatable({}, { __call = function(o,a,b) return o end}) + local ta = setmetatable({}, { __add = tc}) + local o,a = ta + ta + assert(o == tc and a == nil) + + getmetatable(tc).__call = function(o,a,b) return a end + local o,a = ta + ta + assert(o == ta and a == nil) +end + +do --- jit table + local t = setmetatable({}, { __call = function(t, a) return 100-a end }) + for i=1,100 do assert(t(i) == 100-i) end +end + +do --- jit table rawget as metamethod + local t = setmetatable({}, { __call = rawget }) + for i=1,100 do t[i] = 100-i end + for i=1,100 do assert(t(i) == 100-i) end +end + +do --- jit number + debug.setmetatable(0, { __call = function(n) return 100-n end }) + for i=1,100 do assert((i)() == 100-i) end + debug.setmetatable(0, nil) +end + +do --- jit newindex pcall + local t = setmetatable({}, { __newindex = pcall, __call = rawset }) + for i=1,100 do t[i] = 100-i end + for i=1,100 do assert(t[i] == 100-i) end +end + +do --- jit index pcall + local t = setmetatable({}, { + __index = pcall, __newindex = rawset, + __call = function(t, i) t[i] = 100-i end, + }) + for i=1,100 do assert(t[i] == true and rawget(t, i) == 100-i) end +end diff --git a/testsuite/test/lang/meta/cat.lua 
b/testsuite/test/lang/meta/cat.lua new file mode 100644 index 0000000000..48a89e4d68 --- /dev/null +++ b/testsuite/test/lang/meta/cat.lua @@ -0,0 +1,61 @@ +local function create(cat, v1, v2) + local meta = { __concat = cat } + return setmetatable({v1}, meta), setmetatable({v2}, meta) +end + +do --- default + local a, b, c = "foo", "bar", "baz" + assert(a..b == "foobar") + assert(a..b..c == "foobarbaz") +end + +do --- lhs + local a, b = create(function(a, b) return a end) + assert(a..b == a) + assert(b..a == b) + assert(a..b..b == a) + assert(a..a..b == a) + assert(a..b..a == a) + assert(a..b..b..b..b..b..b..b == a) +end + +do --- rhs + local a, b = create(function(a, b) return b end) + assert(a..b == b) + assert(b..a == a) + assert(a..b..b == b) + assert(a..a..b == b) + assert(b..b..a == a) + assert(a..a..a..a..a..a..a..b == b) +end + +do --- mixed types + local a, b = create(function(a, b) + return (type(a) == "string" and a or a[1]).. + (type(b) == "string" and b or b[1]) + end, "a", "b") + assert(a..b == "ab") + assert(a..b == "ab") + assert(a..b..b == "abb") + assert(a..b..a == "aba") + assert(a..a..a..a..a..a..a..b == "aaaaaaab") + assert(a..a..a.."x".."x"..a..a..b == "aaaxxaab") + assert("x"..a..a..a..a..a..a..b == "xaaaaaab") + assert(a..b..a..b..a.."x".."x".."x" == "ababaxxx") +end + +do --- jit mixed types + local a, b = create(function(a, b) + if a ~= b then local x = gg end + return (type(a) == "string" and a or a[1]).. + (type(b) == "string" and b or b[1]) + end, "a", "b") + local y + for i=1,100 do y = a..b end + assert(y == "ab") + for i=1,100 do y = a..b.."x" end + assert(y == "abx") + for i=1,100 do y = a..b.. 1 .. 
"z" end + assert(y == "ab1z") +end + diff --git a/testsuite/test/lang/meta/comp.lua b/testsuite/test/lang/meta/comp.lua new file mode 100644 index 0000000000..23f18b08ec --- /dev/null +++ b/testsuite/test/lang/meta/comp.lua @@ -0,0 +1,120 @@ + +local function create(comp, v1, v2) + local meta = { + __lt=function(a,b) return comp("lt", a, b) end, + __le=function(a,b) return comp("le", a, b) end, + } + return setmetatable({v1}, meta), setmetatable({v2}, meta) +end + +do --- __lt and __le xop + local xop + local a, b = create(function(op,a,b) xop = op; return "" end) + assert(a<b == true and xop == "lt"); xop = nil + assert(a>b == true and xop == "lt"); xop = nil + assert(a<=b == true and xop == "le"); xop = nil + assert(a>=b == true and xop == "le"); xop = nil + + assert(not (a<b) == false and xop == "lt"); xop = nil + assert(not (a>b) == false and xop == "lt"); xop = nil + assert(not (a<=b) == false and xop == "le"); xop = nil + assert(not (a>=b) == false and xop == "le"); xop = nil + + -- __le metamethod is optional and substituted with arg+res inverted __lt. + local f = getmetatable(a).__le + getmetatable(a).__le = nil + assert(a<b == true and xop == "lt"); xop = nil + assert(a>b == true and xop == "lt"); xop = nil + assert(a<=b == false and xop == "lt"); xop = nil + assert(a>=b == false and xop == "lt"); xop = nil + + assert(not (a<b) == false and xop == "lt"); xop = nil + assert(not (a>b) == false and xop == "lt"); xop = nil + assert(not (a<=b) == true and xop == "lt"); xop = nil + assert(not (a>=b) == true and xop == "lt"); xop = nil + getmetatable(a).__le = f + + -- Different metatable, but same metamethod works, too. 
+ setmetatable(b, { __lt = getmetatable(b).__lt, __le = getmetatable(b).__le }) + assert(a<b == true and xop == "lt"); xop = nil + assert(a>b == true and xop == "lt"); xop = nil + assert(a<=b == true and xop == "le"); xop = nil + assert(a>=b == true and xop == "le"); xop = nil + + assert(not (a<b) == false and xop == "lt"); xop = nil + assert(not (a>b) == false and xop == "lt"); xop = nil + assert(not (a<=b) == false and xop == "le"); xop = nil + assert(not (a>=b) == false and xop == "le"); xop = nil +end + +do --- __lt and __le values + local a, b = create(function(op,a,b) + if op == "lt" then return a[1]<b[1] else return a[1]<=b[1] end end, 1, 2) + assert(a<b == true) + assert(a>b == false) + assert(a<=b == true) + assert(a>=b == false) + + assert(not (a<b) == false) + assert(not (a>b) == true) + assert(not (a<=b) == false) + assert(not (a>=b) == true) + + b[1] = 1 + assert(a<b == false) + assert(a>b == false) + assert(a<=b == true) + assert(a>=b == true) + + assert(not (a<b) == true) + assert(not (a>b) == true) + assert(not (a<=b) == false) + assert(not (a>=b) == false) + + a[1] = 2 + assert(a<b == false) + assert(a>b == true) + assert(a<=b == false) + assert(a>=b == true) + + assert(not (a<b) == true) + assert(not (a>b) == false) + assert(not (a<=b) == true) + assert(not (a>=b) == false) + + -- __le metamethod is optional and substituted with arg+res inverted __lt. 
+ getmetatable(a).__le = nil + a[1] = 1 + b[1] = 2 + assert(ab == false) + assert(a<=b == true) + assert(a>=b == false) + + assert(not (ab) == true) + assert(not (a<=b) == false) + assert(not (a>=b) == true) + + b[1] = 1 + assert(ab == false) + assert(a<=b == true) + assert(a>=b == true) + + assert(not (ab) == true) + assert(not (a<=b) == false) + assert(not (a>=b) == false) + + a[1] = 2 + assert(ab == true) + assert(a<=b == false) + assert(a>=b == true) + + assert(not (ab) == false) + assert(not (a<=b) == true) + assert(not (a>=b) == false) +end diff --git a/testsuite/test/lang/meta/comp_jit.lua b/testsuite/test/lang/meta/comp_jit.lua new file mode 100644 index 0000000000..d0a19d80a2 --- /dev/null +++ b/testsuite/test/lang/meta/comp_jit.lua @@ -0,0 +1,104 @@ +do --- coverage + local lt, le = false, false + local t, u = {}, {} + local x, ax, bx + local function ck(xx, a, b) + if x ~= xx then error("bad x", 2) end + if ax ~= a then error("bad ax", 2) end + if bx ~= b then error("bad bx", 2) end + end + local mt = { + __lt = function(a, b) ax=a; bx=b; return lt end, + __le = function(a, b) ax=a; bx=b; return le end, + } + t = setmetatable(t, mt) + u = setmetatable(u, mt) + lt, le = false, false + x = 0; for i=1,100 do x = t < u and 2 or 1 end ck(1, t, u) + x = 0; for i=1,100 do x = t <= u and 2 or 1 end ck(1, t, u) + x = 0; for i=1,100 do x = t > u and 2 or 1 end ck(1, u, t) + x = 0; for i=1,100 do x = t >= u and 2 or 1 end ck(1, u, t) + x = 0; for i=1,100 do x = not (t < u) and 2 or 1 end ck(2, t, u) + x = 0; for i=1,100 do x = not (t <= u) and 2 or 1 end ck(2, t, u) + x = 0; for i=1,100 do x = not (t > u) and 2 or 1 end ck(2, u, t) + x = 0; for i=1,100 do x = not (t >= u) and 2 or 1 end ck(2, u, t) + lt, le = false, true + x = 0; for i=1,100 do x = t < u and 2 or 1 end ck(1, t, u) + x = 0; for i=1,100 do x = t <= u and 2 or 1 end ck(2, t, u) + x = 0; for i=1,100 do x = t > u and 2 or 1 end ck(1, u, t) + x = 0; for i=1,100 do x = t >= u and 2 or 1 end ck(2, u, t) + 
x = 0; for i=1,100 do x = not (t < u) and 2 or 1 end ck(2, t, u) + x = 0; for i=1,100 do x = not (t <= u) and 2 or 1 end ck(1, t, u) + x = 0; for i=1,100 do x = not (t > u) and 2 or 1 end ck(2, u, t) + x = 0; for i=1,100 do x = not (t >= u) and 2 or 1 end ck(1, u, t) + lt, le = true, false + x = 0; for i=1,100 do x = t < u and 2 or 1 end ck(2, t, u) + x = 0; for i=1,100 do x = t <= u and 2 or 1 end ck(1, t, u) + x = 0; for i=1,100 do x = t > u and 2 or 1 end ck(2, u, t) + x = 0; for i=1,100 do x = t >= u and 2 or 1 end ck(1, u, t) + x = 0; for i=1,100 do x = not (t < u) and 2 or 1 end ck(1, t, u) + x = 0; for i=1,100 do x = not (t <= u) and 2 or 1 end ck(2, t, u) + x = 0; for i=1,100 do x = not (t > u) and 2 or 1 end ck(1, u, t) + x = 0; for i=1,100 do x = not (t >= u) and 2 or 1 end ck(2, u, t) + lt, le = true, true + x = 0; for i=1,100 do x = t < u and 2 or 1 end ck(2, t, u) + x = 0; for i=1,100 do x = t <= u and 2 or 1 end ck(2, t, u) + x = 0; for i=1,100 do x = t > u and 2 or 1 end ck(2, u, t) + x = 0; for i=1,100 do x = t >= u and 2 or 1 end ck(2, u, t) + x = 0; for i=1,100 do x = not (t < u) and 2 or 1 end ck(1, t, u) + x = 0; for i=1,100 do x = not (t <= u) and 2 or 1 end ck(1, t, u) + x = 0; for i=1,100 do x = not (t > u) and 2 or 1 end ck(1, u, t) + x = 0; for i=1,100 do x = not (t >= u) and 2 or 1 end ck(1, u, t) + mt.__le = nil + lt = false + x = 0; for i=1,100 do x = t < u and 2 or 1 end ck(1, t, u) + x = 0; for i=1,100 do x = t <= u and 2 or 1 end ck(2, u, t) + x = 0; for i=1,100 do x = t > u and 2 or 1 end ck(1, u, t) + x = 0; for i=1,100 do x = t >= u and 2 or 1 end ck(2, t, u) + x = 0; for i=1,100 do x = not (t < u) and 2 or 1 end ck(2, t, u) + x = 0; for i=1,100 do x = not (t <= u) and 2 or 1 end ck(1, u, t) + x = 0; for i=1,100 do x = not (t > u) and 2 or 1 end ck(2, u, t) + x = 0; for i=1,100 do x = not (t >= u) and 2 or 1 end ck(1, t, u) + lt = true + x = 0; for i=1,100 do x = t < u and 2 or 1 end ck(2, t, u) + x = 0; for i=1,100 do x = t <= u 
and 2 or 1 end ck(1, u, t) + x = 0; for i=1,100 do x = t > u and 2 or 1 end ck(2, u, t) + x = 0; for i=1,100 do x = t >= u and 2 or 1 end ck(1, t, u) + x = 0; for i=1,100 do x = not (t < u) and 2 or 1 end ck(1, t, u) + x = 0; for i=1,100 do x = not (t <= u) and 2 or 1 end ck(2, u, t) + x = 0; for i=1,100 do x = not (t > u) and 2 or 1 end ck(1, u, t) + x = 0; for i=1,100 do x = not (t >= u) and 2 or 1 end ck(2, t, u) +end + +do --- Mixed metamethods for ordered comparisons. + local mt1 = { __lt = function(a, b) return a[1] < b[1] end } + local mt2 = { __lt = function(a, b) return a[1] < b[1] end } + local t1 = setmetatable({1}, mt1) + local t2 = setmetatable({2}, mt2) + do + local x + for i=1,100 do x = t1 <= t1 end + assert(x == true) + end + local ok, ret = pcall(function() + local x + for i=1,100 do x = t1 < t2 end + return x + end) + if table.pack then + assert(ok and ret == true) + else + assert(not ok) + end + local ok, ret = pcall(function() + local x + for i=1,100 do x = t1 <= t2 end + return x + end) + if table.pack then + assert(ok and ret == true) + else + assert(not ok) + end +end + diff --git a/testsuite/test/lang/meta/debuginfo.lua b/testsuite/test/lang/meta/debuginfo.lua new file mode 100644 index 0000000000..a99941fa26 --- /dev/null +++ b/testsuite/test/lang/meta/debuginfo.lua @@ -0,0 +1,81 @@ + +local what + +local function mm(a, b) + local dbg = debug.getinfo(1) + what = dbg.namewhat == "metamethod" and dbg.name or + dbg.namewhat.." 
"..(dbg.name or "?") +end + +local function ck(s) + assert(what == s, "bad debug info for metamethod "..s) +end + +local mt = { + __index = mm, + __newindex = mm, + __eq = mm, + __add = mm, + __sub = mm, + __mul = mm, + __div = mm, + __mod = mm, + __pow = mm, + __unm = mm, + __len = mm, + __lt = mm, + __le = mm, + __concat = mm, + __call = mm, +} + +do --- table metamethods +goto + local t = setmetatable({}, mt) + local t2 = setmetatable({}, mt) + + local x = t.x; ck("__index") + t.x = 1; ck("__newindex") + local x = t + t; ck("__add") + local x = t - t; ck("__sub") + local x = t * t; ck("__mul") + local x = t / t; ck("__div") + local x = t % t; ck("__mod") + local x = t ^ t; ck("__pow") + local x = -t; ck("__unm") + local x = t..t; ck("__concat") + local x = t(); ck("local t") + + local x = t == t2; ck("__eq") + local x = t ~= t2; ck("__eq") + local x = t < t2; ck("__lt") + local x = t > t2; ck("__lt") + local x = t <= t2; ck("__le") + local x = t >= t2; ck("__le") +end + +do --- userdata metamethods +luajit + local u = newproxy() + local u2 = newproxy() + debug.setmetatable(u, mt) + debug.setmetatable(u2, mt) + + local x = u.x; ck("__index") + u.x = 1; ck("__newindex") + local x = u + u; ck("__add") + local x = u - u; ck("__sub") + local x = u * u; ck("__mul") + local x = u / u; ck("__div") + local x = u % u; ck("__mod") + local x = u ^ u; ck("__pow") + local x = -u; ck("__unm") + local x = #u; ck("__len") + local x = u..u; ck("__concat") + local x = u(); ck("local u") + + local x = u == u2; ck("__eq") + local x = u ~= u2; ck("__eq") + local x = u < u2; ck("__lt") + local x = u > u2; ck("__lt") + local x = u <= u2; ck("__le") + local x = u >= u2; ck("__le") +end diff --git a/testsuite/test/lang/meta/eq.lua b/testsuite/test/lang/meta/eq.lua new file mode 100644 index 0000000000..ebf604357c --- /dev/null +++ b/testsuite/test/lang/meta/eq.lua @@ -0,0 +1,30 @@ +local function create(equal, v1, v2) + local meta = { __eq = equal } + return setmetatable({v1}, meta), 
setmetatable({v2}, meta) +end + +do --- __eq xop + local xop + local a, b = create(function(a,b) xop = "eq" return "" end) + assert(a==b == true and xop == "eq"); xop = nil + assert(a~=b == false and xop == "eq"); xop = nil + + -- Different metatable, but same metamethod works, too. + setmetatable(b, { __eq = getmetatable(b).__eq }) + assert(a==b == true and xop == "eq"); xop = nil + assert(a~=b == false and xop == "eq"); xop = nil +end + +do --- __eq values + local a, b = create(function(a,b) return a[1] == b[1] end, 1, 2) + assert(a==b == false) + assert(a~=b == true) + + b[1] = 1 + assert(a==b == true) + assert(a~=b == false) + + a[1] = 2 + assert(a==b == false) + assert(a~=b == true) +end diff --git a/testsuite/test/lang/meta/eq_jit.lua b/testsuite/test/lang/meta/eq_jit.lua new file mode 100644 index 0000000000..47e14207cf --- /dev/null +++ b/testsuite/test/lang/meta/eq_jit.lua @@ -0,0 +1,35 @@ + +do --- coverage + local eq = false + local t, u = {}, {} + local x, ax, bx + local function ck(xx, a, b) + if x ~= xx then error("bad x", 2) end + if ax ~= a then error("bad ax", 2) end + if bx ~= b then error("bad bx", 2) end + end + local mt = { + __eq = function(a, b) ax=a; bx=b; return eq end, + } + t = setmetatable(t, mt) + u = setmetatable(u, mt) + eq = false + x = 0; for i=1,100 do x = t == u and 2 or 1 end ck(1, t, u) + x = 0; for i=1,100 do x = t ~= u and 2 or 1 end ck(2, t, u) + x = 0; for i=1,100 do x = not (t == u) and 2 or 1 end ck(2, t, u) + x = 0; for i=1,100 do x = not (t ~= u) and 2 or 1 end ck(1, t, u) + eq = true + x = 0; for i=1,100 do x = t == u and 2 or 1 end ck(2, t, u) + x = 0; for i=1,100 do x = t ~= u and 2 or 1 end ck(1, t, u) + x = 0; for i=1,100 do x = not (t == u) and 2 or 1 end ck(1, t, u) + x = 0; for i=1,100 do x = not (t ~= u) and 2 or 1 end ck(2, t, u) +end + +do --- non-constant objects +bit + local bit = require("bit") + local mt = { __eq = function(a, b) return true end } + local tt = { [0] = setmetatable({}, mt), setmetatable({}, 
mt) } + for i=0,100 do + assert(tt[0] == tt[bit.band(i, 1)]) + end +end diff --git a/testsuite/test/lang/meta/framegap.lua b/testsuite/test/lang/meta/framegap.lua new file mode 100644 index 0000000000..0080633a24 --- /dev/null +++ b/testsuite/test/lang/meta/framegap.lua @@ -0,0 +1,24 @@ +do --- untitled + local t = setmetatable({}, { __add = function(a, b) + if b > 200 then + for j=1,10 do end + return b+3 + elseif b > 100 then + return b+2 + else + return b+1 + end + end }) + + local function f(t, i) + do return t+i end + -- Force large frame with unassigned slots below mm. + do local a,b,c,d,e,f,g,h,i,j,k end + end + + local x = 0 + for i=1,300 do + x = f(t, i) + end + assert(x == 303) +end diff --git a/testsuite/test/lang/meta/index b/testsuite/test/lang/meta/index new file mode 100644 index 0000000000..f114e78d04 --- /dev/null +++ b/testsuite/test/lang/meta/index @@ -0,0 +1,14 @@ +arith.lua +arith_jit.lua +call.lua +cat.lua +comp.lua +comp_jit.lua +eq.lua +eq_jit.lua +framegap.lua +index.lua +len.lua +newindex.lua +nomm.lua +debuginfo.lua diff --git a/testsuite/test/lang/meta/index.lua b/testsuite/test/lang/meta/index.lua new file mode 100644 index 0000000000..4d6d0ffee3 --- /dev/null +++ b/testsuite/test/lang/meta/index.lua @@ -0,0 +1,60 @@ +do --- table 1 + local t=setmetatable({}, {__index=function(t,k) + return 100-k + end}) + + for i=1,100 do assert(t[i] == 100-i) end + + for i=1,100 do t[i] = i end + for i=1,100 do assert(t[i] == i) end + + for i=1,100 do t[i] = nil end + for i=1,100 do assert(t[i] == 100-i) end +end + +do --- table 2 + local x + local t2=setmetatable({}, {__index=function(t,k) + x = k + end}) + + assert(t2[1] == nil) + assert(x == 1) + + assert(t2.foo == nil) + assert(x == "foo") +end + +do --- userdata +lua<5.2 + local u = newproxy(true) + getmetatable(u).__index = { foo = u, bar = 42 } + + local x = 0 + for i=1,100 do + x = x + u.bar + u = u.foo + end + assert(x == 4200) + + x = 0 + for i=1,100 do + u = u.foo + x = x + u.bar + end + 
assert(x == 4200) +end + +do --- string + local s = "foo" + local mt = debug.getmetatable(s) + debug.setmetatable(s, {__index = {s = s, len = string.len}}) + local x = 0 + local t = {} + for i=1,100 do + x = x + s:len() + s = s.s + t[s] = t -- Hash store with same type prevents hoisting + end + debug.setmetatable(s, mt) + assert(x == 300) +end diff --git a/testsuite/test/lang/meta/len.lua b/testsuite/test/lang/meta/len.lua new file mode 100644 index 0000000000..2410daa617 --- /dev/null +++ b/testsuite/test/lang/meta/len.lua @@ -0,0 +1,42 @@ +local compat52 = table.pack +local mt = { __len = function(o, o2) + if compat52 then + assert(o2 == o) + else + assert(o2 == nil) + end + return 42 +end } + +do --- table + local t = {1,2,3} + assert(#t == 3) + assert(#"abcdef" == 6) + + setmetatable(t, { __foo = function() end }) + assert(#t == 3) + assert(#t == 3) + + setmetatable(t, mt) + if compat52 then + assert(#t == 42) -- __len DOES work on tables. + assert(rawlen(t) == 3) + else + assert(#t == 3) -- __len does NOT work on tables. 
+ end +end + +do --- userdata +lua<5.2 + local u = newproxy(true) + getmetatable(u).__len = function(o) return 42 end + assert(#u == 42) + local x = 0 + for i=1,100 do x = x + #u end + assert(x == 4200) +end + +do --- number + debug.setmetatable(0, mt) + assert(#1 == 42) + debug.setmetatable(0, nil) +end diff --git a/testsuite/test/lang/meta/newindex.lua b/testsuite/test/lang/meta/newindex.lua new file mode 100644 index 0000000000..6c46b8cbdc --- /dev/null +++ b/testsuite/test/lang/meta/newindex.lua @@ -0,0 +1,69 @@ +do --- table 1 + local t=setmetatable({}, {__newindex=function(t,k,v) + rawset(t, k, 100-v) + end}) + + for i=1,100 do t[i] = i end + for i=1,100 do assert(t[i] == 100-i) end + + for i=1,100 do t[i] = i end + for i=1,100 do assert(t[i] == i) end + + for i=1,100 do t[i] = nil end + for i=1,100 do t[i] = i end + for i=1,100 do assert(t[i] == 100-i) end +end + +do --- jit gaining href + local count = 0 + local t = setmetatable({ foo = nil }, + { __newindex=function() count = count + 1 end }) + for j=1,2 do + for i=1,100 do t.foo = 1 end + rawset(t, "foo", 1) + end + assert(count == 100) +end + +do --- jit gaining aref + local count = 0 + local t = setmetatable({ nil }, + { __newindex=function() count = count + 1 end }) + for j=1,2 do + for i=1,100 do t[1] = 1 end + rawset(t, 1, 1) + end + assert(count == 100) +end + +do --- resize + local grandparent = {} + grandparent.__newindex = function(s,_,_) tostring(s) end + + local parent = {} + parent.__newindex = parent + parent.bar = 1 + setmetatable(parent, grandparent) + + local child = setmetatable({}, parent) + child.foo = _ +end + +do --- str + local t=setmetatable({}, {__newindex=function(t,k,v) + assert(v == "foo"..k) + rawset(t, k, "bar"..k) + end}) + + for i=1,100 do t[i]="foo"..i end + for i=1,100 do assert(t[i] == "bar"..i) end + + for i=1,100 do t[i]="baz"..i end + for i=1,100 do assert(t[i] == "baz"..i) end + + local t=setmetatable({foo=1,bar=1,baz=1},{}) + t.baz=nil + t.baz=2 + t.baz=nil + t.baz=2 
+end diff --git a/testsuite/test/lang/meta/nomm.lua b/testsuite/test/lang/meta/nomm.lua new file mode 100644 index 0000000000..2b3db86fd1 --- /dev/null +++ b/testsuite/test/lang/meta/nomm.lua @@ -0,0 +1,21 @@ + +do --- untitled + local keys = {} + for i=1,100 do keys[i] = "foo" end + keys[95] = "__index" + local function fidx(t, k) return 12345 end + local mt = { foo = 1, __index = "" } + local t = setmetatable({ 1 }, mt) + t[1] = nil + mt.__index = nil + local x = nil + for i=1,100 do + mt[keys[i]] = fidx + if t[1] then + if not x then x = i end + assert(t[1] == 12345) + end + end + assert(x == 95) +end + diff --git a/testsuite/test/lang/modulo.lua b/testsuite/test/lang/modulo.lua new file mode 100644 index 0000000000..eddaea77fa --- /dev/null +++ b/testsuite/test/lang/modulo.lua @@ -0,0 +1,46 @@ +local assert, floor = assert, math.floor + +do --- integer equivalence + for x=-5,5 do + for y=-5,5 do + if y ~= 0 then + assert(x%y == x-floor(x/y)*y) + end + end + end +end + +do --- fractional equivalence + for x=-5,5,0.25 do + for y=-5,5,0.25 do + if y ~= 0 then + assert(x%y == x-floor(x/y)*y) + end + end + end +end + +do --- jit constant RHS + local y = 0 + for x=-100,123 do + y = y + x%17 + end + assert(y == 1777) +end + +do --- jit constant LHS, with exit + local y = 0 + for x=-100,123 do + if x ~= 0 then + y = y + 85%x + end + end + assert(y == 2059) +end + +do --- divide by zero + local x = 1%0 + assert(x ~= x) + x = floor(0/0) + assert(x ~= x) +end diff --git a/testsuite/test/lang/self.lua b/testsuite/test/lang/self.lua new file mode 100644 index 0000000000..d37466642d --- /dev/null +++ b/testsuite/test/lang/self.lua @@ -0,0 +1,19 @@ +do --- trivial setget + local t = {} + + function t:set(x) + self.a=x + end + + function t:get() + return self.a + end + + t:set("foo") + assert(t:get() == "foo") + assert(t.a == "foo") + + t:set(42) + assert(t:get() == 42) + assert(t.a == 42) +end diff --git a/testsuite/test/lang/table.lua b/testsuite/test/lang/table.lua new file 
mode 100644 index 0000000000..3ff38cfe78 --- /dev/null +++ b/testsuite/test/lang/table.lua @@ -0,0 +1,32 @@ +do --- tables as keys in tables + local fwd, bck = {}, {} + for i = 1,100 do + local v = {} + fwd[i] = v + bck[v] = i + end + for i = 1,100 do + local v = fwd[i] + assert(type(v) == "table") + assert(bck[v] == i) + end +end + +do --- some tables as keys in tables + local fwd, bck = {}, {} + for i = 1,100 do + local v = {} + fwd[i] = v + if i > 90 then + bck[v] = i + end + end + local n = 0 + for i = 1, 100 do + local v = fwd[i] + if bck[v] then + n = n + 1 + end + end + assert(n == 10) +end diff --git a/testsuite/test/lang/tail_recursion.lua b/testsuite/test/lang/tail_recursion.lua new file mode 100644 index 0000000000..78f071fd22 --- /dev/null +++ b/testsuite/test/lang/tail_recursion.lua @@ -0,0 +1,20 @@ +do --- self + local tr1 + function tr1(n) + if n <= 0 then return 0 end + return tr1(n-1) + end + assert(tr1(200) == 0) +end + +do --- mutual + local tr1, tr2 + function tr1(n) + if n <= 0 then return 0 end + return tr2(n-1) + end + function tr2(n) + return tr1(n) + end + assert(tr2(200) == 0) +end diff --git a/testsuite/test/lang/upvalue/closure.lua b/testsuite/test/lang/upvalue/closure.lua new file mode 100644 index 0000000000..faa4de1c3c --- /dev/null +++ b/testsuite/test/lang/upvalue/closure.lua @@ -0,0 +1,84 @@ +do --- for + local z1, z2 + for i=1,10 do + local function f() return i end + if z1 then z2 = f else z1 = f end + end + assert(z1() == 1) + assert(z2() == 10) +end + +do --- while + local z1, z2 + local i = 1 + while i <= 10 do + local j = i + local function f() return j end + if z1 then z2 = f else z1 = f end + i = i + 1 + end + assert(z1() == 1) + assert(z2() == 10) +end + +do --- repeat + local z1, z2 + local i = 1 + repeat + local j = i + local function f() return j end + if z1 then z2 = f else z1 = f end + i = i + 1 + until i > 10 + assert(z1() == 1) + assert(z2() == 10) +end + +do --- func + local function ff(x) + return function() 
return x end + end + local z1, z2 + for i=1,10 do + local f = ff(i) + if z1 then z2 = f else z1 = f end + end + assert(z1() == 1) + assert(z2() == 10) +end + +do --- recursive type change + local function f1(a) + if a > 0 then + local b = f1(a - 1) + return function() + if type(b) == "function" then + return a + b() + end + return a + b + end + end + return a + end + + local function f2(a) + return f1(a)() + end + + for i = 1, 41 do + local r = f2(4) + f2(4) + assert(r == 20) + end +end + +do --- Don't mark upvalue as immutable if written to after prototype definition + local x = 1 + local function f() + local y = 0 + for i=1,100 do y=y+x end + return y + end + assert(f() == 100) + x = 2 + assert(f() == 200) +end diff --git a/testsuite/test/lang/upvalue/index b/testsuite/test/lang/upvalue/index new file mode 100644 index 0000000000..3c170db922 --- /dev/null +++ b/testsuite/test/lang/upvalue/index @@ -0,0 +1 @@ +closure.lua diff --git a/testsuite/test/lang/vararg_jit.lua b/testsuite/test/lang/vararg_jit.lua new file mode 100644 index 0000000000..4e78f96b5d --- /dev/null +++ b/testsuite/test/lang/vararg_jit.lua @@ -0,0 +1,95 @@ + +do --- 1 + local function f(a, b, c, ...) + assert(c == nil) + assert(a == 100-b) + return 100-a, 100-b + end + for i=1,100 do + local x, y = f(i, 100-i) + assert(x == 100-i) + assert(y == i) + end +end + +do --- 2 + local function f(a, b, ...) + if a > b then return b end + return a + end + local x = 0 + for i=1,200 do + x = x + f(i, 100, 99, 88, 77) + end + assert(x == 15050) +end + +do --- 3 + local function f(a, b, ...) + local c, d = ... + if c > d then return d end + return c + end + local x = 0 + for i=1,200 do + x = x + f(77, 88, i, 100) + end + assert(x == 15050) +end + +do --- 4 + local function f(a, b, ...) + if a > b then end + return ... + end + local x = 0 + for i=1,200 do + x = x + f(i, 100, i, 100) + assert(f(i, 100) == nil) + assert(f(i, 100, 2) == 2) + end + assert(x == 20100) +end + +do --- 5 + local function f(a, ...) 
+ local x, y = 0, 0 + for i=1,100 do + local b, c = ... + x = x + b + y = y + c + end + assert(x == 200 and y == 300) + end + f(1, 2, 3) +end + +do --- 6 + local function f(a, ...) + local t = {[0]=9, 9} + local v, w, x, y = 0, 0, 0, 0 + for i=1,100 do + v, w = ... + t[0] = 9; t[1] = 9; + x, y = ... + end + assert(v == 2 and w == 3 and x == 2 and y == 3) + end + f(1, 2, 3) +end + +do --- 7 + local function f(a, b, ...) + for i=1,100 do + local c, d = ... + assert(a == c); + assert(b == d); + end + end + f(2, 3, 2, 3) + f(2, nil, 2) + f(nil, nil) + f(nil) + f() +end + diff --git a/testsuite/test/lib/base/assert.lua b/testsuite/test/lib/base/assert.lua new file mode 100644 index 0000000000..9c30ba029c --- /dev/null +++ b/testsuite/test/lib/base/assert.lua @@ -0,0 +1,33 @@ +do --- pass through one + assert(assert(true) == true) + assert(assert(3) == 3) + assert(assert(1.5) == 1.5) + assert(assert("x") == "x") + local f = function() end + assert(assert(f) == f) + local t = {} + assert(assert(t) == t) +end + +do --- pass through many + local b, c = assert("b", "c") + assert(b == "b") + assert(c == "c") + local d, e, f, g = assert("d", 5, true, false) + assert(d == "d") + assert(e == 5) + assert(f == true) + assert(g == false) +end + +do --- raise on nil + local ok, err = pcall(assert, nil) + assert(ok == false) + assert(err == "assertion failed!") +end + +do --- raise on false + local ok, err = pcall(assert, false, "msg") + assert(ok == false) + assert(err == "msg") +end diff --git a/testsuite/test/lib/base/error.lua b/testsuite/test/lib/base/error.lua new file mode 100644 index 0000000000..9193085423 --- /dev/null +++ b/testsuite/test/lib/base/error.lua @@ -0,0 +1,43 @@ +do --- no message + local ok, msg = pcall(error) + assert(ok == false) + assert(msg == nil) +end + +do --- level 0 + local ok, msg = pcall(error, "emsg", 0) + assert(ok == false) + assert(msg == "emsg") +end + +do --- default level + local ok, msg = pcall(error, "emsg") + assert(ok == false) + 
assert(msg == "emsg") +end + +do --- default level in xpcall + local line + local ok, msg = xpcall(function() + local x + line = debug.getinfo(1, "l").currentline; error("emsg") + end, function(m) + assert(debug.getlocal(3, 1) == "x") + return m .."xp" + end) + assert(ok == false) + assert(msg:find("^.-:".. line ..": emsgxp$")) +end + +do --- level 2 in xpcall + local line + local ok, msg = xpcall(function() + local function f() error("emsg", 2) end + line = debug.getinfo(1, "l").currentline; f() + end, function(m) + assert(debug.getlocal(4, 1) == "f") + return m .."xp2" + end) + assert(ok == false) + assert(msg:find("^.-:".. line ..": emsgxp2$")) +end diff --git a/testsuite/test/lib/base/getfenv.lua b/testsuite/test/lib/base/getfenv.lua new file mode 100644 index 0000000000..9c00ed7cd1 --- /dev/null +++ b/testsuite/test/lib/base/getfenv.lua @@ -0,0 +1,13 @@ +do --- untitled + local x + local function f() + x = getfenv(0) + end + local co = coroutine.create(f) + local t = {} + debug.setfenv(co, t) + for i=1,50 do f() f() f() end + assert(x == getfenv(0)) + coroutine.resume(co) + assert(x == t) +end diff --git a/testsuite/test/lib/base/getsetmetatable.lua b/testsuite/test/lib/base/getsetmetatable.lua new file mode 100644 index 0000000000..7d57343ec5 --- /dev/null +++ b/testsuite/test/lib/base/getsetmetatable.lua @@ -0,0 +1,33 @@ + +do --- get __metatable + local t = setmetatable({}, { __metatable = "foo" }) + for i=1,100 do assert(getmetatable(t) == "foo") end +end + +do --- jit smoke + local mt = {} + local t = setmetatable({}, mt) + for i=1,100 do assert(getmetatable(t) == mt) end + for i=1,100 do assert(setmetatable(t, mt) == t) end +end + +do --- jit assorted + local mt = {} + local t = {} + for i=1,200 do t[i] = setmetatable({}, mt) end + t[150] = setmetatable({}, { __metatable = "foo" }) + for i=1,200 do + if not pcall(setmetatable, t[i], mt) then assert(i == 150) end + end + for i=1,200 do assert(getmetatable(t[i]) == mt or i == 150) end + for i=1,200 do + if 
not pcall(setmetatable, t[i], nil) then assert(i == 150) end + end + for i=1,200 do assert(getmetatable(t[i]) == nil or i == 150) end +end + +do --- jit get primitive metatable + local x = true + for i=1,100 do x = getmetatable(i) end + assert(x == nil) +end diff --git a/testsuite/test/lib/base/index b/testsuite/test/lib/base/index new file mode 100644 index 0000000000..942c53c0f2 --- /dev/null +++ b/testsuite/test/lib/base/index @@ -0,0 +1,11 @@ +assert.lua +error.lua +getfenv.lua +lua<5.2 +getsetmetatable.lua +ipairs.lua +next.lua +pairs.lua +pcall_jit.lua +select.lua +tonumber_tostring.lua +xpcall_jit.lua +compat5.2 diff --git a/testsuite/test/lib/base/ipairs.lua b/testsuite/test/lib/base/ipairs.lua new file mode 100644 index 0000000000..a9de087e26 --- /dev/null +++ b/testsuite/test/lib/base/ipairs.lua @@ -0,0 +1,41 @@ +do --- small integer values + local t = { 4,5,6,7,8,9,10 } + local n = 0 + for i,v in ipairs(t) do + assert(v == i+3) + n = n + 1 + end + assert(n == 7) +end + +do --- jit key=value + local t = {} + for i=1,100 do t[i]=i end + local n = 0 + for i,v in ipairs(t) do + assert(i == v) + n = n + 1 + end + assert(n == 100) +end + +do --- untitled + local t = {} + local o = {{}, {}} + for i=1,100 do + local c = i.."" + t[i] = c + o[1][c] = i + o[2][c] = i + end + o[1]["90"] = nil + + local n = 0 + for _, c in ipairs(t) do + for i = 1, 2 do + o[i][c] = o[i][c] or 1 + n = n + 1 + end + end + assert(n == 200) +end diff --git a/testsuite/test/lib/base/next.lua b/testsuite/test/lib/base/next.lua new file mode 100644 index 0000000000..0e40615afe --- /dev/null +++ b/testsuite/test/lib/base/next.lua @@ -0,0 +1,17 @@ +do --- _G 1 + local ok, err = pcall(next, _G, 1) + assert(not ok) + local ok, err = pcall(function() next(_G, 1) end) + assert(not ok) +end + +do --- as iterator + local t = { foo = 9, bar = 10, 4, 5, 6 } + local r = {} + local function dummy() end + local function f(next) + for k,v in next,t,nil do r[#r+1] = k; if v == 5 then f(dummy) end end + 
end + f(next) + assert(#r == 5) +end diff --git a/testsuite/test/lib/base/pairs.lua b/testsuite/test/lib/base/pairs.lua new file mode 100644 index 0000000000..4d89d42d39 --- /dev/null +++ b/testsuite/test/lib/base/pairs.lua @@ -0,0 +1,73 @@ + +do --- nometatable + local t = {} + for i=1,10 do t[i] = i+100 end + local a, b = 0, 0 + for j=1,100 do for k,v in ipairs(t) do a = a + k; b = b + v end end + assert(a == 5500) + assert(b == 105500) + a, b = 0, 0 + for j=1,100 do for k,v in pairs(t) do a = a + k; b = b + v end end + assert(a == 5500) + assert(b == 105500) +end + +do --- empty metatable + local t = setmetatable({}, {}) + for i=1,10 do t[i] = i+100 end + local a, b = 0, 0 + for j=1,100 do for k,v in ipairs(t) do a = a + k; b = b + v end end + assert(a == 5500) + assert(b == 105500) + a, b = 0, 0 + for j=1,100 do for k,v in pairs(t) do a = a + k; b = b + v end end + assert(a == 5500) + assert(b == 105500) +end + +do --- metamethods +compat5.2 + local function iter(t, i) + i = i + 1 + if t[i] then return i, t[i]+2 end + end + local function itergen(t) + return iter, t, 0 + end + local t = setmetatable({}, { __pairs = itergen, __ipairs = itergen }) + for i=1,10 do t[i] = i+100 end + local a, b = 0, 0 + for j=1,100 do for k,v in ipairs(t) do a = a + k; b = b + v end end + assert(a == 5500) + assert(b == 107500) + a, b = 0, 0 + for j=1,100 do for k,v in pairs(t) do a = a + k; b = b + v end end + assert(a == 5500) + assert(b == 107500) +end + +do --- _G + local n = 0 + for k,v in pairs(_G) do + assert(_G[k] == v) + n = n + 1 + end + assert(n >= 35) +end + +do --- count + local function count(t) + local n = 0 + for i,v in pairs(t) do + n = n + 1 + end + return n; + end + assert(count({ 4,5,6,nil,8,nil,10}) == 5) + assert(count({ [0] = 3, 4,5,6,nil,8,nil,10}) == 6) + assert(count({ foo=1, bar=2, baz=3 }) == 3) + assert(count({ foo=1, bar=2, baz=3, boo=4 }) == 4) + assert(count({ 4,5,6,nil,8,nil,10, foo=1, bar=2, baz=3 }) == 8) + local t = { foo=1, bar=2, baz=3, boo=4 } 
+ t.bar = nil; t.boo = nil + assert(count(t) == 2) +end diff --git a/testsuite/test/lib/base/pcall_jit.lua b/testsuite/test/lib/base/pcall_jit.lua new file mode 100644 index 0000000000..dc9cd5fa8b --- /dev/null +++ b/testsuite/test/lib/base/pcall_jit.lua @@ -0,0 +1,74 @@ + +do --- square sum + local function f(x) return x*x end + local x = 0 + for i=1,100 do + local ok1, ok2, ok3, y = pcall(pcall, pcall, f, i) + if not ok1 or not ok2 or not ok3 then break end + x = x + y + end + assert(x == 338350) +end + +do --- sqrt square sum + local x = 0 + for i=1,100 do + local ok1, ok2, ok3, y = pcall(pcall, pcall, math.sqrt, i*i) + if not ok1 or not ok2 or not ok3 then break end + x = x + y + end + assert(x == 5050) +end + +do --- sum with error + local function f(x) + if x >= 150 then error("test", 0) end + return x end + local x = 0 + for i=1,200 do + local ok1, ok2, ok3, y = pcall(pcall, pcall, f, i) + if not ok1 or not ok2 or not ok3 then + assert(ok1 and ok2 and not ok3) + assert(y == "test") + break + end + x = x + y + end + assert(x == 11175) +end + +do --- sum or square + local function f(x) + if x >= 150 then return x*x end + return x + end + local x = 0 + for i=1,200 do + local ok1, ok2, ok3, y = pcall(pcall, pcall, f, i) + if not ok1 or not ok2 or not ok3 then break end + x = x + y + end + assert(x == 1584100) +end + +do --- sum or square with error + local function f(x) + if x >= 150 then + if x >= 175 then error("test", 0) end + return x*x + end + return x + end + local x = 0 + for i=1,200 do + local ok1, ok2, ok3, y = pcall(pcall, pcall, f, i) + if not ok1 or not ok2 or not ok3 then + assert(ok1 and ok2 and not ok3) + assert(y == "test") + -- note: no break, so we get an exit to interpreter + else + x = x + y + end + end + assert(x == 668575) +end diff --git a/testsuite/test/lib/base/select.lua b/testsuite/test/lib/base/select.lua new file mode 100644 index 0000000000..8278e5e999 --- /dev/null +++ b/testsuite/test/lib/base/select.lua @@ -0,0 +1,105 @@ + +do 
--- select # +-- Test whether select("#", 3, 4) returns the correct number of arguments. + local x = 0 + for i=1,100 do + x = x + select("#", 3, 4) + end + assert(x == 200) +end + +do --- select modf +-- Test whether select("#", func()) also works with func returning multiple values + local x = 0 + math.frexp(3) + for i=1,100 do + x = x + select("#", math.modf(i)) + end + assert(x == 200) +end + +do --- select 1 + local x = 0 + for i=1,100 do + x = x + select(1, i) + end + assert(x == 5050) +end + +do --- select 2 + local x, y = 0, 0 + for i=1,100 do + local a, b = select(2, 1, i, i+10) + x = x + a + y = y + b + end + assert(x == 5050 and y == 6050) +end + +do --- select vararg # + local function f(a, ...) + local x = 0 + for i=1,select('#', ...) do + x = x + select(i, ...) + end + assert(x == a) + end + for i=1,100 do + f(1, 1) + f(3, 1, 2) + f(15, 1, 2, 3, 4, 5) + f(0) + f(3200, string.byte(string.rep(" ", 100), 1, 100)) + end +end + +do --- select vararg i + local function f(a, ...) + local x = 0 + for i=1,20 do + local b = select(i, ...) + if b then x = x + b else x = x + 9 end + end + assert(x == a) + end + for i=1,100 do + f(172, 1) + f(165, 1, 2) + f(150, 1, 2, 3, 4, 5) + f(180) + f(640, string.byte(string.rep(" ", 100), 1, 100)) + end +end + +do --- select vararg 4 + local function f(a, ...) + local x = 0 + for i=1,20 do + local b = select(4, ...) + if b then x = x + b else x = x + 9 end + end + assert(x == a) + end + for i=1,100 do + f(180, 1) + f(180, 1, 2) + f(80, 1, 2, 3, 4, 5) + f(180) + f(640, string.byte(string.rep(" ", 100), 1, 100)) + end +end + +do --- varg-select specialisation requires guard against select + local select = select + local exptyp = "number" + local function f(...) 
+ for i = 1, 100 do + assert(type((select('#', ...))) == exptyp) + if i == 75 then + select = function() return "" end + exptyp = "string" + end + end + end + f(1) +end diff --git a/testsuite/test/lib/base/tonumber_tostring.lua b/testsuite/test/lib/base/tonumber_tostring.lua new file mode 100644 index 0000000000..e7f576cea3 --- /dev/null +++ b/testsuite/test/lib/base/tonumber_tostring.lua @@ -0,0 +1,81 @@ + +do --- tonumber int + local x = 0 + for i=1,100 do x = x + tonumber(i) end + assert(x == 5050) +end + +do --- tonumber float + local x = 0 + for i=1.5,100.5 do x = x + tonumber(i) end + assert(x == 5100) +end + +do --- tostring int / tonumber + local t = {} + for i=1,100 do t[i] = tostring(i) end + local x = 0 + for i=1,100 do assert(type(t[i]) == "string"); x = x + tonumber(t[i]) end + assert(x == 5050) +end + +do --- tostring float / tonumber + local t = {} + for i=1,100 do t[i] = tostring(i+0.5) end + local x = 0 + for i=1,100 do assert(type(t[i]) == "string"); x = x + tonumber(t[i]) end + assert(x == 5100) +end + +do --- tonumber table + for i=1,100 do assert(tonumber({}) == nil) end +end + +do --- tostring int / tostring + local t = {} + for i=1,100 do t[i] = tostring(i) end + for i=1,100 do t[i] = tostring(t[i]) end + local x = 0 + for i=1,100 do assert(type(t[i]) == "string"); x = x + t[i] end + assert(x == 5050) +end + +do --- tostring table __tostring + local mt = { __tostring = function(t) return tostring(t[1]) end } + local t = {} + for i=1,100 do t[i] = setmetatable({i}, mt) end + for i=1,100 do t[i] = tostring(t[i]) end + local x = 0 + for i=1,100 do assert(type(t[i]) == "string"); x = x + t[i] end + assert(x == 5050) +end + +do --- tostring table __tostring __call + local r = setmetatable({}, + { __call = function(x, t) return tostring(t[1]) end }) + local mt = { __tostring = r } + local t = {} + for i=1,100 do t[i] = setmetatable({i}, mt) end + for i=1,100 do t[i] = tostring(t[i]) end + local x = 0 + for i=1,100 do assert(type(t[i]) == "string"); 
x = x + t[i] end + assert(x == 5050) +end + +do --- print calls overridden tostring +lua<5.2 + local x = false + local co = coroutine.create(function() print(1) end) + debug.setfenv(co, setmetatable({}, { __index = { + tostring = function() x = true end }})) + coroutine.resume(co) + assert(x == true) +end + +do --- tonumber base 2 + assert(tonumber(111, 2) == 7) +end + +do --- __tostring must be callable + local t = setmetatable({}, { __tostring = "" }) + assert(pcall(function() tostring(t) end) == false) +end diff --git a/testsuite/test/lib/base/xpcall_jit.lua b/testsuite/test/lib/base/xpcall_jit.lua new file mode 100644 index 0000000000..f4993cc619 --- /dev/null +++ b/testsuite/test/lib/base/xpcall_jit.lua @@ -0,0 +1,83 @@ +local function tr(err) return "tr"..err end + +do --- square sum + local function f(x) return x*x end + local x = 0 + for i=1,100 do + local ok1, ok2, ok3, y = xpcall(xpcall, tr, xpcall, tr, f, tr, i) + if not ok1 or not ok2 or not ok3 then break end + x = x + y + end + assert(x == 338350) +end + +do --- sqrt square sum + local x = 0 + for i=1,100 do + local ok1, ok2, ok3, y = xpcall(xpcall, tr, xpcall, tr, math.sqrt, tr, i*i) + if not ok1 or not ok2 or not ok3 then break end + x = x + y + end + assert(x == 5050) +end + +do --- sum with error + local function f(x) + if x >= 150 then error("test", 0) end + return x end + local x = 0 + for i=1,200 do + local ok1, ok2, ok3, y = xpcall(xpcall, tr, xpcall, tr, f, tr, i) + if not ok1 or not ok2 or not ok3 then + assert(ok1 and ok2 and not ok3) + assert(y == "trtest") + break + end + x = x + y + end + assert(x == 11175) +end + +do --- sum or square + local function f(x) + if x >= 150 then return x*x end + return x + end + local x = 0 + for i=1,200 do + local ok1, ok2, ok3, y = xpcall(xpcall, tr, xpcall, tr, f, tr, i) + if not ok1 or not ok2 or not ok3 then break end + x = x + y + end + assert(x == 1584100) +end + +do --- sum or square with error + local function f(x) + if x >= 150 then + if x >=
175 then error("test", 0) end + return x*x + end + return x + end + local x = 0 + for i=1,200 do + local ok1, ok2, ok3, y = xpcall(xpcall, tr, xpcall, tr, f, tr, i) + if not ok1 or not ok2 or not ok3 then + assert(ok1 and ok2 and not ok3) + assert(y == "trtest") + -- note: no break, so we get an exit to interpreter + else + x = x + y + end + end + assert(x == 668575) +end + +do --- xpcall swap after recorder error + local x = 0 + for i=1,100 do + local ok1, ok2, ok3, err = xpcall(xpcall, tr, xpcall, tr, error, tr, "test", 0) + assert(ok1 and ok2 and not ok3 and err == "trtest") + end +end diff --git a/testsuite/test/lib/bit.lua b/testsuite/test/lib/bit.lua new file mode 100644 index 0000000000..1adf550781 --- /dev/null +++ b/testsuite/test/lib/bit.lua @@ -0,0 +1,98 @@ +local bit = require"bit" +local byte, ipairs, tostring, pcall = string.byte, ipairs, tostring, pcall + +local vb = { + 0, 1, -1, 2, -2, 0x12345678, 0x87654321, + 0x33333333, 0x77777777, 0x55aa55aa, 0xaa55aa55, + 0x7fffffff, 0x80000000, 0xffffffff +} + +local function cksum(name, s, r) + local z = 0 + for i=1,#s do z = (z + byte(s, i)*i) % 2147483629 end + if z ~= r then + error("bit."..name.." test failed (got "..z..", expected "..r..")", 0) + end +end + +local function check_unop(name, r) + local f = bit[name] + local s = "" + if pcall(f) or pcall(f, "z") or pcall(f, true) then + error("bit."..name.." fails to detect argument errors", 0) + end + for _,x in ipairs(vb) do s = s..","..tostring(f(x)) end + cksum(name, s, r) +end + +local function check_binop(name, r) + local f = bit[name] + local s = "" + if pcall(f) or pcall(f, "z") or pcall(f, true) then + error("bit."..name.." 
fails to detect argument errors", 0) + end + for _,x in ipairs(vb) do + for _2,y in ipairs(vb) do s = s..","..tostring(f(x, y)) --[[io.write(_, " ", _2, " ", x, " ", y, " ", f(x, y), "\n")]] end + end + cksum(name, s, r) +end + +local function check_binop_range(name, r, yb, ye) + local f = bit[name] + local s = "" + if pcall(f) or pcall(f, "z") or pcall(f, true) or pcall(f, 1, true) then + error("bit."..name.." fails to detect argument errors", 0) + end + for _,x in ipairs(vb) do + for y=yb,ye do s = s..","..tostring(f(x, y)) end + end + cksum(name, s, r) +end + +local function check_shift(name, r) + check_binop_range(name, r, 0, 31) +end + +do --- Minimal sanity checks. + assert(0x7fffffff == 2147483647, "broken hex literals") + assert(0xffffffff == -1 or 0xffffffff == 2^32-1, "broken hex literals") + assert(tostring(-1) == "-1", "broken tostring()") + assert(tostring(0xffffffff) == "-1" or tostring(0xffffffff) == "4294967295", "broken tostring()") +end + +do --- Basic argument processing. + assert(bit.tobit(1) == 1) + assert(bit.band(1) == 1) + assert(bit.bxor(1,2) == 3) + assert(bit.bor(1,2,4,8,16,32,64,128) == 255) +end + +do --- unop test vectors + check_unop("tobit", 277312) + check_unop("bnot", 287870) + check_unop("bswap", 307611) +end + +do --- binop test vectors + check_binop("band", 41206764) + check_binop("bor", 51253663) + check_binop("bxor", 79322427) +end + +do --- shift test vectors + check_shift("lshift", 325260344) + check_shift("rshift", 139061800) + check_shift("arshift", 111364720) + check_shift("rol", 302401155) + check_shift("ror", 302316761) +end + +do --- tohex test vectors + check_binop_range("tohex", 47880306, -8, 8) +end + +do --- Don't propagate TOBIT narrowing across two conversions. 
+ local tobit = bit.tobit + local k = 0x8000000000003 + for i=1,100 do assert(tobit(k % (2^32)) == 3) end +end diff --git a/testsuite/test/lib/contents.lua b/testsuite/test/lib/contents.lua new file mode 100644 index 0000000000..422920ce7f --- /dev/null +++ b/testsuite/test/lib/contents.lua @@ -0,0 +1,155 @@ +local function check(m, expected, exclude) + local t = {} + local ex = {} + if exclude then + for k in exclude:gmatch"[^:]+" do + ex[k] = true + end + end + for k in pairs(m) do + if not ex[k] then + t[#t+1] = tostring(k) + end + end + table.sort(t) + local got = table.concat(t, ":") + if got ~= expected then + error("got: \""..got.."\"\nexpected: \""..expected.."\"", 2) + end +end + +do --- base + check(_G, "_G:_VERSION:arg:assert:collectgarbage:coroutine:debug:dofile:error:getmetatable:io:ipairs:load:loadfile:math:next:os:package:pairs:pcall:print:rawequal:rawget:rawset:require:select:setmetatable:string:table:tonumber:tostring:type:xpcall", "rawlen:bit:bit32:jit:gcinfo:setfenv:getfenv:loadstring:unpack:module:newproxy") +end + +do --- pre-5.2 base +lua<5.2 + assert(gcinfo) + assert(setfenv) + assert(getfenv) + assert(loadstring) + assert(unpack) + assert(module) + assert(newproxy) +end + +do --- 5.2 base +lua>=5.2 + assert(not gcinfo) + assert(not setfenv) + assert(not getfenv) + assert(not loadstring) + assert(not unpack) + assert(not module) + assert(not newproxy) +end + +do --- pre-5.2 base rawlen -compat5.2 + assert(not rawlen) +end + +do --- 5.2 base rawlen +compat5.2 + assert(rawlen) +end + +do --- math + check(math, "abs:acos:asin:atan:atan2:ceil:cos:cosh:deg:exp:floor:fmod:frexp:huge:ldexp:log:max:min:modf:pi:pow:rad:random:randomseed:sin:sinh:sqrt:tan:tanh", "log10:mod") +end + +do --- pre-5.2 math +lua<5.2 -compat5.2 + assert(math.mod) + assert(math.log10) +end + +do --- 5.2 math +lua>=5.2 + assert(not math.mod) + assert(not math.log10) +end + +do --- string + check(string, 
"byte:char:dump:find:format:gmatch:gsub:len:lower:match:rep:reverse:sub:upper", "gfind") +end + +do --- pre-5.2 string +lua<5.2 -compat5.2 + assert(string.gfind) +end + +do --- 5.2 string +lua>=5.2 + assert(not string.gfind) +end + +do --- pre-5.2 table +lua<5.2 + check(table, "concat:foreach:foreachi:getn:insert:maxn:move:remove:sort", "pack:unpack:setn:new") +end + +do --- 5.2 table +lua>=5.2 + check(table, "concat:insert:pack:remove:sort:unpack") +end + +do --- pre-5.2 table.pack -compat5.2 + assert(not table.pack) + assert(not table.unpack) +end + +do --- 5.2 table.pack +compat5.2 + assert(table.pack) + assert(table.unpack) +end + +do --- io + check(io, "close:flush:input:lines:open:output:popen:read:stderr:stdin:stdout:tmpfile:type:write") +end + +do --- io file + check(debug.getmetatable(io.stdin), "__gc:__index:__tostring:close:flush:lines:read:seek:setvbuf:write") +end + +do --- os + check(os, "clock:date:difftime:execute:exit:getenv:remove:rename:setlocale:time:tmpname") +end + +do --- debug + check(debug, "debug:gethook:getinfo:getlocal:getmetatable:getregistry:getupvalue:sethook:setlocal:setmetatable:setupvalue:traceback", "getfenv:setfenv:upvalueid:upvaluejoin:getuservalue:setuservalue") +end + +-- TODO: Check versional differences in debug library + +do --- package + check(package, "config:cpath:loaded:loadlib:path:preload", "searchpath:loaders:searchers:seeall") +end + +do --- pre-5.2 package +lua<5.2 + assert(package.loaders) + assert(not package.searchers) + assert(package.seeall) +end + +do --- 5.2 package +lua>=5.2 + assert(not package.loaders) + assert(package.searchers) + assert(not package.seeall) +end + +do --- package.loaders + check(package.loaders or package.searchers, "1:2:3:4") +end + +do --- package.loaded + local loaded = {} + for k, v in pairs(package.loaded) do + if type(k) ~= "string" or (k:sub(1, 7) ~= "common." 
and k:sub(1, 4) ~= "jit.") then + loaded[k] = v + end + end + check(loaded, "_G:coroutine:debug:io:math:os:package:string:table", "bit:bit32:common:ffi:jit:table.new") +end + +do --- bit +bit + check(bit, "arshift:band:bnot:bor:bswap:bxor:lshift:rol:ror:rshift:tobit:tohex") +end + +do --- ffi +ffi + check(require"ffi", "C:abi:alignof:arch:cast:cdef:copy:errno:fill:gc:istype:load:metatype:new:offsetof:os:sizeof:string:typeof", "typeinfo") +end + +do --- ffi 2.1 +ffi +luajit>=2.1 + assert(require"ffi".typeinfo) +end diff --git a/testsuite/test/lib/coroutine/index b/testsuite/test/lib/coroutine/index new file mode 100644 index 0000000000..9c5c17ec19 --- /dev/null +++ b/testsuite/test/lib/coroutine/index @@ -0,0 +1 @@ +yield.lua diff --git a/testsuite/test/lib/coroutine/yield.lua b/testsuite/test/lib/coroutine/yield.lua new file mode 100644 index 0000000000..d995bf87e1 --- /dev/null +++ b/testsuite/test/lib/coroutine/yield.lua @@ -0,0 +1,109 @@ +local create = coroutine.create +local wrap = coroutine.wrap +local resume = coroutine.resume +local yield = coroutine.yield + +do --- Stack overflow on return (create) + wrap(function() + local co = create(function() + yield(string.byte(string.rep(" ", 100), 1, 100)) + end) + assert(select('#', resume(co)) == 101) + end)() +end + +do --- Stack overflow on return (wrap) + wrap(function() + local f = wrap(function() + yield(string.byte(string.rep(" ", 100), 1, 100)) + end) + assert(select('#', f()) == 100) + end)() +end + +do --- cogen + local function cogen(x) + return wrap(function(n) repeat x = x+n; n = yield(x) until false end), + wrap(function(n) repeat x = x*n; n = yield(x) until false end) + end + + local a,b=cogen(3) + local c,d=cogen(5) + assert(d(b(c(a(d(b(c(a(1)))))))) == 168428160) +end + +do --- cofunc +luajit + local function verify(what, expect, ...) + local got = {...} + for i=1,100 do + if expect[i] ~= got[i] then + error("FAIL " ..
what) + end + if expect[i] == nil then + break + end + end + end + + local function cofunc(...) + verify("call", { 1, "foo" }, ...) + verify("yield", { "bar" }, yield(2, "test")) + verify("pcall yield", { true, "again" }, pcall(yield, "from pcall")) + return "end" + end + + local co = create(cofunc) + verify("resume", { true, 2, "test" }, resume(co, 1, "foo")) + verify("resume pcall", { true, "from pcall" }, resume(co, "bar")) + verify("resume end", { true, "end" }, resume(co, "again")) +end + +do --- assorted +luajit + local function verify(expect, func, ...) + local co = create(func) + for i=1,100 do + local ok, res = resume(co, ...) + if not ok then + if expect[i] ~= nil then + error("too few results: ["..i.."] = "..tostring(expect[i]).." (got: "..tostring(res)..")") + end + break + end + if expect[i] ~= res then + error("bad result: ["..i.."] = "..tostring(res).." (should be: "..tostring(expect[i])..")") + end + end + end + + verify({ 42, 99 }, + function(x) pcall(yield, x) return 99 end, + 42) + + verify({ 42, 99 }, + function(x) pcall(function(y) yield(y) end, x) return 99 end, + 42) + + verify({ 42, 99 }, + function(x) xpcall(yield, debug.traceback, x) return 99 end, + 42) + + verify({ 45, 44, 43, 42, 99 }, + function(x, y) + for i in + function(o, k) + yield(o+k) + if k ~= 0 then return k-1 end + end,x,y do + end + return 99 + end, + 42, 3) + + verify({ 84, 99 }, + function(x) + local o = setmetatable({ x }, + {__add = function(a, b) yield(a[1]+b[1]) return 99 end }) + return o+o + end, + 42) +end diff --git a/testsuite/test/lib/ffi/bit64.lua b/testsuite/test/lib/ffi/bit64.lua new file mode 100644 index 0000000000..d1b47bef7b --- /dev/null +++ b/testsuite/test/lib/ffi/bit64.lua @@ -0,0 +1,130 @@ +local ffi = require("ffi") +local bit = require("bit") + +local tobit, bnot, bswap = bit.tobit, bit.bnot, bit.bswap +local band, bor, bxor = bit.band, bit.bor, bit.bxor +local shl, shr, sar = bit.lshift, bit.rshift, bit.arshift +local rol, ror = bit.rol, bit.ror + 
+ffi.cdef[[ +typedef enum { ZZI = -1 } ienum_t; +typedef enum { ZZU } uenum_t; +]] + +do --- smoke tobit + assert(tobit(0xfedcba9876543210ll) == 0x76543210) + assert(tobit(0xfedcba9876543210ull) == 0x76543210) +end + +do --- smoke band + assert(tostring(band(1ll, 1, 1ll, -1)) == "1LL") + assert(tostring(band(1ll, 1, 1ull, -1)) == "1ULL") +end + +do --- smoke shl + assert(shl(10ll, 2) == 40) + assert(shl(10, 2ll) == 40) + assert(shl(10ll, 2ll) == 40) +end + +do --- smoke tohex + assert(bit.tohex(0x123456789abcdef0LL) == "123456789abcdef0") +end + +do --- tobit/band assorted C types + for _,tp in ipairs{"int", "ienum_t", "uenum_t", "int64_t", "uint64_t"} do + local x = ffi.new(tp, 10) + local y = tobit(x) + local z = band(x) + assert(type(y) == "number" and y == 10) + assert(type(z) == "cdata" and z == 10) + end +end + +do --- tobit/band negative unsigned enum + local x = ffi.new("uenum_t", -10) + local y = tobit(x) + local z = band(x) + assert(type(y) == "number") + assert(y == -10) + assert(type(z) == "cdata") + assert(z == 2^32-10) +end + +do --- jit band/bor/bxor + local a = 0x123456789abcdef0LL + local y1, y2, y3, y4, y5, y6 + for i=1,100 do + y1 = band(a, 0x000000005a5a5a5aLL) + y2 = band(a, 0x5a5a5a5a00000000LL) + y3 = band(a, 0xffffffff5a5a5a5aLL) + y4 = band(a, 0x5a5a5a5affffffffLL) + y5 = band(a, 0xffffffff00000000LL) + y6 = band(a, 0x00000000ffffffffLL) + end + assert(y1 == 0x000000001a185a50LL) + assert(y2 == 0x1210525800000000LL) + assert(y3 == 0x123456781a185a50LL) + assert(y4 == 0x121052589abcdef0LL) + assert(y5 == 0x1234567800000000LL) + assert(y6 == 0x000000009abcdef0LL) + for i=1,100 do + y1 = bor(a, 0x000000005a5a5a5aLL) + y2 = bor(a, 0x5a5a5a5a00000000LL) + y3 = bor(a, 0xffffffff5a5a5a5aLL) + y4 = bor(a, 0x5a5a5a5affffffffLL) + y5 = bor(a, 0xffffffff00000000LL) + y6 = bor(a, 0x00000000ffffffffLL) + end + assert(y1 == 0x12345678dafedefaLL) + assert(y2 == 0x5a7e5e7a9abcdef0LL) + assert(y3 == 0xffffffffdafedefaLL) + assert(y4 == 0x5a7e5e7affffffffLL) 
+ assert(y5 == 0xffffffff9abcdef0LL) + assert(y6 == 0x12345678ffffffffLL) + for i=1,100 do + y1 = bxor(a, 0x000000005a5a5a5aLL) + y2 = bxor(a, 0x5a5a5a5a00000000LL) + y3 = bxor(a, 0xffffffff5a5a5a5aLL) + y4 = bxor(a, 0x5a5a5a5affffffffLL) + y5 = bxor(a, 0xffffffff00000000LL) + y6 = bxor(a, 0x00000000ffffffffLL) + end + assert(y1 == 0x12345678c0e684aaLL) + assert(y2 == 0x486e0c229abcdef0LL) + assert(y3 == 0xedcba987c0e684aaLL) + assert(y4 == 0x486e0c226543210fLL) + assert(y5 == 0xedcba9879abcdef0LL) + assert(y6 == 0x123456786543210fLL) +end + +do --- jit shift/xor + local a, b = 0x123456789abcdef0LL, 0x31415926535898LL + for i=1,200 do + a = bxor(a, b); b = sar(b, 14) + shl(b, 50) + a = a - b; b = shl(b, 5) + sar(b, 59) + b = bxor(a, b); b = b - shl(b, 13) - shr(b, 51) + end + assert(b == -7993764627526027113LL) +end + +do --- jit rotate/xor + local a, b = 0x123456789abcdef0LL, 0x31415926535898LL + for i=1,200 do + a = bxor(a, b); b = rol(b, 14) + a = a - b; b = rol(b, 5) + b = bxor(a, b); b = b - rol(b, 13) + end + assert(b == -6199148037344061526LL) +end + +do --- jit all ops + local a, b = 0x123456789abcdef0LL, 0x31415926535898LL + for i=1,200 do + a = bxor(a, b); b = rol(b, a) + a = a - b; b = shr(b, a) + shl(b, bnot(a)) + b = bxor(a, b); b = b - bswap(b) + end + assert(b == -8881785180777266821LL) +end + diff --git a/testsuite/test/lib/ffi/cdata_var.lua b/testsuite/test/lib/ffi/cdata_var.lua new file mode 100644 index 0000000000..42d6028a53 --- /dev/null +++ b/testsuite/test/lib/ffi/cdata_var.lua @@ -0,0 +1,47 @@ +local ffi = require("ffi") + +do --- byte array allocations + local typ = ffi.typeof"uint8_t[?]" + for i = 4, 24 do + for d = -5, 5 do + local sz = 2^i + d + assert(ffi.sizeof(typ, sz) == sz) + local mem = ffi.new(typ, sz) + assert(ffi.sizeof(mem) == sz) + mem[0] = 0x21 + mem[1] = 0x32 + mem[2] = 0x43 + mem[sz-3] = 0x54 + mem[sz-2] = 0x65 + mem[sz-1] = 0x76 + assert(mem[0] == 0x21) + assert(mem[1] == 0x32) + assert(mem[2] == 0x43) + assert(mem[3] == 
0) + assert(mem[sz-4] == 0) + assert(mem[sz-3] == 0x54) + assert(mem[sz-2] == 0x65) + assert(mem[sz-1] == 0x76) + end + end +end + +do --- int array allocations + local typ = ffi.typeof"int32_t[?]" + for i = 2, 17 do + for d = -2, 2 do + local sz = 2^i + d + assert(ffi.sizeof(typ, sz) == sz*4) + local mem = ffi.new(typ, sz) + assert(ffi.sizeof(mem) == sz*4) + mem[0] = -3 + mem[sz-1] = -4 + assert(mem[0] == -3) + if sz ~= 2 then + assert(mem[1] == 0) + assert(mem[sz-2] == 0) + end + assert(mem[sz-1] == -4) + end + end +end diff --git a/testsuite/test/lib/ffi/copy_fill.lua b/testsuite/test/lib/ffi/copy_fill.lua new file mode 100644 index 0000000000..295638182d --- /dev/null +++ b/testsuite/test/lib/ffi/copy_fill.lua @@ -0,0 +1,64 @@ +local ffi = require("ffi") + +do --- misc + local arr = ffi.typeof("char[11]") + local a = arr() + local b = arr() + local c = arr() + + for i=0,9 do a[i] = 97+i; b[i] = 106-i end + a[10] = 0; b[10] = 0; + + ffi.copy(c, a, 11) + for i=0,9 do assert(c[i] == 97+i) end + assert(ffi.string(c) == "abcdefghij") + + ffi.copy(c, b, 5) + for i=0,4 do assert(c[i] == 106-i) end + for i=5,9 do assert(c[i] == 97+i) end + assert(ffi.string(c) == "jihgffghij") + + c[7] = 0 + assert(ffi.string(c) == "jihgffg") + + c[10] = 1 + ffi.copy(c, "ABCDEFGHIJ") + for i=0,9 do assert(c[i] == 65+i) end + assert(c[10] == 0) + assert(ffi.string(c) == "ABCDEFGHIJ") + + ffi.copy(c, "abcdefghij", 5) + assert(ffi.string(c) == "abcdeFGHIJ") + + ffi.fill(c, 10, 65) + assert(ffi.string(c) == "AAAAAAAAAA") + for i=10,0,-1 do ffi.fill(c, i, 96+i) end + assert(ffi.string(c) == "abcdefghij") + ffi.fill(c, 10) + assert(c[0] == 0) + assert(c[9] == 0) + + -- test length parameter to ffi.string + ffi.fill(c, 10, 65) + assert(ffi.string(c, 5) == "AAAAA") +end + +do --- jit char[10] + local a = ffi.new("char[10]", 64) + local x + for i=1,100 do a[0] = i; x = ffi.string(a, 10) end + assert(x == "d@@@@@@@@@") +end + +do --- jit char[1] + local a = ffi.new("char[1]") + local x, y + for 
i=1,100 do + a[0] = i + x = ffi.string(a, 1) + a[0] = 126 + y = ffi.string(a, 1) + end + assert(x == "d" and y == "~") +end + diff --git a/testsuite/test/lib/ffi/err.lua b/testsuite/test/lib/ffi/err.lua new file mode 100644 index 0000000000..44723657c0 --- /dev/null +++ b/testsuite/test/lib/ffi/err.lua @@ -0,0 +1,35 @@ +local ffi = require("ffi") + +do --- error in FFI metamethod: don't print metamethod frame. + local ok, err = xpcall(function() + local x = (1ll).foo + end, debug.traceback) + assert(ok == false) + assert(not string.find(err, "__index")) +end + +do --- tailcall in regular metamethod: keep metamethod frame. + local ok, err = xpcall(function() + local t = setmetatable({}, {__index = function() return rawget("x") end }) + local y = t[1] + end, debug.traceback) + assert(ok == false) + assert(string.find(err, "__index")) +end + +do --- error in FFI metamethod: set correct PC. + ffi.cdef[[ +typedef struct { int x; int y; } ffi_err_point; +ffi_err_point ffi_err_strchr(ffi_err_point* op1, ffi_err_point* op2) asm("strchr"); +]] + local point = ffi.metatype("ffi_err_point", { __add = ffi.C.ffi_err_strchr }) + local function foo() + local p = point{ 3, 4 } + local r = p + p + local r = p + 5 + end + local ok, err = xpcall(foo, debug.traceback) + local line = debug.getinfo(foo).linedefined+3 + assert(string.match(err, "traceback:[^:]*:"..line..":")) +end + diff --git a/testsuite/test/lib/ffi/ffi_arith_ptr.lua b/testsuite/test/lib/ffi/ffi_arith_ptr.lua new file mode 100644 index 0000000000..8cf890c608 --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_arith_ptr.lua @@ -0,0 +1,106 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") + +ffi.cdef[[ +typedef struct { int a,b,c; } foo1_t; +void free(void *); +void *malloc(size_t); +struct incomplete; +]] + +do + local a = ffi.new("int[10]") + local p1 = a+0 + p1[0] = 1; + p1[1] = 2; + assert(a[0] == 1) + assert(a[1] == 2) + assert(a == p1) + assert(not (a ~= p1)) + assert(p1 <= a) + assert(a <= p1) + 
assert(not (p1 < a)) + assert(not (a < p1)) + assert(a ~= nil) + assert(not (a == nil)) + assert(p1 ~= nil) + assert(not (p1 == nil)) + + local p2 = a+2 + p2[0] = 3; + p2[1] = 4; + assert(a[2] == 3) + assert(a[3] == 4) + assert(p2 - p1 == 2) + assert(p1 - p2 == -2) + assert(p1 ~= p2) + assert(not (p1 == p2)) + assert(p1 < p2) + assert(p2 > p1) + assert(not (p1 > p2)) + assert(not (p2 < p1)) + assert(p1 <= p2) + assert(p2 >= p1) + assert(not (p1 >= p2)) + assert(not (p2 <= p1)) + + local p3 = a-2 + assert(p3[2] == 1) + assert(p3[3] == 2) + local p4 = a+(-3) + assert(p4[5] == 3) + assert(p4[6] == 4) + -- bad: adding two pointers or subtracting a pointer + fails(function(p1, p2) return p1 + p2 end, p1, p2) + fails(function(p1) return 1 - p1 end, p1) + -- bad: subtracting different pointer types + fails(function(p1) return p1 - ffi.new("char[1]") end, p1) + -- but different qualifiers are ok + local b = ffi.cast("const int *", a+5) + assert(b - a == 5) +end + +do + local p1 = ffi.cast("void *", 0) + local p2 = ffi.cast("int *", 1) + assert(p1 == p1) + assert(p2 == p2) + assert(p1 ~= p2) + assert(p1 == nil) + assert(p2 ~= nil) +end + +do + local f1 = ffi.C.free + local f2 = ffi.C.malloc + local p1 = ffi.cast("void *", f1) + assert(f1 == f1) + assert(f1 ~= nil) + assert(f1 ~= f2) + assert(p1 == f1) + assert(p1 ~= f2) + assert(f1 < f2 or f1 > f2) + fails(function(f1) return f1 + 1 end, f1) +end + +do + local s = ffi.new("foo1_t[10]") + local p1 = s+3 + p1.a = 1; p1.b = 2; p1.c = 3 + p1[1].a = 4; p1[1].b = 5; p1[1].c = 6 + assert(s[3].a == 1 and s[3].b == 2 and s[3].c == 3) + assert(s[4].a == 4 and s[4].b == 5 and s[4].c == 6) + local p2 = s+6 + assert(p2 - p1 == 3) + assert(p1 - p2 == -3) +end + +do + local mem = ffi.new("int[1]") + local p = ffi.cast("struct incomplete *", mem) + fails(function(p) return p+1 end, p) + local ok, err = pcall(function(p) return p[1] end, p) + assert(not ok and err:match("size.*unknown")) +end + diff --git 
a/testsuite/test/lib/ffi/ffi_bitfield.lua b/testsuite/test/lib/ffi/ffi_bitfield.lua new file mode 100644 index 0000000000..cd0b1815bd --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_bitfield.lua @@ -0,0 +1,108 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") + +do + local x = ffi.new([[ + union { + uint32_t u; + struct { int a:10,b:10,c:11,d:1; }; + struct { unsigned int e:10,f:10,g:11,h:1; }; + struct { int8_t i:4,j:5,k:5,l:3; }; + struct { _Bool b0:1,b1:1,b2:1,b3:1; }; + } + ]]) + + -- bitfield access + x.u = 0xffffffff + assert(x.a == -1 and x.b == -1 and x.c == -1 and x.d == -1) + assert(x.e == 1023 and x.f == 1023 and x.g == 2047 and x.h == 1) + assert(x.i == -1 and x.j == -1 and x.k == -1 and x.l == -1) + assert(x.b0 == true and x.b1 == true and x.b2 == true and x.b3 == true) + x.u = 0x12345678 + if ffi.abi("le") then + assert(x.a == -392 and x.b == 277 and x.c == 291 and x.d == 0) + assert(x.e == 632 and x.f == 277 and x.g == 291 and x.h == 0) + assert(x.i == -8 and x.j == -10 and x.k == -12 and x.l == 1) + assert(x.b0 == false and x.b1 == false and x.b2 == false and x.b3 == true) + else + assert(x.a == 72 and x.b == -187 and x.c == 828 and x.d == 0) + assert(x.e == 72 and x.f == 837 and x.g == 828 and x.h == 0) + assert(x.i == 1 and x.j == 6 and x.k == 10 and x.l == -2) + assert(x.b0 == false and x.b1 == false and x.b2 == false and x.b3 == true) + end + x.u = 0xe8d30edc + if ffi.abi("le") then + assert(x.a == -292 and x.b == 195 and x.c == -371 and x.d == -1) + assert(x.e == 732 and x.f == 195 and x.g == 1677 and x.h == 1) + assert(x.i == -4 and x.j == 14 and x.k == -13 and x.l == -2) + assert(x.b0 == false and x.b1 == false and x.b2 == true and x.b3 == true) + else + assert(x.a == -93 and x.b == 304 and x.c == -146 and x.d == 0) + assert(x.e == 931 and x.f == 304 and x.g == 1902 and x.h == 0) + assert(x.i == -2 and x.j == -6 and x.k == 1 and x.l == -2) + assert(x.b0 == true and x.b1 == true and x.b2 == true and x.b3 == false) + end + + -- 
bitfield insert + x.u = 0xffffffff + x.a = 0 + if ffi.abi("le") then + assert(x.u == 0xfffffc00) + else + assert(x.u == 0x003fffff) + end + x.u = 0 + x.a = -1 + if ffi.abi("le") then + assert(x.u == 0x3ff) + else + assert(x.u == 0xffc00000) + end + x.u = 0xffffffff + x.b = 0 + if ffi.abi("le") then + assert(x.u == 0xfff003ff) + else + assert(x.u == 0xffc00fff) + end + x.u = 0 + x.b = -1 + if ffi.abi("le") then + assert(x.u == 0x000ffc00) + else + assert(x.u == 0x003ff000) + end + + -- cumulative bitfield insert + x.u = 0xffffffff + if ffi.abi("le") then + x.a = -392; x.b = 277; x.c = 291; x.d = 0 + else + x.a = 72; x.b = -187; x.c = 828; x.d = 0 + end + assert(x.u == 0x12345678) + x.u = 0 + if ffi.abi("le") then + x.a = -392; x.b = 277; x.c = 291; x.d = 0 + else + x.a = 72; x.b = -187; x.c = 828; x.d = 0 + end + assert(x.u == 0x12345678) + x.u = 0xffffffff + x.b0 = true; x.b1 = false; x.b2 = true; x.b3 = false + if ffi.abi("le") then + assert(x.u == 0xfffffff5) + else + assert(x.u == 0xafffffff) + end + x.u = 0 + x.b0 = true; x.b1 = false; x.b2 = true; x.b3 = false + if ffi.abi("le") then + assert(x.u == 0x00000005) + else + assert(x.u == 0xa0000000) + end + +end + diff --git a/testsuite/test/lib/ffi/ffi_call.lua b/testsuite/test/lib/ffi/ffi_call.lua new file mode 100644 index 0000000000..1eb5e906b1 --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_call.lua @@ -0,0 +1,266 @@ + +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") + +local tonumber = tonumber + +ffi.cdef[[ +typedef struct s_ii { int x, y; } s_ii; +typedef struct s_jj { int64_t x, y; } s_jj; +typedef struct s_ff { float x, y; } s_ff; +typedef struct s_dd { double x, y; } s_dd; +typedef struct s_8i { int a,b,c,d,e,f,g,h; } s_8i; + +int call_i(int a); +int call_ii(int a, int b); +int call_10i(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j); + +typedef enum { XYZ } e_u; + +e_u call_ie(e_u a) asm("call_i"); + +int64_t call_ji(int64_t a, int b); +int64_t call_ij(int a, int64_t 
b); +int64_t call_jj(int64_t a, int64_t b); + +double call_dd(double a, double b); +double call_10d(double a, double b, double c, double d, double e, double f, double g, double h, double i, double j); + +float call_ff(float a, float b); +float call_10f(float a, float b, float c, float d, float e, float f, float g, float h, float i, float j); + +double call_idifjd(int a, double b, int c, float d, int64_t e, double f); + +int call_p_i(int *a); +int *call_p_p(int *a); +int call_pp_i(int *a, int *b); + +double call_ividi(int a, ...); + +complex call_dd_cd(double a, double b); +complex call_cd(complex a); +complex call_cdcd(complex a, complex b); + +complex float call_ff_cf(float a, float b); +complex float call_cf(complex float a); +complex float call_cfcf(complex float a, complex float b); + +s_ii call_sii(s_ii a); +s_jj call_sjj(s_jj a); +s_ff call_sff(s_ff a); +s_dd call_sdd(s_dd a); +s_8i call_s8i(s_8i a); +s_ii call_siisii(s_ii a, s_ii b); +s_ff call_sffsff(s_ff a, s_ff b); +s_dd call_sddsdd(s_dd a, s_dd b); +s_8i call_s8is8i(s_8i a, s_8i b); +s_8i call_is8ii(int a, s_8i b, int c); + +int __fastcall fastcall_void(void); +int __fastcall fastcall_i(int a); +int __fastcall fastcall_ii(int a, int b); +int __fastcall fastcall_iii(int a, int b, int c); +int64_t __fastcall fastcall_ji(int64_t a, int b); +double __fastcall fastcall_dd(double a, double b); +int __fastcall fastcall_pp_i(int *a, int *b); +s_ii __fastcall fastcall_siisii(s_ii a, s_ii b); +s_dd __fastcall fastcall_sddsdd(s_dd a, s_dd b); + +int __stdcall stdcall_i(int a); +int __stdcall stdcall_ii(int a, int b); +double __stdcall stdcall_dd(double a, double b); +float __stdcall stdcall_ff(float a, float b); +]] + +local C = ffi.load("../clib/ctest") + +assert(C.call_i(-42) == -41) +assert(C.call_ii(-42, 17) == -42+17) +assert(C.call_10i(-42, 17, 12345, 9987, -100, 11, 51, 0x12345678, 338, -78901234) == -42+17+12345+9987-100+11+51+0x12345678+338-78901234) + +assert(C.call_ie(123) == 124) + 
+assert(tonumber(C.call_ji(0x123456789LL, -17)) == tonumber(0x123456789LL-17)) +assert(tonumber(C.call_ij(-17, 0x123456789LL)) == tonumber(0x123456789LL-17)) +assert(tonumber(C.call_jj(-42, 17)) == -42+17) +assert(tonumber(C.call_jj(0x123456789abcdef0LL, -0x789abcde99887766LL)) == tonumber(0x123456789abcdef0LL-0x789abcde99887766LL)) + +assert(C.call_dd(12.5, -3.25) == 12.5-3.25) +assert(C.call_10d(-42.5, 17.125, 12345.5, 9987, -100.625, 11, 51, 0x12345678, 338, -78901234.75) == -42.5+17.125+12345.5+9987-100.625+11+51+0x12345678+338-78901234.75) + +assert(C.call_ff(12.5, -3.25) == 12.5-3.25) +assert(C.call_10f(-42.5, 17.125, 12345.5, 9987, -100.625, 11, 51, 0x123456, 338, -789012.75) == -42.5+17.125+12345.5+9987-100.625+11+51+0x123456+338-789012.75) + +assert(C.call_idifjd(-42, 17.125, 0x12345, -100.625, 12345678901234, -789012.75) == -42+17.125+0x12345-100.625+12345678901234-789012.75) + +do + local a = ffi.new("int[10]", -42) + assert(C.call_p_i(a) == -42+1) + assert(tonumber(ffi.cast("intptr_t", C.call_p_p(a+3))) == tonumber(ffi.cast("intptr_t", a+4))) + assert(C.call_pp_i(a+8, a+5) == 3) +end + +-- vararg +assert(C.call_ividi(-42, ffi.new("int", 17), 12.5, ffi.new("int", 131)) == -42+17+12.5+131) + +-- complex +if pcall(function() return C.call_dd_cd end) then + do + local c = C.call_dd_cd(12.5, -3.25) + assert(c.re == 12.5 and c.im == -3.25*2) + end + do + local c1 = ffi.new("complex", 12.5, -3.25) + local cz = C.call_cd(c1) + assert(cz.re == 12.5+1 and cz.im == -3.25-2) + end + do + local c1 = ffi.new("complex", 12.5, -3.25) + local c2 = ffi.new("complex", -17.125, 100.625) + local cz = C.call_cdcd(c1, c2) + assert(cz.re == 12.5-17.125 and cz.im == -3.25+100.625) + end + + do + local c = C.call_ff_cf(12.5, -3.25) + assert(c.re == 12.5 and c.im == -3.25*2) + end + do + local c1 = ffi.new("complex float", 12.5, -3.25) + local cz = C.call_cf(c1) + assert(cz.re == 12.5+1 and cz.im == -3.25-2) + end + do + local c1 = ffi.new("complex float", 12.5, -3.25) + local c2 
= ffi.new("complex float", -17.125, 100.625) + local cz = C.call_cfcf(c1, c2) + assert(cz.re == 12.5-17.125 and cz.im == -3.25+100.625) + end +end + +-- structs +do + local s1 = ffi.new("s_ii", -42, 17) + local sz = C.call_sii(s1) + assert(s1.x == -42 and s1.y == 17) + assert(sz.x == -42 and sz.y == 17) +end + +do + local s1 = ffi.new("s_jj", 0x123456789abcdef0LL, -0x789abcde99887766LL) + local sz = C.call_sjj(s1) + assert(s1.x == 0x123456789abcdef0LL) + assert(s1.y == -0x789abcde99887766LL) + assert(sz.x == 0x123456789abcdef0LL) + assert(sz.y == -0x789abcde99887766LL) +end + +do + local s1 = ffi.new("s_ff", 12.5, -3.25) + local sz = C.call_sff(s1) + assert(s1.x == 12.5 and s1.y == -3.25) + assert(sz.x == 12.5 and sz.y == -3.25) +end + +do + local s1 = ffi.new("s_dd", 12.5, -3.25) + local sz = C.call_sdd(s1) + assert(s1.x == 12.5 and s1.y == -3.25) + assert(sz.x == 12.5 and sz.y == -3.25) +end + +do + local s1 = ffi.new("s_8i", -42, 17, 12345, 9987, -100, 11, 51, 0x12345678) + local sz = C.call_s8i(s1) + assert(s1.a+s1.b+s1.c+s1.d+s1.e+s1.f+s1.g+s1.h == -42+17+12345+9987-100+11+51+0x12345678) + assert(sz.a+sz.b+sz.c+sz.d+sz.e+sz.f+sz.g+sz.h == -42+17+12345+9987-100+11+51+0x12345678) +end + +do + local s1 = ffi.new("s_ii", -42, 17) + local s2 = ffi.new("s_ii", 0x12345, -98765) + local sz = C.call_siisii(s1, s2) + assert(s1.x == -42 and s1.y == 17) + assert(s2.x == 0x12345 and s2.y == -98765) + assert(sz.x == -42+0x12345 and sz.y == 17-98765) +end + +do + local s1 = ffi.new("s_ff", 12.5, -3.25) + local s2 = ffi.new("s_ff", -17.125, 100.625) + local sz = C.call_sffsff(s1, s2) + assert(s1.x == 12.5 and s1.y == -3.25) + assert(s2.x == -17.125 and s2.y == 100.625) + assert(sz.x == 12.5-17.125 and sz.y == -3.25+100.625) +end + +do + local s1 = ffi.new("s_dd", 12.5, -3.25) + local s2 = ffi.new("s_dd", -17.125, 100.625) + local sz = C.call_sddsdd(s1, s2) + assert(s1.x == 12.5 and s1.y == -3.25) + assert(s2.x == -17.125 and s2.y == 100.625) + assert(sz.x == 12.5-17.125 and 
sz.y == -3.25+100.625) +end + +do + local s1 = ffi.new("s_8i", -42, 17, 12345, 9987, -100, 11, 51, 0x12345678) + local s2 = ffi.new("s_8i", 99, 311, 98765, -51, 312, 97, 17, 0x44332211) + local sz = C.call_s8is8i(s1, s2) + assert(s1.a+s1.b+s1.c+s1.d+s1.e+s1.f+s1.g+s1.h == -42+17+12345+9987-100+11+51+0x12345678) + assert(s2.a+s2.b+s2.c+s2.d+s2.e+s2.f+s2.g+s2.h == 99+311+98765-51+312+97+17+0x44332211) + assert(sz.a+sz.b+sz.c+sz.d+sz.e+sz.f+sz.g+sz.h == -42+17+12345+9987-100+11+51+0x12345678 + 99+311+98765-51+312+97+17+0x44332211) + assert(sz.a == -42+99) + assert(sz.h == 0x12345678+0x44332211) +end + +do + local s1 = ffi.new("s_8i", -42, 17, 12345, 9987, -100, 11, 51, 0x12345678) + local sz = C.call_is8ii(19, s1, -51) + assert(s1.a+s1.b+s1.c+s1.d+s1.e+s1.f+s1.g+s1.h == -42+17+12345+9987-100+11+51+0x12345678) + assert(sz.a+sz.b+sz.c+sz.d+sz.e+sz.f+sz.g+sz.h == -42+17+12345+9987-100+11+51+0x12345678 + 19-51) + assert(sz.a == -42+19) + assert(sz.c == 12345-51) +end + +-- target-specific +if jit.arch == "x86" then + assert(C.fastcall_void() == 1) + assert(C.fastcall_i(-42) == -41) + assert(C.fastcall_ii(-42, 17) == -42+17) + assert(C.fastcall_iii(-42, 17, 139) == -42+17+139) + assert(tonumber(C.fastcall_ji(0x123456789LL, -17)) == tonumber(0x123456789LL-17)) + assert(C.fastcall_dd(12.5, -3.25) == 12.5-3.25) + + do + local a = ffi.new("int[10]", -42) + assert(C.fastcall_pp_i(a+8, a+5) == 3) + end + + do + local s1 = ffi.new("s_ii", -42, 17) + local s2 = ffi.new("s_ii", 0x12345, -98765) + local sz = C.fastcall_siisii(s1, s2) + assert(s1.x == -42 and s1.y == 17) + assert(s2.x == 0x12345 and s2.y == -98765) + assert(sz.x == -42+0x12345 and sz.y == 17-98765) + end + + do + local s1 = ffi.new("s_dd", 12.5, -3.25) + local s2 = ffi.new("s_dd", -17.125, 100.625) + local sz = C.fastcall_sddsdd(s1, s2) + assert(s1.x == 12.5 and s1.y == -3.25) + assert(s2.x == -17.125 and s2.y == 100.625) + assert(sz.x == 12.5-17.125 and sz.y == -3.25+100.625) + end + + if jit.os == "Windows" then + 
assert(C.stdcall_i(-42) == -41) + assert(C.stdcall_ii(-42, 17) == -42+17) + assert(C.stdcall_dd(12.5, -3.25) == 12.5-3.25) + assert(C.stdcall_ff(12.5, -3.25) == 12.5-3.25) + end +end + diff --git a/testsuite/test/lib/ffi/ffi_callback.lua b/testsuite/test/lib/ffi/ffi_callback.lua new file mode 100644 index 0000000000..1fd14bd0d2 --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_callback.lua @@ -0,0 +1,158 @@ + +local ffi = require("ffi") + +ffi.cdef[[ +void qsort(void *base, size_t nmemb, size_t size, + int (*compar)(const uint8_t *, const uint8_t *)); +]] + +do + local cb = ffi.cast("int (*)(int, int, int)", function(a, b, c) + return a+b+c + end) + + assert(cb(10, 99, 13) == 122) + + -- Don't compile call to blacklisted function. + for i=1,200 do + if i > 60 then assert(cb(10, 99, 13) == 122) end + end +end + +do + assert(ffi.cast("int64_t (*)(int64_t, int64_t, int64_t)", function(a, b, c) + return a+b+c + end)(12345678901234567LL, 70000000000000001LL, 10000000909090904LL) == + 12345678901234567LL+70000000000000001LL+10000000909090904LL) + + assert(ffi.cast("double (*)(double, float, double)", function(a, b, c) + return a+b+c + end)(7.125, -123.25, 9999.33) == 7.125-123.25+9999.33) + + assert(ffi.cast("double (*)(int, double)", function(a, b) + return a+b + end)(12345, 7.125) == 12345 + 7.125) + + assert(ffi.cast("float (*)(double, float, double)", function(a, b, c) + return a+b+c + end)(7.125, -123.25, 9999.33) == 9883.205078125) + + assert(ffi.cast("int (*)(int, int, int, int, int, int, int, int, int, int)", + function(a, b, c, d, e, f, g, h, i, j) + return a+b+c+d+e+f+g+h+i+j + end)(-42, 17, 12345, 9987, -100, 11, 51, 0x12345678, 338, -78901234) == + -42+17+12345+9987-100+11+51+0x12345678+338-78901234) + + assert(ffi.cast("double (*)(double, double, double, double, double, double, double, double, double, double)", + function(a, b, c, d, e, f, g, h, i, j) + return a+b+c+d+e+f+g+h+i+j + end)(-42.5, 17.125, 12345.5, 9987, -100.625, 11, 51, 0x12345678, 338, 
-78901234.75) == + -42.5+17.125+12345.5+9987-100.625+11+51+0x12345678+338-78901234.75) +end + +-- Target-specific tests. +if jit.arch == "x86" then + assert(ffi.cast("__fastcall int (*)(int, int, int)", function(a, b, c) + return a+b+c + end)(10, 99, 13) == 122) + + assert(ffi.cast("__stdcall int (*)(int, int, int)", function(a, b, c) + return a+b+c + end)(10, 99, 13) == 122) + + -- Test reordering. + assert(ffi.cast("int64_t __fastcall (*)(int64_t, int, int)", function(a, b, c) + return a+b+c + end)(12345678901234567LL, 12345, 989797123) == + 12345678901234567LL+12345+989797123) +end + +-- Error handling. +do + local function f() + return + end -- Error for result conversion triggered here. + local ok, err = pcall(ffi.cast("int (*)(void)", f)) + assert(ok == false) + assert(string.match(err, ":"..debug.getinfo(f, "S").lastlinedefined..":")) + + assert(pcall(ffi.cast("int (*)(void)", function() end)) == false) + assert(pcall(ffi.cast("int (*)(void)", function() error("test") end)) == false) + assert(pcall(ffi.cast("int (*)(void)", function(a) return a+1 end)) == false) + + assert(pcall(ffi.cast("int (*)(int,int,int,int, int,int,int,int, int)", function() error("test") end), 1,1,1,1, 1,1,1,1, 1) == false) + assert(pcall(ffi.cast("int (*)(int,int,int,int, int,int,int,int, int)", function() error("test") end), 1,1,1,1, 1,1,1,1, 1) == false) +end + +do + local function cmp(pa, pb) + local a, b = pa[0], pb[0] + if a < b then + return -1 + elseif a > b then + return 1 + else + return 0 + end + end + + local arr = ffi.new("uint8_t[?]", 256) + for i=0,255 do arr[i] = math.random(0, 255) end + ffi.C.qsort(arr, 256, 1, cmp) + for i=0,254 do assert(arr[i] <= arr[i+1]) end +end + +if ffi.abi"win" then + ffi.cdef[[ + typedef int (__stdcall *WNDENUMPROC)(void *hwnd, intptr_t l); + int EnumWindows(WNDENUMPROC func, intptr_t l); + int SendMessageA(void *hwnd, uint32_t msg, int w, intptr_t l); + enum { WM_GETTEXT = 13 }; + ]] + + local C = ffi.C + local buf = ffi.new("char[?]", 
256) + local lbuf = ffi.cast("intptr_t", buf) + local count = 0 + C.EnumWindows(function(hwnd, l) + if C.SendMessageA(hwnd, C.WM_GETTEXT, 255, lbuf) ~= 0 then + count = count + 1 + end + return true + end, 0) + assert(count > 10) +end + +do + local cb = ffi.cast("int(*)(void)", function() return 1 end) + assert(cb() == 1) + cb:free() + assert(pcall(cb) == false) + assert(pcall(cb.free, cb) == false) + assert(pcall(cb.set, cb, function() end) == false) + cb = ffi.cast("int(*)(void)", function() return 2 end) + assert(cb() == 2) + cb:set(function() return 3 end) + assert(cb() == 3) +end + +do + local ft = ffi.typeof("void(*)(void)") + local function f() end + local t = {} + for i=1,4 do + for i=1,400 do t[i] = ft(f) end + for i=1,400 do t[i]:free() end + end +end + +do + assert(ffi.cast("int (*)()", function() return string.byte"A" end)() == 65) +end + +do + local f = ffi.cast("void (*)(void)", function() debug.traceback() end) + debug.sethook(function() debug.sethook(nil, "", 0); f() end, "", 1) + local x +end + diff --git a/testsuite/test/lib/ffi/ffi_const.lua b/testsuite/test/lib/ffi/ffi_const.lua new file mode 100644 index 0000000000..d42133ad94 --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_const.lua @@ -0,0 +1,113 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") + +ffi.cdef[[ +typedef struct s_t { + int v, w; +} s_t; + +typedef const s_t cs_t; + +typedef enum en_t { EE } en_t; + +typedef struct pcs_t { + int v; + const int w; +} pcs_t; + +typedef struct foo_t { + static const int cc = 17; + enum { CC = -37 }; + int i; + const int ci; + int bi:8; + const int cbi:8; + en_t e; + const en_t ce; + int a[10]; + const int ca[10]; + const char cac[10]; + s_t s; + cs_t cs; + pcs_t pcs1, pcs2; + const struct { + int ni; + }; + complex cx; + const complex ccx; + complex *cp; + const complex *ccp; +} foo_t; +]] + +do + local foo_t = ffi.typeof("foo_t") + local x = foo_t() + + -- constval + assert(x.cc == 17) + fails(function(x) x.cc = 1 end, x) + 
assert(x.CC == -37) + fails(function(x) x.CC = 1 end, x) + + -- fields + x.i = 1 + fails(function(x) x.ci = 1 end, x) + x.e = 1 + fails(function(x) x.ce = 1 end, x) + + -- bitfields + x.bi = 1 + fails(function(x) x.cbi = 1 end, x) + + -- arrays + do + local a = ffi.new("int[10]") + a[0] = 1 + local ca = ffi.new("const int[10]") + fails(function(ca) ca[0] = 1 end, ca) + end + x.a[0] = 1 + fails(function(x) x.ca[0] = 1 end, x) + fails(function(x) x.a = x.ca end, x) -- incompatible type + fails(function(x) x.ca = x.a end, x) + fails(function(x) x.ca = {} end, x) + fails(function(x) x.cac = "abc" end, x) + + -- structs + do + local s = ffi.new("s_t") + s.v = 1 + local cs = ffi.new("cs_t") + fails(function(cs) cs.v = 1 end, cs) + end + x.s.v = 1 + fails(function(x) x.cs.v = 1 end, x) + x.s = x.cs + fails(function(x) x.cs = x.s end, x) + fails(function(x) x.cs = {} end, x) + + -- pseudo-const structs + x.pcs1.v = 1 + fails(function(x) x.pcs1.w = 1 end, x) + fails(function(x) x.pcs1 = x.pcs2 end, x) + fails(function(x) x.pcs1 = {} end, x) + + -- transparent structs + local y = x.ni + fails(function(x) x.ni = 1 end, x) + + -- complex subtype is implicitly const and doesn't inherit const attribute + x.cx = 1 + fails(function(x) x.ccx = 1 end, x) + do + local cxa = ffi.new("complex[1]") + local ccxa = ffi.new("const complex[1]") + x.cp = cxa + x.ccp = cxa + fails(function(x) x.cp = ccxa end, x) + x.ccp = ccxa + end +end + diff --git a/testsuite/test/lib/ffi/ffi_convert.lua b/testsuite/test/lib/ffi/ffi_convert.lua new file mode 100644 index 0000000000..bd3fb1f903 --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_convert.lua @@ -0,0 +1,787 @@ +local ffi = require("ffi") + +local ctest = require("ctest") + +dofile("../common/ffi_util.inc") + +local tonumber = tonumber + +ffi.cdef[[ +typedef struct bar_t { + int v, w; +} bar_t; +// Same structure, but treated as different struct. 
+typedef struct barx_t { + int v, w; +} barx_t; + +typedef struct nest_t { + int a,b; + struct { int c,d; }; + struct { int e1,e2; } e; + int f[2]; +} nest_t; + +typedef union uni_t { + int8_t a; + int16_t b; + int32_t c; +} uni_t; + +typedef struct arrinc_t { + int a[]; +} arrinc_t; + +typedef enum uenum_t { + UE0, UE71 = 71, UE72 +} uenum_t; + +typedef enum ienum_t { + IE0, IEM12 = -12, IEM11 +} ienum_t; + +typedef struct foo_t { + bool b; + int8_t i8; + uint8_t u8; + int16_t i16; + uint16_t u16; + int32_t i32; + uint32_t u32; + int64_t i64; + uint64_t u64; + float f; + double d; + complex cf; + complex cd; + uint8_t __attribute__((mode(__V16QI__))) v16qi; + int __attribute__((mode(__V4SI__))) v4si; + double __attribute__((mode(__V2DF__))) v2df; + int *pi; + int *__ptr32 p32i; + const int *pci; + volatile int *pvi; + int **ppi; + const int **ppci; + void **ppv; + char *(*ppf)(char *, const char *); + int ai[10]; + int ai_guard; + int ai2[10]; + char ac[10]; + char ac_guard; + bar_t s; + bar_t s2; + bar_t *ps; + const bar_t *pcs; + barx_t sx; + struct { int a,b,c; } si; + int si_guard; + nest_t sn; + uni_t ui; + uenum_t ue; + ienum_t ie; +} foo_t; + +char *strcpy(char *dest, const char *src); +typedef struct FILE FILE; +int fileno(FILE *stream); +int _fileno(FILE *stream); +]] + +do + local foo_t = ffi.typeof("foo_t") + local sz = ffi.sizeof(foo_t) + local x = foo_t() + local y = foo_t() + ffi.fill(x, sz, 0xff) + ffi.fill(y, sz, 0xee) + + -- unknown member + fails(function(x) local a = x.bad end, x) + fails(function(x) x.bad = 1 end, x) + -- too many initializers + fails(function(x) x.d = ffi.new("double", 1,2) end, x) + + -- conversions to bool + x.b = false + assert(x.b == false) + x.b = true + assert(x.b == true) + x.b = 0 + assert(x.b == false) + x.b = 10 + assert(x.b == true) + y.b = false + x.b = y.b + assert(x.b == false) + x.b = ffi.new("bool", true) + assert(x.b == true) + x.b = ffi.cast("bool", false) + assert(x.b == false) + x.b = ffi.new("int32_t", 17) 
+ assert(x.b == true) + x.b = ffi.new("int32_t", 0) + assert(x.b == false) + + -- conversions from bool + x.i32 = true + assert(x.i32 == 1) + x.i32 = false + assert(x.i32 == 0) + x.i8 = ffi.new("bool", true) + assert(x.i8 == 1) + x.i8 = ffi.new("bool", false) + assert(x.i8 == 0) + x.d = true + assert(x.d == 1) + x.d = ffi.new("bool", false) + assert(x.d == 0) + -- assignment of bool to other types is not allowed + fails(function(x) x.cd = true end, x) + fails(function(x) x.v4si = true end, x) + fails(function(x) x.ai = true end, x) + fails(function(x) x.s = true end, x) + + -- int to int conversions + x.i8 = 99 + assert(x.i8 == 99) + x.i8 = -99 + assert(x.i8 == -99) + x.i8 = 128 + assert(x.i8 == -128) + x.i8 = 0xfffe + assert(x.i8 == -2) + y.i8 = 91 + x.i8 = y.i8 + assert(x.i8 == 91) + x.i8 = ffi.new("uint8_t", 0xb7) + assert(x.i8 == -73) + x.i8 = ffi.new("int16_t", 0x7fa0) + assert(x.i8 == -96) + x.i8 = ffi.new("int32_t", 0xff91) + assert(x.i8 == -111) + x.i8 = ffi.new("int64_t", 0xff81) + assert(x.i8 == -127) + + x.u8 = 99 + assert(x.u8 == 99) + x.u8 = -99 + assert(x.u8 == 256-99) + x.u8 = 128 + assert(x.u8 == 128) + x.u8 = 0xfffe + assert(x.u8 == 0xfe) + x.u8 = ffi.new("int8_t", -73) + assert(x.u8 == 0xb7) + x.u8 = ffi.new("int16_t", 0x7fa0) + assert(x.u8 == 0xa0) + x.u8 = ffi.new("int32_t", 0xff91) + assert(x.u8 == 0x91) + x.u8 = ffi.new("int64_t", 0xff81) + assert(x.u8 == 0x81) + + x.i16 = 99 + assert(x.i16 == 99) + x.i16 = -99 + assert(x.i16 == -99) + x.i16 = 32768 + assert(x.i16 == -32768) + x.i16 = 0xffffffe + assert(x.i16 == -2) + x.i16 = ffi.new("int8_t", -10) + assert(x.i16 == -10) + x.i16 = ffi.new("uint8_t", 254) + assert(x.i16 == 254) + x.i16 = ffi.new("uint16_t", 0xefa0) + assert(x.i16 == 0xefa0-65536) + x.i16 = ffi.new("int32_t", 0xffe291) + assert(x.i16 == 0xe291-65536) + x.i16 = ffi.new("int64_t", 0xffd481) + assert(x.i16 == 0xd481-65536) + + x.u16 = 99 + assert(x.u16 == 99) + x.u16 = -99 + assert(x.u16 == 65536-99) + x.u16 = 32768 + assert(x.u16 
== 32768) + x.u16 = 0xffffffe + assert(x.u16 == 65534) + x.u16 = ffi.new("int8_t", -10) + assert(x.u16 == 65536-10) + x.u16 = ffi.new("uint8_t", 254) + assert(x.u16 == 254) + x.u16 = ffi.new("int16_t", 0xefa0-65536) + assert(x.u16 == 0xefa0) + x.u16 = ffi.new("int32_t", 0xffe291) + assert(x.u16 == 0xe291) + x.u16 = ffi.new("int64_t", 0xffd481) + assert(x.u16 == 0xd481) + + x.i32 = 99 + assert(x.i32 == 99) + x.i32 = -99 + assert(x.i32 == -99) + -- double to int conversion for values >= 0x80000000 is undefined + x.i32 = ffi.new("int8_t", -10) + assert(x.i32 == -10) + x.i32 = ffi.new("uint8_t", 254) + assert(x.i32 == 254) + x.i32 = ffi.new("int16_t", -517) + assert(x.i32 == -517) + x.i32 = ffi.new("uint16_t", 35876) + assert(x.i32 == 35876) + x.i32 = ffi.new("uint32_t", 0xffffe291) + assert(x.i32 == 0xffffe291-2^32) + x.i32 = ffi.new("int64_t", 15*2^32-317) + assert(x.i32 == -317) + + x.u32 = 99 + assert(x.u32 == 99) + -- x.u32 = -99 -- this is undefined on some architectures + -- assert(x.u32 == 2^32-99) + x.u32 = 0x87654321 + assert(x.u32 == 0x87654321) + x.u32 = ffi.new("int8_t", -10) + assert(x.u32 == 2^32-10) + x.u32 = ffi.new("uint8_t", 254) + assert(x.u32 == 254) + x.u32 = ffi.new("int16_t", -517) + assert(x.u32 == 2^32-517) + x.u32 = ffi.new("uint16_t", 35876) + assert(x.u32 == 35876) + x.u32 = ffi.new("int32_t", 0xffffe291-2^32) + assert(x.u32 == 0xffffe291) + x.u32 = ffi.new("int64_t", 15*2^32-317) + assert(x.u32 == 2^32-317) + + x.i64 = 99 + assert(tonumber(x.i64) == 99) + x.i64 = -99 + assert(tonumber(x.i64) == -99) + x.i64 = 0x1234*2^32+0x87654321 + assert(tonumber(x.i64) == 0x1234*2^32+0x87654321) + -- double to int64 conversion for values >= 2^63-1 is undefined + x.i64 = ffi.new("int8_t", -10) + assert(tonumber(x.i64) == -10) + x.i64 = ffi.new("uint8_t", 254) + assert(tonumber(x.i64) == 254) + x.i64 = ffi.new("int16_t", -517) + assert(tonumber(x.i64) == -517) + x.i64 = ffi.new("uint16_t", 35876) + assert(tonumber(x.i64) == 35876) + x.i64 = 
ffi.new("int32_t", -12345678) + assert(tonumber(x.i64) == -12345678) + x.i64 = ffi.new("uint32_t", 0xffeeddcc) + assert(tonumber(x.i64) == 0xffeeddcc) + x.i64 = ffi.new("uint64_t", 0xffeeddcc*2^32) + assert(tonumber(x.i64) == 0xffeeddcc*2^32-2^64) + + x.u64 = 99 + assert(tonumber(x.u64) == 99) + -- x.u64 = -99 -- this is undefined on some architectures + -- assert(tonumber(x.u64) == 2^64-99) + x.u64 = 0x1234*2^32+0x87654321 + assert(tonumber(x.u64) == 0x1234*2^32+0x87654321) + -- double to int64 conversion for values >= 2^63-1 is undefined + x.u64 = ffi.new("int8_t", -10) + assert(tonumber(x.u64) == 2^64-10) + x.u64 = ffi.new("uint8_t", 254) + assert(tonumber(x.u64) == 254) + x.u64 = ffi.new("int16_t", -517) + assert(tonumber(x.u64) == 2^64-517) + x.u64 = ffi.new("uint16_t", 35876) + assert(tonumber(x.u64) == 35876) + x.u64 = ffi.new("int32_t", -12345678) + assert(tonumber(x.u64) == 2^64-12345678) + x.u64 = ffi.new("uint32_t", 0xffeeddcc) + assert(tonumber(x.u64) == 0xffeeddcc) + x.u64 = ffi.new("int64_t", -0x7feeddcc*2^32) + assert(tonumber(x.u64) == 2^64-0x7feeddcc*2^32) + + -- FP to int conversions, test for truncation + x.i32 = 1.9 + assert(x.i32 == 1) + x.i32 = 2.9 + assert(x.i32 == 2) + x.i32 = -1.9 + assert(x.i32 == -1) + x.i32 = -2.9 + assert(x.i32 == -2) + x.i8 = 1.9 + assert(x.i8 == 1) + x.u8 = 1.9 + assert(x.u8 == 1) + x.i16 = 1.9 + assert(x.i16 == 1) + x.u16 = 1.9 + assert(x.u16 == 1) + x.u32 = 1.9 + assert(x.u32 == 1) + x.u64 = 1.9 + assert(tonumber(x.u64) == 1) + + -- int to FP conversions (most tested above) + x.f = ffi.new("int32_t", -17) + assert(x.f == -17) + x.d = ffi.new("int32_t", -17) + assert(x.d == -17) + -- test for rounding due to precision loss + x.f = -1717986919 + assert(x.f == -1717986944) + x.f = ffi.new("int32_t", 0x77777777) + assert(x.f == 0x77777780) + x.d = ffi.new("union { uint32_t u32[2]; uint64_t u64; }", + {{ 0x77777777, 0x77777777}}).u64 + assert(x.d == 0x77777777*2^32 + 0x77777800) + + -- complex initialization + x.cd = 
ffi.new("complex", 9.125, -78.5) + assert(x.cd.re == 9.125 and x.cd.im == -78.5) + x.cd = ffi.new("complex", {9.125, -78.5}) + assert(x.cd.re == 9.125 and x.cd.im == -78.5) + -- too many initializers + fails(function(x) x.cd = ffi.new("complex", 1,2,3) end, x) + + -- conversions between FP and complex + x.cf = -17.25 + assert(x.cf.re == -17.25 and x.cf.im == 0) + x.cf = ffi.new("complex float", -57.5) -- missing initializer + assert(x.cf.re == -57.5 and x.cf.im == 0) + x.cf = ffi.new("complex float", 9.125, -78.5) + assert(x.cf.re == 9.125 and x.cf.im == -78.5) + x.cf = ffi.new("complex double", 9.125, -78.5) + assert(x.cf.re == 9.125 and x.cf.im == -78.5) + + x.cd = -17.25 + assert(x.cd.re == -17.25 and x.cd.im == 0) + x.cd = ffi.new("complex double", -57.5) -- missing initializer + assert(x.cd.re == -57.5 and x.cd.im == 0) + x.cd = ffi.new("complex float", 9.125, -78.5) + assert(x.cd.re == 9.125 and x.cd.im == -78.5) + x.cd = ffi.new("complex double", 9.125, -78.5) + assert(x.cd.re == 9.125 and x.cd.im == -78.5) + + x.f = ffi.new("complex float", 9.125, -78.5) + assert(x.f == 9.125) + x.f = ffi.new("complex double", 9.125, -78.5) + assert(x.f == 9.125) + + x.d = ffi.new("complex float", 9.125, -78.5) + assert(x.d == 9.125) + x.d = ffi.new("complex double", 9.125, -78.5) + assert(x.d == 9.125) + + -- conversions between int and complex + x.cd = ffi.new("int32_t", -138) + assert(x.cd.re == -138 and x.cd.im == 0) + x.i32 = ffi.new("complex", 9.125, -78.5) + assert(x.i32 == 9) + + -- vector initialization + x.v4si = ffi.new("int __attribute__((mode(__V4SI__)))", 1, 2, 3, 4) + assert(x.v4si[0] == 1 and x.v4si[1] == 2 and + x.v4si[2] == 3 and x.v4si[3] == 4) + x.v2df = ffi.new("double __attribute__((mode(__V2DF__)))", {3.5, -6.75}) + assert(x.v2df[0] == 3.5 and x.v2df[1] == -6.75) + -- too many initializers + fails(function(x) + x.v4si = ffi.new("int __attribute__((mode(__V4SI__)))", 1,2,3,4,5) + end, x) + + -- conversions to vectors + x.v4si = -17 + assert(x.v4si[0] 
== -17 and x.v4si[1] == -17 and + x.v4si[2] == -17 and x.v4si[3] == -17) + x.v4si = ffi.new("int32_t", 712) + assert(x.v4si[0] == 712 and x.v4si[1] == 712 and + x.v4si[2] == 712 and x.v4si[3] == 712) + x.v2df = 12.5 + assert(x.v2df[0] == 12.5 and x.v2df[1] == 12.5) + x.v2df = ffi.new("complex", 9.125, -78.5) + assert(x.v2df[0] == 9.125 and x.v2df[1] == 9.125) + + -- assignment of same-sized but differently-typed vectors + x.v16qi = 99 + x.v4si = 0x33333333 + x.v16qi = x.v4si + assert(x.v16qi[0] == 0x33 and x.v16qi[15] == 0x33) + + -- string converted to enum + -- x.ue = -1 -- this is undefined on some architectures + -- assert(x.ue == 0xffffffff) + x.ue = "UE0" + assert(x.ue == 0) + x.ue = "UE72" + assert(x.ue == 72) + x.ie = -1 + assert(x.ie == -1) + x.ie = "IE0" + assert(x.ie == 0) + x.ie = "IEM11" + assert(x.ie == -11) + + x.pi = x.pi + -- assignment to pointer with higher qualifiers is ok + x.pci = x.pi + x.pvi = x.pi + -- assignment to pointer with lower qualifiers is not ok + fails(function(x) x.pi = x.pci end, x) + fails(function(x) x.pi = x.pvi end, x) + fails(function(x) x.pci = x.pvi end, x) + fails(function(x) x.pvi = x.pci end, x) + -- assignment of pointers with incompatible child types is not ok + fails(function(x) x.ppi = x.ai end, x) + fails(function(x) x.ppi = x.pi end, x) + fails(function(x) x.ppv = x.ppi end, x) + -- qualifiers of child types must match, higher qualifiers not ok + fails(function(x) x.ppci = x.ppi end, x) + fails(function(x) x.ppi = x.ppci end, x) + + -- pointer/int conversions are not allowed by default + fails(function(x) x.pi = 1 end, x) + fails(function(x) x.i32 = x.pi end, x) + assert(tonumber(x.pi) == nil) + assert(tonumber(x.ai) == nil) + assert(tonumber(x.si) == nil) + + -- but pointer/int casts are allowed + x.pi = ffi.cast("int *", ffi.new("int32_t", 0x12345678)) + x.i32 = ffi.cast("int32_t", x.pi) + assert(x.i32 == 0x12345678) + x.pi = ffi.cast("int *", 1234560.3) + x.i32 = ffi.cast("int32_t", x.pi) + assert(x.i32 == 
1234560) + -- bad cast from non-TValue double to pointer + fails(function(x) + ffi.cast("int *", ffi.new("double", 1.5)) + end, x) + + -- nil sets a pointer to NULL + x.pi = nil + assert(tonumber(ffi.cast("uintptr_t", x.pi)) == 0) + + -- userdata and lightuserdata are treated as void * + do + local u = newproxy() + local uaddr = _G.tonumber(string.match(tostring(u), "(0x.*)")) + x.pi = u + assert(tonumber(ffi.cast("uintptr_t", x.pi)) == uaddr) + x.pi = ctest.lightud(12345678) + assert(tonumber(ffi.cast("uintptr_t", x.pi)) == 12345678) + end + + -- io.* file converts to file handle (as a void *) + if ffi.abi("win") then + assert(ffi.C._fileno(io.stdout) == 1) + assert(ffi.C._fileno(io.stderr) == 2) + local x + for i=1,100 do x = ffi.C._fileno(io.stderr) end + assert(x == 2) + else + assert(ffi.C.fileno(io.stdout) == 1) + assert(ffi.C.fileno(io.stderr) == 2) + local x + for i=1,100 do x = ffi.C.fileno(io.stderr) end + assert(x == 2) + end + + -- truncation/extension of __ptr32 + if ffi.abi("64bit") then + x.pi = ffi.cast("int *", 15*2^32+0x12345678) + assert(tonumber(ffi.cast("uintptr_t", x.pi)) == 15*2^32+0x12345678) + x.p32i = x.pi + assert(tonumber(ffi.cast("uintptr_t", x.p32i)) == 0x12345678) + x.pi = ffi.cast("int *", 0x1234*2^32+0x56780000) + x.pi = x.p32i + assert(tonumber(ffi.cast("uintptr_t", x.pi)) == 0x12345678) + end + + -- reference initialization + do + x.ai[0] = 712 + local ri = ffi.new("int &", x.ai) + assert(tonumber(ri) == 712) + local ra = ffi.new("int (&)[10]", ffi.cast("int (*)[10]", x.ai)) + assert(ra[0] == 712) + end + + -- ffi.sizeof follows references + assert(ffi.sizeof(x.ai) == 4*10) + -- ffi.offsetof follows references + assert(ffi.offsetof(x.s, "v") == 0) + assert(ffi.offsetof(x.s, "w") == 4) + + -- ffi.fill writes the right amount + ffi.fill(x.ai2, ffi.sizeof(x.ai2), 0x72) + ffi.fill(x.ai, ffi.sizeof(x.ai), 0x13) + assert(x.ai[0] == 0x13131313) + assert(x.ai[9] == 0x13131313) + assert(x.ai2[0] == 0x72727272) + assert(x.ai2[9] == 
0x72727272) + + -- array cannot be assigned a pointer + fails(function(x) x.ai = x.pi end, x) + -- but pointer can be assigned the address of an array + x.pi = x.ai2 + assert(x.pi[0] == 0x72727272) + assert(x.pi[9] == 0x72727272) + x.pi = x.ai + assert(x.pi[0] == 0x13131313) + assert(x.pi[9] == 0x13131313) + x.ai = x.ai2 -- array copy + assert(x.ai[0] == 0x72727272) + assert(x.ai[9] == 0x72727272) + -- reflected via pointer, too + assert(x.pi[0] == 0x72727272) + assert(x.pi[9] == 0x72727272) + -- mismatched type or size in array copy + fails(function(x) x.ai = x.ac end, x) + fails(function(x) x.ai = ffi.new("int[20]") end, x) + fails(function(x) x.ai = ffi.new("arrinc_t").a end, x) + fails(function(x) ffi.new("arrinc_t").a = x.ai end, x) + + ffi.fill(x.s2, ffi.sizeof(x.s2), 0x59) + x.s.v = 0x12345678 + x.s.w = 0x789abcde + assert(x.s.v == 0x12345678) + assert(x.s.w == 0x789abcde) + + -- struct cannot be assigned a pointer + fails(function(x) x.s = x.ps end, x) + -- but pointer can be assigned the address of a struct + x.ps = x.s + assert(x.ps.v == 0x12345678) + assert(x.ps.w == 0x789abcde) + x.pcs = x.s + assert(x.pcs.v == 0x12345678) + assert(x.pcs.w == 0x789abcde) + x.s = x.s2 -- struct copy + assert(x.s.v == 0x59595959) + assert(x.s.w == 0x59595959) + -- reflected via pointer, too + assert(x.ps.v == 0x59595959) + assert(x.ps.w == 0x59595959) + + -- structs must be identical, structural equivalence is not enough + fails(function(x) x.ps = x.sx end, x) + fails(function(x) x.s = x.sx end, x) + + -- string copy to arrays + x.ac_guard = 99 + ffi.fill(x.ac, 10, 0x37) + x.ac = "ABCD" + assert(x.ac[0] == 65+0) + assert(x.ac[3] == 65+3) + assert(x.ac[4] == 0) + assert(x.ac[5] == 0x37) + x.ac = "ABCDEFGHI" + assert(x.ac[8] == 65+8) + assert(x.ac[9] == 0) + x.ac = "ABCDEFGHIJ" -- reduced size + assert(x.ac[8] == 65+8) + assert(x.ac[9] == 65+9) + x.ac = "ABCDEFGHIJKLM" + assert(x.ac[8] == 65+8) + assert(x.ac[9] == 65+9) + do -- copy to a[?] 
+ local vx = ffi.new("struct { char ac[?]; }", 20) + ffi.fill(vx.ac, 20, 0x37) + vx.ac = "ABCDEFGHI" + assert(vx.ac[8] == 65+8) + assert(vx.ac[9] == 0) + end + do -- copy to a[0] + local vx = ffi.new("union { char ac[0]; char c[20]; }") + ffi.fill(vx.ac, 20, 0x37) + vx.ac = "ABCDEFGHI" + assert(vx.ac[8] == 65+8) + assert(vx.ac[9] == 0) + end + -- mismatched type or size in string copy + fails(function(x) x.i32 = "ABCD" end, x) + fails(function(x) x.ai = "ABCD" end, x) + assert(x.ac_guard == 99) -- Check guard + + -- array initialization + x.ai = ffi.new("int[10]") -- zero fill + for i=0,9 do assert(x.ai[i] == 0) end + x.ai = ffi.new("int[10]", -67) -- replicate first element + for i=0,9 do assert(x.ai[i] == -67) end + x.ai = ffi.new("int[10]", 42, -27) -- remainder filled with zero + assert(x.ai[0] == 42) + assert(x.ai[1] == -27) + for i=2,9 do assert(x.ai[i] == 0) end + x.ai = ffi.new("int[10]", 1,2,3,4,5,6,7,8,9,10) + for i=0,9 do assert(x.ai[i] == i+1) end + x.ai = ffi.new("int[10]", {1,2,3,4,5,6,7,8,9,10}) + for i=0,9 do assert(x.ai[i] == i+1) end + -- VLA initialization + do + local v = ffi.new("int[?]", 4) + for i=0,3 do assert(v[i] == 0) end + local v = ffi.new("int[?]", 4, 833) + for i=0,3 do assert(v[i] == 833) end + local v = ffi.new("int[?]", 4, 12, -9) + assert(v[0] == 12 and v[1] == -9 and v[2] == 0 and v[3] == 0) + local v = ffi.new("int[?]", 4, 1,2,3,4) + assert(v[0] == 1 and v[1] == 2 and v[2] == 3 and v[3] == 4) + end + -- too many initializers + fails(function(x) x.ai = {1,2,3,4,5,6,7,8,9,10,11} end, x) + for i=0,9 do assert(x.ai[i] == i+1) end -- but it's partially executed + fails(function(x) + local v = ffi.new("int[?]", 4, 1,2,3,4,5) + end, x) + + -- struct initialization + x.sn = ffi.new("nest_t") -- zero fill + assert(x.sn.e.e2 == 0) + x.sn = ffi.new("nest_t", 1,2) -- remainder filled with zero + assert(x.sn.a == 1 and x.sn.b == 2 and x.sn.c == 0 and x.sn.d == 0) + assert(x.sn.e.e1 == 0 and x.sn.e.e2 == 0) + assert(x.sn.f[0] == 0 and 
x.sn.f[1] == 0) + x.sn = ffi.new("nest_t", 1,2,3,4,{5,6},{7,8}) -- multi-value init + assert(x.sn.a == 1 and x.sn.b == 2 and x.sn.c == 3 and x.sn.d == 4) + assert(x.sn.e.e1 == 5 and x.sn.e.e2 == 6) + assert(x.sn.f[0] == 7 and x.sn.f[1] == 8) + x.sn = ffi.new("nest_t", {1,2,3,4,{5,6},{7,8}}) -- single-value init + assert(x.sn.a == 1 and x.sn.b == 2 and x.sn.c == 3 and x.sn.d == 4) + assert(x.sn.e.e1 == 5 and x.sn.e.e2 == 6) + assert(x.sn.f[0] == 7 and x.sn.f[1] == 8) + -- VLS initialization + do + local v = ffi.new("struct { int x; int a[?]; }", 4) + assert(v.x == 0) + for i=0,3 do assert(v.a[i] == 0) end + local v = ffi.new("struct { int x; int a[?]; }", 4, 9, {833}) + assert(v.x == 9) + -- NYI: fill up VLA in VLS. currently seen as indefinite length + -- for i=0,3 do assert(v.a[i] == 833) end + assert(v.a[0] == 833 and v.a[1] == 0 and v.a[2] == 0 and v.a[3] == 0) + end + -- no multi-value init beyond first level + fails(function(x) + x.sn = ffi.new("nest_t", 1,2,3,4,5,6,7,8) + end, x) + -- too many initializers + fails(function(x) + x.sn = ffi.new("nest_t", 1,2,3,4,{5,6},{7,8}, 9) + end, x) + + -- union initialization + x.ui = ffi.new("uni_t") -- zero fill + assert(x.ui.a == 0 and x.ui.b == 0 and x.ui.c == 0) + x.ui = ffi.new("uni_t", 255) -- initialize first field, remainder is zero + if ffi.abi("le") then + assert(x.ui.a == -1 and x.ui.b == 255 and x.ui.c == 255) + else + assert(x.ui.a == -1 and x.ui.b == -256 and x.ui.c == -16777216) + end + -- too many initializers + fails(function(x) + x.sn = ffi.new("uni_t", 1,2) + end, x) + fails(function() + ffi.new("union { struct { int x; }; int y; }", 1,2) + end) + + -- table converted to array + ffi.fill(x.ai, ffi.sizeof(x.ai), 0x13) + x.ai_guard = 99 + x.ai = {} -- zero fill + for i=0,9 do assert(x.ai[i] == 0) end + x.ai = {42} -- replicate + for i=0,9 do assert(x.ai[i] == 42) end + x.ai = {[0] = -67} -- replicate from index 0 + for i=0,9 do assert(x.ai[i] == -67) end + x.ai = {42, -27} -- remainder filled with zero + 
assert(x.ai[0] == 42) + assert(x.ai[1] == -27) + for i=2,9 do assert(x.ai[i] == 0) end + assert(x.ai_guard == 99) -- Check guard + + -- table converted to struct + ffi.fill(x.si, ffi.sizeof(x.si), 0x74) + x.si_guard = 97 + -- convert from array part + x.si = {} -- zero fill + assert(x.si.a == 0 and x.si.b == 0 and x.si.c == 0) + x.si = {42, 18} -- fill fields in order + assert(x.si.a == 42 and x.si.b == 18 and x.si.c == 0) + x.si = {[0] = -67, 12} -- fill fields in order from index 0 + assert(x.si.a == -67 and x.si.b == 12 and x.si.c == 0) + x.si = {42, -27, 19, 8} -- too many initializers ignored + assert(x.si.a == 42 and x.si.b == -27 and x.si.c == 19) + -- convert from hash part + x.si = {b = 12} + assert(x.si.a == 0 and x.si.b == 12 and x.si.c == 0) + x.si = {b = 12, c = 85, a = 35} + assert(x.si.a == 35 and x.si.b == 12 and x.si.c == 85) + x.si = {b = 19, foo = 1, bar = 2} -- unknown initializers ignored + assert(x.si.a == 0 and x.si.b == 19 and x.si.c == 0) + x.si = {b = 12, 5, 6, 7} -- hash part ignored if array part exists + assert(x.si.a == 5 and x.si.b == 6 and x.si.c == 7) + assert(x.si_guard == 97) -- Check guard + + -- table converted to struct with transparent/nested structs and arrays + ffi.fill(x.sn, ffi.sizeof(x.sn), 0x74) + x.sn = {} -- zero fill + assert(x.sn.e.e2 == 0) + x.sn = {1,2,3,4,{5,6},{7,8}} + assert(x.sn.a == 1 and x.sn.b == 2 and x.sn.c == 3 and x.sn.d == 4) + assert(x.sn.e.e1 == 5 and x.sn.e.e2 == 6) + assert(x.sn.f[0] == 7 and x.sn.f[1] == 8) + x.sn = {c = 10, e = {11,12}, f = {13,14}} + assert(x.sn.a == 0 and x.sn.b == 0 and x.sn.c == 10 and x.sn.d == 0) + assert(x.sn.e.e1 == 11 and x.sn.e.e2 == 12) + assert(x.sn.f[0] == 13 and x.sn.f[1] == 14) + + -- table converted to union + ffi.fill(x.ui, ffi.sizeof(x.ui), 0x58) + x.ui = {} -- zero fill + assert(x.ui.a == 0 and x.ui.b == 0 and x.ui.c == 0) + x.ui = {255, -1, -1} -- only first initializer used + if ffi.abi("le") then + assert(x.ui.a == -1 and x.ui.b == 255 and x.ui.c == 255) + 
else + assert(x.ui.a == -1 and x.ui.b == -256 and x.ui.c == -16777216) + end + x.ui = {b = -1} -- initialize a specific element of the union + if ffi.abi("le") then + assert(x.ui.a == -1 and x.ui.b == -1 and x.ui.c == 65535) + else + assert(x.ui.a == -1 and x.ui.b == -1 and x.ui.c == -65536) + end + + -- copy constructor + do + x.s.v = 1; x.s.w = 2 + local s = ffi.new("bar_t", x.s) + assert(s.v == 1 and s.w == 2) + for i=0,9 do x.ai[i] = i end + local a = ffi.new("int[10]", x.ai) + for i=0,9 do assert(a[i] == i) end + end + + -- assignment to function pointer + x.ppf = ffi.C.strcpy +end + +do + collectgarbage() + local oc = collectgarbage("count") + local cd = ffi.new"struct { struct { int a; } x;}" + local function f(cd) + local x + for i=1,1e5 do x = cd.x end + end + for i=1,2 do + f(cd) + local nc = collectgarbage("count") + assert(nc < oc + 200, "GC step missing for cdata __index") + jit.off(f) + end +end + diff --git a/testsuite/test/lib/ffi/ffi_enum.lua b/testsuite/test/lib/ffi/ffi_enum.lua new file mode 100644 index 0000000000..e8e40ad084 --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_enum.lua @@ -0,0 +1,57 @@ + +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") + +ffi.cdef[[ +typedef enum enum_i { FOO_I = -1, II = 10 } enum_i; +typedef enum enum_u { FOO_U = 1, UU = 10 } enum_u; + +enum_i call_ei_i(int a) asm("call_i"); +enum_u call_eu_i(int a) asm("call_i"); +int call_i_ei(enum_i a) asm("call_i"); +int call_i_eu(enum_u a) asm("call_i"); +]] + +local C = ffi.load("../clib/ctest") + +do + + local t = ffi.new("enum_i[100]") + for i=0,99 do t[i] = "II" end + for i=0,99 do assert(t[i] == "II") end + for i=0,99 do assert(t[i] >= "II") end + for i=0,99 do t[i] = -10 end + for i=0,99 do assert(t[i] == -10) end + for i=0,99 do assert(t[i] ~= 2147483648) end + for i=1,99 do assert(t[i] == t[i-1]) end + assert(t[0]+1 == -9) + assert(t[0] ~= "BB") + fails(function() return t[0] > "BB" end) + + local u = ffi.new("enum_u[100]") + for i=0,99 do u[i] = "UU" 
end + for i=0,99 do assert(u[i] == "UU") end + for i=0,99 do assert(u[i] >= "UU") end + for i=0,99 do u[i] = 4294967296-10 end + for i=0,99 do assert(u[i] == 4294967296-10) end + for i=0,99 do assert(u[i] ~= -10) end + for i=1,99 do assert(u[i] == u[i-1]) end + assert(u[0]+1 == 4294967296-9) + + for i=0,99 do assert(t[i] ~= u[i]) end +end + +do + for i=0,99 do assert(C.call_ei_i(9) == "II") end + for i=0,99 do assert(C.call_eu_i(9) == "UU") end + for i=0,99 do assert(C.call_i_ei("II") == 11) end + for i=0,99 do assert(C.call_i_eu("UU") == 11) end +end + +do + local f = ffi.cast("bool (*)(enum_i)", function(e) return e == "II" end) + assert(f("II")) + assert(not f(0)) +end + diff --git a/testsuite/test/lib/ffi/ffi_gcstep_recursive.lua b/testsuite/test/lib/ffi/ffi_gcstep_recursive.lua new file mode 100644 index 0000000000..cb19df1141 --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_gcstep_recursive.lua @@ -0,0 +1,66 @@ +-- From Robert G. Jakabosky, 2012-03-20 + +local N=tonumber(arg[1] or 10000) + +local ffi=require"ffi" + +ffi.cdef[[ +struct Buffer { void *buf; }; +typedef struct Buffer Buffer; +]] + +local Buffer_mt = { __index = {} } +local Buffer = ffi.typeof("Buffer") + +-- used to track alive objects +local nobj_obj_flags = {} + +local function obj_to_id(ptr) + return tonumber(ffi.cast('uintptr_t', ffi.cast('void *', ptr))) +end + +function obj_type_Buffer_push(val) + local obj = Buffer(val) + local id = obj_to_id(obj) + nobj_obj_flags[id] = true + return obj +end + +local function Buffer_new(len) + local buf = ffi.cast('void *', 0xdeadbeef) + return obj_type_Buffer_push(buf) +end + +function obj_type_Buffer_delete(obj) + local id = obj_to_id(obj) + if not nobj_obj_flags[id] then return nil end + nobj_obj_flags[id] = nil + return obj.buf +end + +local getmeta = debug.getmetatable + +local function Buffer_close(self) + local buf = obj_type_Buffer_delete(self) + getmeta("Buffer_close") -- cause trace to abort + if buf then + self.buf = nil + end +end 
+Buffer_mt.__gc = Buffer_close +Buffer_mt.__index.close = Buffer_close + +ffi.metatype(Buffer, Buffer_mt) + +local cdata = {} +for x=1,2 do + cdata = {} + for i=1,N do + cdata[i] = Buffer_new(1) + end + for i=1,N do + cdata[i]:close() + end + cdata = nil +end + diff --git a/testsuite/test/lib/ffi/ffi_jit_arith.lua b/testsuite/test/lib/ffi/ffi_jit_arith.lua new file mode 100644 index 0000000000..0554fe60aa --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_jit_arith.lua @@ -0,0 +1,155 @@ +local ffi = require("ffi") + +do + local a = ffi.new("int64_t[?]", 101) + for i=1,100 do a[i] = -2 end + for i=1,100 do a[i] = i end + local x, y, m = 0ll, 0ll, 0ll + for i=1,100 do x = x + a[i]; y = y - a[i]; m = -a[i] end + assert(x == 5050) + assert(y == -5050) + assert(m == -100) + local z, z0 = 1ll, 3ll + for i=1,100 do z = a[i] * z0 end + assert(z == 300) + for i=1,100 do z = a[i] * 4ll end -- test MUL -> BSHL rule + assert(z == 400) + z, z0 = 1ll, 0x123456789abcdef0ll + for i=1,100 do z = z0 / a[i] end + assert(z == 0x123456789abcdef0ll / 100) + z, z0 = 1ll, 0x123456789abcdef0ll + for i=1,100 do z = z0 % a[i] end + assert(z == 0x123456789abcdef0ll % 100) + -- use multiple 64 bit PHIs + local t, u, v, w = 0ll, 0ll, 0ll, 0ll + for i=1,100 do t = t + a[i]; u = u + a[i]; v = v + a[i]; w = w + a[i] end + assert(t == 5050) + assert(u == 5050) + assert(v == 5050) + assert(w == 5050) +end + +do + local a = ffi.new("uint64_t[?]", 101) + for i=1,100 do a[i] = i end + local x, y, m = 0ull, 0ull, 0ull + for i=1,100 do x = x + a[i]; y = y - a[i]; m = -a[i] end + assert(x == 5050) + assert(y == 0ull-5050) + assert(m == -100ull) + local z, z0 = 1ull, 3ll + for i=1,100 do z = a[i] * z0 end + assert(z == 300) + z, z0 = 1ull, 0x123456789abcdef0ull + for i=1,100 do z = z0 / a[i] end + assert(z == 0x123456789abcdef0ull / 100) + z, z0 = 1ull, 0x123456789abcdef0ull + for i=1,100 do z = z0 % a[i] end + assert(z == 0x123456789abcdef0ull % 100) +end + +do + local x = 0ll + for i=1,100 do x = x + (-2ll) 
^ (bit.band(i, 15)+1ll) end + assert(x == 262120) +end + +do + local x, a = 0ll, -2ll + for i=1,100 do x = x + a ^ (bit.band(i, 15)+1ll) end + assert(x == 262120) +end + +do + local x = 0ull + for i=1,100 do x = x + (-2ll) ^ (bit.band(i, 15)+1ull) end + assert(x == 262120) +end + +do + for i=1,200 do local j = bit.band(i, 7); assert((j == 0ll) == (j == 0)) end + for i=1,200 do assert((i < 100ll) == (i < 100)) end + for i=1,200 do assert((i <= 100ll) == (i <= 100)) end + for i=-100,100 do assert((i > 100ull) == (i < 0)) end +end + +do + local a = ffi.new("int64_t[?]", 100) + for i=0,99 do + a[i] = math.random(0, 2^32)*0x100000000LL + math.random(0, 2^32) + end + a[92] = 0x10000000LL + a[93] = 0x10000001LL + a[94] = math.random(0, 2^32) + a[95] = a[94] + 0x100000000LL + a[96] = a[94] + 0x100000001LL + a[97] = a[20] + a[98] = 0 + a[99] = -1 + + local function cksum(b) + local bxor, rol = bit.bxor, bit.rol + local x = 0 + for i=0,#b do x = rol(bxor(x, (b[i] and i or 0)), 7) end + return x + end + + local s = [[ + local a, b = ... 
+ local k = 0 + for i=0,99 do + for j=0,99 do + b[k] = a[i] %s a[j] + k = k + 1 + end + end + ]] + + local ap = ffi.new("int64_t *", a) + local b = {} + for i=1,2 do + for _,cmp in ipairs{ "==", "~=", "<", "<=", ">", ">=" } do + local f = assert(loadstring(string.format(s, cmp), "operator"..cmp)) + f(ap, b) + local r1 = cksum(b) + jit.off(f) + f(ap, b) + local r2 = cksum(b) + assert(r1 == r2) + end + ap = ffi.new("uint64_t *", a) + end +end + +do + local a, b = ffi.new("char *"), ffi.new("char *") + local z + for i=1,100 do z = a-b end +end + +do + local x = true + local abc = ffi.cast("const char *", "abc") + for i=1,100 do x = abc == "abc" end + assert(x == true) + for i=1,100 do x = abc == "xyz" end + assert(x == false) + for i=1,100 do x = 0LL == "" end + assert(x == false) + for i=1,100 do x = 0LL == false end + assert(x == false) + for i=1,100 do x = 0LL == nil end + assert(x == false) +end + +-- ra_destpair +do + local x, y = 0, 0 + for i=1,100 do + x = x + i/3LL + y = y + i/5LL + end + assert(x == 1650) + assert(y == 970) +end + diff --git a/testsuite/test/lib/ffi/ffi_jit_call.lua b/testsuite/test/lib/ffi/ffi_jit_call.lua new file mode 100644 index 0000000000..b79d60b106 --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_jit_call.lua @@ -0,0 +1,154 @@ + +local ffi = require("ffi") + +ffi.cdef[[ +int call_10i(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j); +double call_10d(double a, double b, double c, double d, double e, double f, double g, double h, double i, double j); +float call_10f(float a, float b, float c, float d, float e, float f, float g, float h, float i, float j); +int64_t call_ij(int a, int64_t b); +bool call_b(int a) asm("call_i"); + +int64_t call_max(double,double,double,double,double,double,double,double,double,double,double,double,double,double,double,double,double) asm("call_10d"); + +int64_t call_10j_p(int a, int b, int c, int d, int e, int f, int g, int h, int i, const char *p) asm("call_10j"); + +int8_t call_i_i8(int 
a) asm("call_i"); +uint8_t call_i_u8(int a) asm("call_i"); +int16_t call_i_i16(int a) asm("call_i"); +uint16_t call_i_u16(int a) asm("call_i"); +int call_i8_i(int8_t a) asm("call_i"); +int call_u8_i(uint8_t a) asm("call_i"); +int call_i16_i(int16_t a) asm("call_i"); +int call_u16_i(uint16_t a) asm("call_i"); + +int __fastcall fastcall_void(void); +int __fastcall fastcall_i(int a); +int __fastcall fastcall_ii(int a, int b); +int __fastcall fastcall_iii(int a, int b, int c); +int64_t __fastcall fastcall_ji(int64_t a, int b); +double __fastcall fastcall_dd(double a, double b); +int __fastcall fastcall_pp_i(int *a, int *b); + +int __stdcall stdcall_i(int a); +int __stdcall stdcall_ii(int a, int b); +double __stdcall stdcall_dd(double a, double b); +float __stdcall stdcall_ff(float a, float b); +]] + +local lib = ffi.load("../clib/ctest") + +do + local x + for i=1,100 do + x = lib.call_10i(-42, 17, 12345, 9987, -100, 11, 51, 0x12345678, 338, -78901234) + end + assert(x == -42+17+12345+9987-100+11+51+0x12345678+338-78901234) +end + +do + for i=1,100 do + pcall(lib.call_max, i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i) + end +end + +if ffi.abi("64bit") then + local y = ffi.cast("void *", 0x123456789abcdefLL) + local x + for i=1,100 do + lib.call_10j_p(0,0,0,0,0,0,0,0,0, y) + x = lib.call_10j_p(0,0,0,0,0,0,0,0,0, nil) + end + assert(x == 0) +end + +do + local x = 0 + for i=1,100 do + x = x + lib.call_ij(100+i, i*0x300000002LL) + end + assert(x == 0x3b2e0000623eLL) +end + +do + local x + for i=1,100 do + x = lib.call_10d(-42.5, 17.125, 12345.5, 9987, -100.625, 11, 51, 0x12345678, 338, -78901234.75) + end + assert(x == -42.5+17.125+12345.5+9987-100.625+11+51+0x12345678+338-78901234.75) +end + +do + local x + for i=1,100 do + x = lib.call_10f(-42.5, 17.125, 12345.5, 9987, -100.625, 11, 51, 0x123456, 338, -789012.75) + end + assert(x == -42.5+17.125+12345.5+9987-100.625+11+51+0x123456+338-789012.75) +end + +do + local x + for i=-100,100 do + if not lib.call_b(i) then x = i end + end + 
assert(x == -1) + local t = {} + for i=1,100 do t[i] = -1 end + t[90] = 0 + for i=1,100 do + if lib.call_b(t[i]) then x = i end + end + assert(x == 90) +end + +do + local function tail(x) + return lib.call_b(x) + end + for i=1,100 do local a,b,c = tail(1), tail(1), tail(1) end +end + +do + local x = 0 + for i=0x01010080,0x010100ff do x = x + lib.call_i_i8(i) end + assert(x == -8128) + x = 0 + for i=0x01010080,0x010100ff do x = x + lib.call_i_u8(i) end + assert(x == 24384) + x = 0 + for i=0x0101ff80,0x0101ffff do x = x + lib.call_i_i16(i) end + assert(x == -8128) + x = 0 + for i=0x0101ff80,0x0101ffff do x = x + lib.call_i_u16(i) end + assert(x == 8314944) + x = 0 + for i=0x01010080,0x010100ff do x = x + lib.call_i8_i(i) end + assert(x == -8128) + x = 0 + for i=0x01010080,0x010100ff do x = x + lib.call_u8_i(i) end + assert(x == 24640) + x = 0 + for i=0x0101ff80,0x0101ffff do x = x + lib.call_i16_i(i) end + assert(x == -8128) + x = 0 + for i=0x0101ff80,0x0101ffff do x = x + lib.call_u16_i(i) end + assert(x == 8380480) +end + +-- target-specific +if jit.arch == "x86" then + for i=1,100 do assert(lib.fastcall_i(-42) == -41) end + for i=1,100 do assert(lib.fastcall_ii(-42, 17) == -42+17) end + for i=1,100 do assert(lib.fastcall_iii(-42, 17, 139) == -42+17+139) end + for i=1,100 do assert(lib.fastcall_ji(0x123456789LL, -17) == 0x123456789LL-17) end + for i=1,100 do assert(lib.fastcall_dd(12.5, -3.25) == 12.5-3.25) end + local x = lib.fastcall_ji + for i=1,100 do assert(x(0x123456789LL, -17) == 0x123456789LL-17) end + + if jit.os == "Windows" then + for i=1,100 do assert(lib.stdcall_i(-42) == -41) end + for i=1,100 do assert(lib.stdcall_ii(-42, 17) == -42+17) end + for i=1,100 do assert(lib.stdcall_dd(12.5, -3.25) == 12.5-3.25) end + for i=1,100 do assert(lib.stdcall_ff(12.5, -3.25) == 12.5-3.25) end + end +end + diff --git a/testsuite/test/lib/ffi/ffi_jit_conv.lua b/testsuite/test/lib/ffi/ffi_jit_conv.lua new file mode 100644 index 0000000000..d4707db7a8 --- /dev/null +++ 
b/testsuite/test/lib/ffi/ffi_jit_conv.lua @@ -0,0 +1,277 @@ +local ffi = require("ffi") + +local ctest = require("ctest") + +do + local s = ffi.new("struct { int32_t x; }") + s.x = -0x12345678 + for i=1,100 do + s.x = s.x + 1 -- narrowed + end + assert(s.x == -0x12345678+100) +end + +do + local s = ffi.new("struct { uint32_t x; }") + s.x = 0x81234567 + for i=1,100 do + s.x = s.x + 1 -- CONV.num.u32, CONV.u32.num (no narrowing yet) + end + assert(s.x == 0x81234567+100) +end + +do + local s = ffi.new("struct { int8_t x; }") + s.x = 42 + for i=1,100 do + s.x = s.x + 1 + assert(s.x >= -128 and s.x <= 127) -- fwd -> CONV.int.i8 + end + assert(s.x == 142-256) +end + +do + local s = ffi.new("struct { uint8_t x; }") + s.x = 200 + for i=1,100 do + s.x = s.x + 1 + assert(s.x >= 0 and s.x <= 255) -- fwd -> CONV.int.u8 + end + assert(s.x == 300-256) +end + +do + local s = ffi.new("struct { int16_t x; }") + s.x = 32700 + for i=1,100 do + s.x = s.x + 1 + assert(s.x >= -32768 and s.x <= 32767) -- fwd -> CONV.int.i16 + end + assert(s.x == 32800-65536) +end + +do + local s = ffi.new("struct { uint16_t x; }") + s.x = 65450 + for i=1,100 do + s.x = s.x + 1 + assert(s.x >= 0 and s.x <= 65535) -- fwd -> CONV.int.u16 + end + assert(s.x == 65550-65536) +end + +do + local s = ffi.new("union { int32_t x; uint32_t y; }") + s.x = 0x7fffffff - 60 + local x,y = 0,0 + for i=1,100 do + if s.x == 0x7fffffff then s.x = -0x80000000 else s.x = s.x + 1 end + x = x + s.x -- fwd -> CONV.num.int + y = y + s.y -- fwd -> CONV.num.u32 + end + assert(s.x == 0x7fffffff - 60 + 100 - 2^32) + assert(s.y == 0x7fffffff - 60 + 100) + assert(y == (0x7fffffff - 60) * 100 + 5050) + assert(x == y - 40*2^32) +end + +do + local s = ffi.new("union { int32_t x; uint32_t y; }") + local x, z = 0, 2^31 + 42 + for i=1,100 do + s.y = z + x = x + s.x -- fwd -> CONV.int.u32 (dummy) + end + assert(x == 100*(-2^31 + 42)) +end + +do + local s = ffi.new("union { int8_t x; uint8_t y; }") + s.x = 42 + local x,y = 0,0 + for i=1,100 do 
+ s.x = s.x + 1 + x = x + s.x -- fwd -> CONV.int.i8, CONV.num.int + y = y + s.y -- fwd -> CONV.int.u8, CONV.num.int + end + assert(s.x == 142 - 256) + assert(s.y == 142) + assert(y == 42 * 100 + 5050) + assert(x == y - (100-(127-42))*256) +end + +do + local a = ffi.new("uint32_t[?]", 101) + for i=1,100 do a[i] = 0x80000000+i end + local x = 0 + for i=1,100 do + x = bit.bxor(x, a[i]) -- FOLD TOBIT + CONV.num.u32 + end + assert(x == 100) +end + +do + local a = ffi.new("uint32_t[?]", 101) + for i=1,100 do a[i] = 0x80000000+i end + local x = 0 + for i=1,100 do + x = bit.bxor(a[i], 0) -- FOLD TOBIT + CONV.num.u32 + end + assert(x == -0x80000000+100) +end + +do + local v = ffi.new("float", 12.5) + local x = 0 + for i=1,100 do + x = x + tonumber(v) -- CONV.num.flt + end + assert(x == 100*12.5) +end + +do + local v = ffi.new("uint32_t", 0x80000000) + local x = 0 + for i=1,100 do + x = x + tonumber(v) -- CONV.num.u32 + end + assert(x == 100*0x80000000) +end + +do + local v = ffi.new("int64_t", 0x1234567800000000ll) + local x = 0 + for i=1,100 do + x = x + tonumber(v) -- CONV.num.i64 + end + assert(x == 100*0x12345678*2^32) +end + +do + local v = ffi.new("uint64_t", 0x89abcdef00000000ull) + local x = 0 + for i=1,100 do + x = x + tonumber(v) -- CONV.num.u64 + end + assert(x == 100*0x89abcdef*2^32) +end + +do + local a = ffi.new("int64_t[?]", 101) + for i=1,100 do a[i] = -i end + local x = 0 + for i=1,100 do + x = x + tonumber(a[i]) -- CONV.num.i64 + end + assert(x == -5050) +end + +do + local a = ffi.new("uint64_t[?]", 101) + for i=1,100 do a[i] = 2^63+2^32*i end + local x = 0 + for i=1,100 do + x = x + tonumber(a[i]) -- CONV.num.u64 + end + assert(x == 2^63*100+2^32*5050) +end + +do + local v = ffi.new("complex", 12.5, -3.25) + local x = 0 + for i=1,100 do + x = x + tonumber(v) + end + assert(x == 100*12.5) +end + +do + local s = ffi.new("struct { int64_t x;}") + for i=1,100 do + s.x = 0x123456789abcdef0LL + end + assert(tonumber(s.x) == tonumber(0x123456789abcdef0LL)) +end 
+ +do + local s = ffi.new("struct { uint64_t x;}") + for i=1,100 do + s.x = 0x823456789abcdef0ULL + end + assert(tonumber(s.x) == tonumber(0x823456789abcdef0ULL)) +end + +do + ffi.cdef[[ + typedef enum { AA, BB, CC = -42 } foo_i; + typedef enum { DD, EE, FF = 0x80000000u } foo_u; + ]] + local s = ffi.new("struct { foo_i x; foo_u y;}") + for i=1,100 do + s.x = "CC" + assert(s.x == -42) + s.x = "BB" + assert(s.x == 1) + s.y = "FF" + assert(s.y == 0x80000000) + end + local st = ffi.typeof(s) + for i=1,100 do s = st() end + assert(s.x == 0 and s.y == 0) + for i=1,100 do s = st("CC", "EE") end + assert(s.x == -42 and s.y == 1) + local ei = ffi.new("foo_i", "CC") + local eu = ffi.new("foo_u", "EE") + for i=1,100 do s = st(ei, eu) end + assert(s.x == -42 and s.y == 1) + local x + for i=1,100 do x = tonumber(ei) end + assert(x == -42) +end + +do + local s = ffi.new("struct { const char *x; const char *y;}") + local a, tmp = "abcd", "ab" + for i=1,100 do + s.x = "abc" + s.y = string.sub(a, 1, 2) + end + assert(ffi.string(s.x) == "abc") + assert(ffi.string(s.y) == "ab") +end + +do + local s = ffi.new("struct { bool b[200]; int i[200]; double d[200];}") + for i=0,199 do s.i[i] = i-100; s.d[i] = i-100 end + for i=0,99 do s.b[i] = 0 end + for i=100,199 do s.b[i] = 1 end + for i=0,99 do assert(s.b[i] == false) end + for i=100,199 do assert(s.b[i] == true) end + for i=0,199 do s.b[i] = s.i[i] end + for i=0,199 do assert(s.b[i] == (i ~= 100)) end + for i=0,199 do s.b[i] = s.d[i] end + for i=0,199 do assert(s.b[i] == (i ~= 100)) end +end + +do + local a = ffi.new("int16_t[100]", 1) + for i=1,99 do a[i] = a[i] + a[i-1] end + assert(a[99] == 100) +end + +do + local ud = ctest.lightud(12345678) + local s = ffi.new("struct { void *p; }") + for i=1,100 do + assert(ffi.cast("uintptr_t", ud) == 12345678) + s.p = ud + end + assert(ffi.cast("uintptr_t", s.p) == 12345678) +end + +do + local x = ffi.new("struct { int & x;}", ffi.new("int[1]", 42)) + local z + for i=1,100 do z = x.x end + 
assert(z == 42) +end diff --git a/testsuite/test/lib/ffi/ffi_lex_number.lua b/testsuite/test/lib/ffi/ffi_lex_number.lua new file mode 100644 index 0000000000..e26650effd --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_lex_number.lua @@ -0,0 +1,51 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") + +local function checklex(t) + for i=1,1e9,2 do + local s = t[i+1] + if not s then break end + local s2 = assert(loadstring("return tostring("..s..")"))() + if s2 ~= t[i] then + print(s2) + error("lexer failed for '"..s.."'", 2) + end + end +end + +checklex{ + "0LL", "0ll", + "0LL", "0LL", + "0ULL", "0ull", + "0ULL", "0ULl", + "18446744073709551615ULL", "18446744073709551615llu", + "9223372036854775807LL", "0x7fffffffffffffffll", + "9223372036854775808ULL", "0x8000000000000000ull", + "1311768467463790320LL", "0x123456789abcdef0ll", + "-1LL", "-1ll", + "18446744073709551615ULL", "-1ull", + "-9223372036854775807LL", "-0x7fffffffffffffffll", + "9223372036854775808ULL", "-0x8000000000000000ull", + "0+0i", "0i", + "0+0i", "0I", + "0+12.5i", "12.5i", + "0+4660i", "0x1234i", + "0+infI", "1e400i", + "0-infI", "-1e400i", + "0-12.5i", "-12.5i", + "0-0i", "-0i", +} + +checkfail({ + "0l", + "0lll", + "0u", + "0ul", + "0ulll", + "0wll", + "0xll", + ".0ll", + "0ii", +}, function(s) assert(loadstring("return "..s)) end) + diff --git a/testsuite/test/lib/ffi/ffi_metatype.lua b/testsuite/test/lib/ffi/ffi_metatype.lua new file mode 100644 index 0000000000..2db717f479 --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_metatype.lua @@ -0,0 +1,245 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") + +ffi.cdef[[ +typedef struct { int x; } idx1_t; +typedef struct { int x; } idx2_t; +typedef struct { int x; } idx3_t; +typedef struct { int x,y; } arith_t; +typedef struct { void *p; } gc_t; +]] + +local function ptreq(a, b) + return ffi.cast("void *", a) == ffi.cast("void *", b) +end + +do + local nidx = {} + local tp = ffi.metatype("idx1_t", { + __index = { foo = 99, 
method = function(c, v) return v end }, + __newindex = nidx, + }) + + fails(function() ffi.metatype("idx1_t", {}) end) + + local s = tp(1234) + assert(s.foo == 99) + assert(s.x == 1234) + -- bad field in __index metatable + fails(function(s) local x = s.bar end, s) + assert(s:method(123) == 123) + s.bar = 42 + assert(nidx.bar == 42) + + local cs = ffi.new("const idx1_t", 9876) + assert(cs.foo == 99) + assert(cs.x == 9876) + -- write to const struct + fails(function(cs) cs.bar = 42 end, cs) + + local cp = ffi.new("const idx1_t *", cs) + assert(cp.foo == 99) + assert(cp.x == 9876) + -- write to const struct pointer + fails(function(cp) cp.bar = 42 end, cp) +end + +do + local uc, uk, uv + local tp = ffi.metatype("idx2_t", { + __index = function(c, k, x, y) + assert(x == nil and y == nil) + uc, uk = c, k; return 99 + end, + __newindex = function(c, k, v) uc, uk, uv = c, k, v end, + }) + + local s = tp(1234) + assert(s.foo == 99) + assert(ptreq(uc, s) and uk == "foo" and uv == nil); uc,uk,uv=nil,nil,nil + assert(s.x == 1234) + assert(uc == nil and uk == nil and uv == nil); uc,uk,uv=nil,nil,nil + + s.bar = 42 + assert(ptreq(uc, s) and uk == "bar" and uv == 42); uc,uk,uv=nil,nil,nil + s[10] = 11 + assert(ptreq(uc, s) and uk == 10 and uv == 11); uc,uk,uv=nil,nil,nil + + local p = ffi.new("idx2_t *", s) + assert(p.foo == 99) + assert(ptreq(uc, p) and uk == "foo" and uv == nil); uc,uk,uv=nil,nil,nil + assert(p.x == 1234) + assert(uc == nil and uk == nil and uv == nil); uc,uk,uv=nil,nil,nil + -- pointer dereference has precedence + assert(ptreq(p[0], p)) + assert(uc == nil and uk == nil and uv == nil); uc,uk,uv=nil,nil,nil + -- pointer dereference has precedence + fails(function(p) p[0] = 11 end, p) +end + +do + local uc, uk, uv + local ti, tn = {}, {} + local tp = ffi.metatype("idx3_t", { + __index = setmetatable(ti, + { __index = function(c, k) uc, uk = c, k; return 99 end }), + __newindex = setmetatable(tn, + { __newindex = function(c, k, v) uc, uk, uv = c, k, v end }), + 
}) + + local s = tp(1234) + assert(s.foo == 99) + assert(uc == ti and uk == "foo" and uv == nil) + uc, uk, uv = nil, nil, nil + assert(s.x == 1234) + assert(uc == nil and uk == nil and uv == nil) + + s.bar = 42 + assert(uc == tn and uk == "bar" and uv == 42) + uc, uk, uv = nil, nil, nil + s[10] = 11 + assert(uc == tn and uk == 10 and uv == 11) + uc, uk, uv = nil, nil, nil +end + +do + local tp + tp = ffi.metatype("arith_t", { + __add = function(a, b) return tp(a.x+b.x, a.y+b.y) end, + __sub = function(a, b) return tp(a.x-b.x, a.y-b.y) end, + __mul = function(a, z) return tp(a.x*z, a.y*z) end, + __div = function(z, a) return tp(a.x*z, a.y*z) end, + __concat = setmetatable({}, { __call = function(x) return 99 end }), + __len = function(x) return 2 end, + __call = function(a) return a.x+a.y end, + __tostring = function(a) return "foo" end, + __newindex = function(a, k, v) a.y = v end, + __index = { + diff = function(a) return a.x-a.y end, + }, + }) + + local a = tp(10, 20) + local b = tp(1, 2) + local c = a + b + assert(c.x == 11 and c.y == 22) + assert(c:diff() == -11) + assert(c() == 33) + local d = a - b + assert(d.x == 9 and d.y == 18) + assert(d:diff() == -9) + assert(d() == 27) + local e = a * 3 + assert(e.x == 30 and e.y == 60) + local f = 3LL / a + assert(f.x == 30 and f.y == 60) + assert(1 .. c == 99) + assert(c .. 1 == 99) + assert(c .. 
d == 99) + assert(tostring(c) == "foo") + assert(tostring(ffi.cast("arith_t *", c)) == "foo") + c.foo = 42 + assert(c.y == 42) + + local p = ffi.new("arith_t *", a) + local g1 = p + p + assert(g1.x == 20 and g1.y == 40) + local g2 = p[0] + p[0] + assert(g2.x == 20 and g2.y == 40) + assert(p() == 30) + + local q = ffi.new("arith_t &", a) + fails(function(p) local y = q[0] + q[0] end, q) + local h = q + q + assert(h.x == 20 and h.y == 40) + + local diff = 0 + for i=1,100 do diff = a:diff() end + assert(diff == -10) + + for i=1,100 do c.foo = i end + assert(c.y == 100) + + local z = tp(1, 3) + for i=1,100 do z = z + a end + assert(z.x == 1001 and z.y == 2003) + + local x = 0 + for i=1,100 do x = x + #a end + assert(x == 200) + + local x = 0 + for i=1,100 do x = x + p() end + assert(x == 3000) +end + +do + local count = 0 + local tp = ffi.metatype("gc_t", { + __gc = function(x) count = count + 1 end, + }) + + local a = tp() + a = nil + collectgarbage() + assert(count == 1) + local b,c = tp(), tp() + b = nil + collectgarbage() + assert(count == 2) + c = nil + collectgarbage() + assert(count == 3) + + local z + for i=1,100 do z = tp() end + z = nil + collectgarbage() + assert(count == 103) + + local t = {} + for i=1,100 do t[i] = tp() end + for i=1,100 do ffi.gc(t[i], nil) end + t = nil + collectgarbage() + assert(count == 103) +end + +do + local tp = ffi.metatype([[ +struct { + static const int Z42 = 42; + enum { Z39 = 39 }; + int x; +}]], { + __new = function(tp, x) + return ffi.new(tp, x or -1) + end, + __index = { test = function(x) return x+1 end, x = "hello" } + }) + assert(tp.Z42 == 42) + assert(tp.Z39 == 39) + assert(tp.test(99) == 100) + fails(function() tp.Z42 = 1 end) + fails(function() tp.Z39 = 1 end) + assert(tp.x == "hello") -- Not sure this is a good idea to allow that. 
+ fails(function() tp.x = 1 end) + local o = tp() + assert(o.Z42 == 42) + assert(o.Z39 == 39) + assert(o.test(55) == 56) + fails(function() o.Z42 = 1 end) + fails(function() o.Z39 = 1 end) + assert(o.x == -1) + o.x = 5 + assert(o.x == 5) +end + +do + local fb = ffi.new("struct { int x; }", 99) + local xt = ffi.metatype("struct { }", { __index = fb }) + local o = xt() + assert(o.x == 99) +end + diff --git a/testsuite/test/lib/ffi/ffi_new.lua b/testsuite/test/lib/ffi/ffi_new.lua new file mode 100644 index 0000000000..9cdbd538cb --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_new.lua @@ -0,0 +1,106 @@ +local ffi = require("ffi") +local bit = require("bit") + +dofile("../common/ffi_util.inc") + +ffi.cdef([[ +typedef struct { int a,b,c; } foo1_t; +typedef int foo2_t[?]; +void *malloc(size_t size); +void free(void *ptr); +]]) + +do + assert(ffi.sizeof("foo1_t") == 12) + local cd = ffi.new("foo1_t") + assert(ffi.sizeof(cd) == 12) + local foo1_t = ffi.typeof("foo1_t") + assert(ffi.sizeof(foo1_t) == 12) + cd = foo1_t() + assert(ffi.sizeof(cd) == 12) +end + +do + assert(ffi.sizeof("foo2_t", 3) == 12) + local cd = ffi.new("foo2_t", 3) + assert(ffi.sizeof(cd) == 12) + local foo2_t = ffi.typeof("foo2_t") + fails(ffi.sizeof, foo2_t) + assert(ffi.sizeof(foo2_t, 3) == 12) + cd = foo2_t(3) + assert(ffi.sizeof(cd) == 12) +end + +do + local tpi = ffi.typeof("int") + local tpb = ffi.typeof("uint8_t") + local t = {} + for i=1,200 do t[i] = tpi end + t[100] = tpb + local x = 0 + for i=1,200 do x = x + tonumber(ffi.new(t[i], 257)) end + assert(x == 199*257 + 1) +end + +do + local oc = collectgarbage("count") + for al=0,15 do + local align = 2^al -- 1, 2, 4, ..., 32768 + local ct = ffi.typeof("struct { char __attribute__((aligned("..align.."))) a; }") + for i=1,100 do + local cd = ct() + local addr = tonumber(ffi.cast("intptr_t", ffi.cast("void *", cd))) + assert(bit.band(addr, align-1) == 0) + end + end + local nc = collectgarbage("count") + assert(nc < oc + 3000, "GC step missing for 
ffi.new") +end + +do + local t = {} + for i=1,100 do t[i] = ffi.new("int[?]", i) end + assert(ffi.sizeof(t[100]) == 400) + for i=0,99 do assert(t[100][i] == 0) end +end + +do + local t = {} + local ct = ffi.typeof("struct { double x; int y[?];}") + for i=1,100 do t[i] = ct(i) end + assert(ffi.sizeof(t[100]) == 408) + for i=0,99 do assert(t[100].y[i] == 0) end +end + +do + local ct = ffi.typeof("struct __attribute__((aligned(16))) { int x; }") + local y + for i=1,200 do + local x = ct() + if i == 150 then y = x end + end + assert(bit.band(ffi.cast("intptr_t", ffi.cast("void *", y)), 15) == 0) +end + +do + local q + local p = ffi.gc(ffi.new("int[1]"), function(x) q = x end) + p = nil + collectgarbage() + assert(type(q) == "cdata") + q = nil + collectgarbage() + assert(q == nil) +end + +do + local p = ffi.gc(ffi.C.malloc(2^20), ffi.C.free) + p = nil + collectgarbage() +end + +do + local p = ffi.gc(ffi.new("int[1]"), function(x) assert(type(x) == "cdata") end) + -- test for lua_close() cleanup. 
+end + diff --git a/testsuite/test/lib/ffi/ffi_parse_array.lua b/testsuite/test/lib/ffi/ffi_parse_array.lua new file mode 100644 index 0000000000..3a9616d730 --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_parse_array.lua @@ -0,0 +1,78 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") + +checkfail{ + "int [", + "int [-1]", + "int [[1]]", + "int [10][]", + "int [10][?]", + "int [][]", + "int [][?]", + "int [?][]", + "int [?][?]", + "int [0x10000][0x2000]", + "int [256][256][256][256]", + "int [10](void)", + "int (void)[10]", + "int &[10]", + "union { double x; int a[?]; }", +} + +ffi.cdef([[ + typedef int foo1_t[10]; + typedef foo1_t foo2_t[5]; +]]) +assert(ffi.sizeof("foo1_t") == 40) +assert(ffi.sizeof("foo2_t") == 200) + +local P = ffi.sizeof("void *") + +checktypes{ + 10, 1, "char [10]", + 4*10, 4, "int [10]", + 4*10, 4, "int [10]", + 4*10*5, 4, "int [10][5]", + 4*10*5*3*2*7, 4, "int [10][5][3][2][7]", + 4*10*5, 4, "int ([10])[5]", + P*10, P, "int *[10]", + P, P, "int (*)[10]", + P*5, P, "int (*[5])[10]", + 8*10, 4, "struct { int x; char y; } [10]", + P*5*10, P, "volatile int *(* const *[5][10])(void)", + nil, 4, "int []", + 4*10, 8, "int __attribute__((aligned(8))) [10]", + 4*10, 8, "__attribute__((aligned(8))) int [10]", + 4*10, 8, "int [10] __attribute__((aligned(8)))", + 97, 1, "char ['a']", + 83, 1, "char ['\\123']", + 79, 1, "char ['\x4F']", + 5, 1, "char [sizeof(\"aa\" \"bb\")]", + 80, 8, "double [10]", +} + +do + assert(ffi.sizeof("int [?]", 10) == 4*10) + local id = ffi.typeof("const short [?]") + assert(ffi.sizeof(id, 10) == 2*10) + assert(ffi.sizeof(id, 0) == 0*10) + fails(ffi.sizeof, id) + assert(ffi.sizeof(id, -1) == nil) + assert(ffi.sizeof(id, 0x80000000) == nil) + assert(ffi.sizeof(id, 0x40000000) == nil) + assert(ffi.sizeof(id, 0x3fffffff) == 2*0x3fffffff) +end + +do + assert(ffi.sizeof("struct { double x; int a[?]; }", 10) == 8+4*10) + local id = ffi.typeof("struct { int x; short a[?]; }") + assert(ffi.sizeof(id, 10) == 4+2*10) + 
assert(ffi.sizeof(id, 0) == 4+0*10) + fails(ffi.sizeof, id) + assert(ffi.sizeof(id, -1) == nil) + assert(ffi.sizeof(id, 0x80000000) == nil) + assert(ffi.sizeof(id, 0x40000000) == nil) + assert(ffi.sizeof(id, 0x3ffffffd) == 4+2*0x3ffffffd) +end + diff --git a/testsuite/test/lib/ffi/ffi_parse_basic.lua b/testsuite/test/lib/ffi/ffi_parse_basic.lua new file mode 100644 index 0000000000..c054bcfb89 --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_parse_basic.lua @@ -0,0 +1,131 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") + +checkfail{ + "", + " ", + "\n", + "1", + ".", + ";", + ",", + "*", + "[]", + "()", + "(*)", + "//", + "/*", + "xyz", + "const", + "volatile", + "typedef", + "extern", + "static", + "auto", + "register", + "struct", + "union", + "sizeof", + "int int", + "int char", + "int double", + "int;", +} + +checktypes{ + 1, 1, "char", + 1, 1, " \n\r\t\vchar \n\r\t\v", + 1, 1, "ch\\\nar", + 1, 1, "char /* abc */", + 1, 1, "char /* abc */ const", + 1, 1, "char // abc\n const", +} + +checktypes{ + nil, 1, "void", + 1, 1, "bool", + 1, 1, "_Bool", + 4, 4, "_Bool int", + 1, 1, "char", + 1, 1, "signed char", + 1, 1, "unsigned char", + 2, 2, "short", + 2, 2, "signed short", + 2, 2, "unsigned short", + 4, 4, "int", + 4, 4, "signed int", + 4, 4, "unsigned int", + 4, 4, "signed", + 4, 4, "unsigned", + 4, 4, "float", + 8, 8, "long long", + 8, 8, "signed long long", + 8, 8, "unsigned long long", + 8, 8, "double", + -- NYI: long double is architecture- and compiler-specific. 
+ 8, 4, "_Complex float", + 16, 8, "_Complex", + 16, 8, "_Complex double", +} + +-- mode/vector_size attributes +checktypes{ + 1, 1, "int __attribute__((mode(QI)))", + 2, 2, "int __attribute__((mode(HI)))", + 4, 4, "int __attribute__((mode(SI)))", + 8, 8, "int __attribute__((mode(DI)))", + 16, 16, "int __attribute__((mode(TI)))", + 32, 16, "int __attribute__((mode(OI)))", + 4, 4, "float __attribute__((mode(SF)))", + 8, 8, "float __attribute__((mode(DF)))", + 2, 2, "int __attribute__((mode(V2QI)))", + 16, 16, "float __attribute__((mode(V4SF)))", + 32, 16, "double __attribute__((mode(V8SF)))", + 8, 8, "char __attribute__((vector_size(8)))", + 16, 16, "int __attribute__((vector_size(16)))", + 32, 16, "double __attribute__((vector_size(32)))", + 64, 16, "double __attribute__((vector_size(64)))", +} + +-- ABI-specific types: +local L = (ffi.abi("32bit") or ffi.abi("win")) and 4 or 8 +local P = ffi.abi("32bit") and 4 or 8 +local W = ffi.abi("win") and 2 or 4 + +checktypes{ + L, L, "long", + L, L, "signed long", + L, L, "unsigned long", + P, P, "int *", + P, P, "int **", + 4, 4, "int * __ptr32", +} + +checktypes{ + P, P, "ptrdiff_t", + P, P, "size_t", + W, W, "wchar_t", + 1, 1, "int8_t", + 2, 2, "int16_t", + 4, 4, "int32_t", + 8, 8, "int64_t", + 1, 1, "uint8_t", + 2, 2, "uint16_t", + 4, 4, "uint32_t", + 8, 8, "uint64_t", + P, P, "intptr_t", + P, P, "uintptr_t", +} + +checktypes{ + 1, 8, "char __attribute__((aligned(8)))", + 1, 8, "char __attribute((aligned(8)))", + 1, 8, "char __attribute__((__aligned__(8)))", + 1, 8, "__attribute__((aligned(8))) char", + 1, 8, "char __declspec(align(8))", + 1, 8, "__declspec(align(8)) char", + 1, 2, "char __attribute__((aligned(8))) const __attribute__((aligned(2)))", + 1, 16, "char __attribute__((aligned(8))) const __attribute__((aligned(16)))", +} + diff --git a/testsuite/test/lib/ffi/ffi_parse_cdef.lua b/testsuite/test/lib/ffi/ffi_parse_cdef.lua new file mode 100644 index 0000000000..4bb5d903eb --- /dev/null +++ 
b/testsuite/test/lib/ffi/ffi_parse_cdef.lua @@ -0,0 +1,77 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") + +checkfail({ + "int", + "int aa1; int aa2 ", + "static int x;", + "static const long long x = 1;", -- NYI + "static const double x = 1;", -- NYI + "static const bool x = 1;", -- NYI (intentional, need true/false) + "struct { static int x = 1; };", + ";;static int y" +}, ffi.cdef) + +ffi.cdef[[ +static const int K_42a = 42; +static const char K_42b = 42+256; +static const short K_M1a = 65535; +static const unsigned short K_65535a = 65535; +static const int K_1b = 0xffffffff >> 31; +static const int K_1c = 0xffffffffu >> 31; +static const int K_M1b = (int)0xffffffff >> 31; +]] + +checktypes{ + 42, 1, "char[K_42a]", + 42, 1, "char[K_42b]", + 1, 1, "char[-K_M1a]", + 65535, 1, "char[K_65535a]", + 1, 1, "char[K_1b]", + 1, 1, "char[K_1c]", + 1, 1, "char[-K_M1b]", +} + +ffi.cdef[[ +struct str1 { + enum { + K_99 = 99 + }; + static const int K_55 = 55; +} extk; +]] + +checktypes{ + 99, 1, "char[K_99]", + 99, 1, "char[extk.K_99]", + 99, 1, "char[((struct str1)0).K_99]", + 99, 1, "char[((struct str1 *)0)->K_99]", + 55, 1, "char[extk.K_55]", +} + +checkfail{ + "char[K_55]", +} + +ffi.cdef[[ +extern int func1(void); +extern int func2(); +static int func3(); +static inline int func4(int n) +{ + int i, k = 0; + float x = 1.0f; + for (i = 0; i < n; i++) { + k += i; + } + return k; +} +;;; +]] + +ffi.cdef[[ +int ext1; +extern int ext2; +]] + diff --git a/testsuite/test/lib/ffi/ffi_parse_struct.lua b/testsuite/test/lib/ffi/ffi_parse_struct.lua new file mode 100644 index 0000000000..16a3d053e4 --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_parse_struct.lua @@ -0,0 +1,259 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") + +checkfail{ + "struct", + "struct {", + "struct xx xx {}", + "struct { int x }", + "struct { int x, }", + "struct { int x,y }", + "struct { void x; }", + "struct { int x(void); }", + "struct recursive1 { struct recursive1 { } 
x; }", + "union", + "union {", + "union xx xx {}", + "union { int x }", + "union { int x, }", + "union { int x,y }", + "union { void x; }", + "union { int x(void); }", + "union recursive1 { union recursive1 { } x; }", +} + +-- NYI: rollback doesn't recover struct state +-- ffi.cdef("struct zzz") +-- fails(ffi.cdef, "struct zzz { int") +-- ffi.cdef("struct zzz { int x; }") + +ffi.cdef("struct foo; typedef struct foo foo_t;") +assert(ffi.sizeof("struct foo") == nil) +assert(ffi.sizeof("foo_t") == nil) +ffi.cdef("struct foo { int x,y; };") +assert(ffi.sizeof("struct foo") == 8) +assert(ffi.sizeof("foo_t") == 8) +assert(ffi.sizeof(ffi.typeof("struct foo")) == 8) +assert(ffi.sizeof(ffi.typeof("foo_t")) == 8) +ffi.cdef("struct foo;") +fails(ffi.cdef, "struct foo {};") +fails(ffi.cdef, "union foo;") +fails(ffi.cdef, "union foo {};") +fails(ffi.cdef, "enum foo;") +fails(ffi.cdef, "enum foo { ZZZ1 };") + +local P = ffi.sizeof("void *") +local A = (ffi.arch == "x86" and not ffi.abi("win")) and 4 or 8 + +checktypes{ + 0, 1, "struct {}", + 1, 1, "struct { char x; }", + 2, 1, "struct { char x,y; }", + 4, 1, "struct { char x,y; char a,b; }", + 4, 2, "struct { char x; short y; }", + 4, 2, "struct { short x; char y; }", + 8, 4, "struct { char x; int y; }", + 8, 4, "struct { int x; char y; }", + 12, 4, "struct { char x; int y; char z; }", + P*4, P, "struct { char x,*y,**z,a,b,c,d; }", + 64, 4, "struct { struct { struct { struct { int x,y; } a,b; } a,b; } a,b; }", + 4, 4, "struct { struct { struct { struct { int x; }; }; }; }", + 8, 4, "struct { struct foo; }", + 8, 4, "struct { foo_t; }", + 8, 8, "struct __attribute__((aligned(sizeof(foo_t)))) { int a; }", + 6, 2, "struct { char a; char x; short y; char z; char c; }", + 10, 2, "struct { char a; struct { char x; short y; char z; } b; char c; }", + 8, A, "struct { double a; }", + A+8, A, "struct { int a; double b; }", + 8, A, "struct { long long a; }", + A+8, A, "struct { int a; long long b; }", + 16, A, "struct { _Complex a; }", + 
A+16, A, "struct { int a; _Complex b; }", + 8, 8, "struct { float __attribute__((mode(__V2SF__))) a; }", + 16, 8, "struct { int a; float __attribute__((mode(__V2SF__))) b; }", + 16, 8, "struct { float __attribute__((mode(__V2SF__))) a[2]; }", + 24, 8, "struct { int a; float __attribute__((mode(__V2SF__))) b[2]; }", + 16, 16, "struct { float __attribute__((vector_size(16))) a; }", + 32, 16, "struct { int a; float __attribute__((vector_size(16))) b; }", +} + +checktypes{ + 0, 1, "union {}", + 1, 1, "union { char x; }", + 1, 1, "union { char x,y; }", + 2, 2, "union { char x; short y; }", + 2, 2, "union { short x; char y; }", + 4, 4, "union { char x; int y; }", + 4, 4, "union { int x; char y; }", + 4, 4, "union { char x; int y; short z; }", + P, P, "union { char x,*y,**z,a,b,c,d; }", + 4, 4, "union { union { union { union { int x,y; } a,b; } a,b; } a,b; }", + 4, 4, "union { union { union { union { int x; }; }; }; }", + 2, 2, "union { union { short x; }; char y; }", + 2, 2, "union { struct { short x; }; char y; }", + 4, 2, "struct { union { short x; }; char y; }", + 2, 1, "union { struct { char a,b; }; char y; }", + 2, 1, "struct { union { char a,b; }; char y; }", + 8, A, "union { double a; }", + 8, A, "union { int a; double b; }", + 8, A, "union { long long a; }", + 8, A, "union { int a; long long b; }", + 16, A, "union { _Complex a; }", + 16, A, "union { int a; _Complex b; }", + 8, 8, "union { float __attribute__((mode(__V2SF__))) a; }", + 8, 8, "union { int a; float __attribute__((mode(__V2SF__))) b; }", + 16, 16, "union { float __attribute__((vector_size(16))) a; }", + 16, 16, "union { int a; float __attribute__((vector_size(16))) b; }", +} + +do + local ct + ct = ffi.typeof("struct { int a; char b; short c; int d; }") + assert(ffi.offsetof(ct, "a") == 0) + assert(ffi.offsetof(ct, "b") == 4) + assert(ffi.offsetof(ct, "c") == 6) + assert(ffi.offsetof(ct, "d") == 8) + ct = ffi.typeof("struct { char a; struct { char x; short y; char z; }; char c; }") + 
assert(ffi.offsetof(ct, "a") == 0) + assert(ffi.offsetof(ct, "x") == 2) + assert(ffi.offsetof(ct, "y") == 4) + assert(ffi.offsetof(ct, "z") == 6) + assert(ffi.offsetof(ct, "c") == 8) + ct = ffi.typeof("struct { char a; struct { short b; struct { int c; }; }; }") + assert(ffi.offsetof(ct, "a") == 0) + assert(ffi.offsetof(ct, "b") == 4) + assert(ffi.offsetof(ct, "c") == 8) + ct = ffi.typeof("struct { int a; double b; }") + assert(ffi.offsetof(ct, "a") == 0) + assert(ffi.offsetof(ct, "b") == A) +end + +checkfail{ + "struct { int :; }", + "struct { int a:; }", + "struct { int a:bad; }", + "struct { int a:0; }", + "struct { int a:33; }", + "struct { int a:-1; }", + "struct { _Bool a:2; }", + "struct { double a:2; }", + "struct { complex a:2; }", + "struct { int __attribute__((mode(__TI__))) a:2; }", + "struct { int __attribute__((vector_size(16))) a:2; }", + "struct { int a[2]:2; }", + "struct { void a:2; }", +} + +checktypes{ + 4, 4, "struct { unsigned a:1; }", + 4, 4, "struct { unsigned a:1, b:1, c:1; }", + 1, 1, "struct { _Bool a:1, b:1, c:1; }", + 8, 4, "struct { unsigned a:16, b:16, c:16; }", + 8, 4, "struct { unsigned a:17, b:16, c:16; }", + 12, 4, "struct { unsigned a:17, b:16, c:17; }", + 12, 4, "struct { unsigned a:16, b:17, c:16; }", + 8, 4, "struct { unsigned a:16, :16, c:16; }", + 8, 4, "struct { unsigned a:17, :16, c:16; }", + 12, 4, "struct { unsigned a:17, :16, c:17; }", + 12, 4, "struct { unsigned a:16, :17, c:16; }", + 8, 4, "struct { unsigned a:16, :0, c:16; }", + 4, 4, "struct { unsigned a:16, b:16, :0, :0; }", + 8, 4, "struct { unsigned a:16, :0, :0, :0, c:16; }", + 1, 1, "struct { char a:1; _Bool b:1; }", + 1, 1, "struct { char a:1; signed char b:1; unsigned char c:1; }", +} + +-- NYI: bit fields > 32 bit +-- local L = ffi.alignof("struct { long long a; }") +-- checktypes{ +-- L, L, "struct { long long a:1; }", +-- } + +-- Bit field packing. 
+checktypes{ + 1, 1, "struct { _Bool a:1, b:1, c:1; }", + 4, 4, "struct { short a:9; int b:9; char c; }", + 4, 4, "struct { char a; int b:7; }", + 4, 4, "struct { short a; char b; int c:7; }", + 4, 4, "struct { char a:7; int b:7; int c:7; int d:10; }", + 4, 1, "struct { char a:7; char b:7; char c:7; char d:7; }", + 4, 4, "struct { char a:7; int b:7, c:7, d:7; int e:4; }", + 4, 4, "struct { char a:7; int b:7, c:7, d:7; char e:4; }", + 5, 1, "struct { char a:7; char b:7, c:7, d:7; char e:4; }", + 4, 1, "struct __attribute__((packed)) { char a:7; char b:7, c:7, d:7; char e:4; }", + 4, 4, "struct { char a:7; int b:7; int c:7; int d:10; }", + 8, 4, "struct { char a:7; int b:7; char c:7; int d:10; }", + 4, 1, "struct __attribute__((packed)) { char a:7; int b:7; char c:7; int d:10; }", + 4, 1, "struct { char a:7; int b:7; char c:7; int d:10; } __attribute__((packed))", + 2, 1, "struct __attribute__((packed)) { char a:4; char b:8; }", + 2, 1, "struct __attribute__((packed)) { char a:4; char :0; char b:4; }", + 1, 1, "struct __attribute__((packed)) { _Bool a:1; _Bool b:1; }", + 2, 1, "struct __attribute__((packed)) { _Bool a:1; _Bool b:1 __attribute((aligned(1))); }", + 4, 2, "struct __attribute__((packed)) { _Bool a:1; _Bool b:1 __attribute((aligned(2))); }", + 8, 4, "struct { _Bool a:1; int b __attribute((aligned(2))); }", + 16, 8, "struct { _Bool a:1; int b __attribute((aligned(8))); }", + 6, 2, "struct { _Bool a:1; int b __attribute((aligned(2))) __attribute((packed)); }", + 6, 2, "struct __attribute__((packed)) { _Bool a:1; int b __attribute((aligned(2))); }", + 6, 2, "struct __attribute__((packed)) { _Bool a:1; int b __attribute((aligned(2))) __attribute((packed)); }", +} + +do + ffi.cdef[[ + struct foo_packorig { char a; int b; short c; }; + #pragma pack(1) + struct foo_pack1 { char a; int b; short c; }; + #pragma pack(2) + struct foo_pack2 { char a; int b; short c; }; + #pragma pack(4) + struct foo_pack4 { char a; int b; short c; }; + #pragma pack(8) + struct 
foo_pack8 { char a; int b; short c; }; + #pragma pack() + struct foo_packdef { char a; int b; short c; }; + #pragma pack(push) + struct foo_packpush { char a; int b; short c; }; + #pragma pack(1) + struct foo_packpush1 { char a; int b; short c; }; + #pragma pack(pop) + struct foo_packpop { char a; int b; short c; }; + #pragma pack(push,2) + struct foo_packpush2 { char a; int b; short c; }; + #pragma pack(pop) + struct foo_packpop2 { char a; int b; short c; }; + ]] + + assert(ffi.sizeof("struct foo_packorig") == 12) + assert(ffi.sizeof("struct foo_pack1") == 7) + assert(ffi.sizeof("struct foo_pack2") == 8) + assert(ffi.sizeof("struct foo_pack4") == 12) + assert(ffi.sizeof("struct foo_pack8") == 12) + assert(ffi.sizeof("struct foo_packdef") == 12) + assert(ffi.sizeof("struct foo_packpush") == 12) + assert(ffi.sizeof("struct foo_packpush1") == 7) + assert(ffi.sizeof("struct foo_packpop") == 12) + assert(ffi.sizeof("struct foo_packpush2") == 8) + assert(ffi.sizeof("struct foo_packpop2") == 12) +end + +do + ffi.cdef[[ + #pragma pack(2) + struct foo_packalign8 { + char a; int y __attribute((aligned(8))); + }; + typedef int __attribute((aligned(8))) int_align8; + struct foo_packintalign8 { + char a; int_align8 y; + }; + typedef int __attribute((aligned(1))) int_align1; + struct foo_packintalign1 { + char a; int_align1 y; + }; + ]] + + assert(ffi.sizeof("struct foo_packalign8") == 6) + assert(ffi.sizeof("struct foo_packintalign8") == 6) + assert(ffi.sizeof("struct foo_packintalign1") == 5) +end + diff --git a/testsuite/test/lib/ffi/ffi_tabov.lua b/testsuite/test/lib/ffi/ffi_tabov.lua new file mode 100644 index 0000000000..ba621960bd --- /dev/null +++ b/testsuite/test/lib/ffi/ffi_tabov.lua @@ -0,0 +1,12 @@ +local ffi = require("ffi") + +local last = 0 + +assert(pcall(function() + for i=1,65536 do + last = i + ffi.typeof"struct {}" + end +end) == false) + +assert(last > 20000) diff --git a/testsuite/test/lib/ffi/index b/testsuite/test/lib/ffi/index new file mode 100644 index 
0000000000..59e36dd8b0 --- /dev/null +++ b/testsuite/test/lib/ffi/index @@ -0,0 +1,12 @@ +bit64.lua +luajit>=2.1 +cdata_var.lua +copy_fill.lua +err.lua +istype.lua +jit_array.lua +jit_complex.lua +jit_misc.lua +jit_struct.lua +meta_tostring.lua +redir.lua +type_punning.lua diff --git a/testsuite/test/lib/ffi/istype.lua b/testsuite/test/lib/ffi/istype.lua new file mode 100644 index 0000000000..5aba7759e7 --- /dev/null +++ b/testsuite/test/lib/ffi/istype.lua @@ -0,0 +1,88 @@ +local ffi = require("ffi") + +do --- 1 + local void_t = ffi.typeof("void") + assert(ffi.istype(void_t, void_t)) + assert(ffi.istype("const void", void_t)) + + assert(ffi.istype("void", "void") == false) -- 2nd arg is a string. + assert(ffi.istype("double", 1.5) == false) -- 2nd arg is a number. +end + +do --- 2 + local i8_t = ffi.typeof("int8_t") + local u8_t = ffi.typeof("uint8_t") + local i32_t = ffi.typeof("int32_t") + assert(ffi.istype(i32_t, i32_t) == true) + assert(ffi.istype("const int32_t", i32_t) == true) + + assert(ffi.istype("bool", u8_t) == false) + assert(ffi.istype(i8_t, u8_t) == false) + assert(ffi.istype(i32_t, u8_t) == false) + assert(ffi.istype(u8_t, i32_t) == false) + assert(ffi.istype("double", i32_t) == false) + + assert(ffi.istype("int64_t", ffi.typeof("long long"))) + assert(ffi.istype("long long", ffi.typeof("int64_t"))) +end + +do --- 3 + local ptr_t = ffi.typeof("int *") + local p = ptr_t() + assert(ffi.istype(ptr_t, ptr_t) == true) + assert(ffi.istype(ptr_t, p) == true) + assert(ffi.istype(p, ptr_t) == true) + assert(ffi.istype("const int *", ptr_t) == true) + assert(ffi.istype("const int * const", ptr_t) == true) + assert(ffi.istype("unsigned int *", ptr_t) == true) + + assert(ffi.istype("char *", ptr_t) == false) + assert(ffi.istype("void *", ptr_t) == false) +end + +do --- 4 + ffi.cdef[[ +typedef int istype_arr_t[10]; +typedef const istype_arr_t istype_carr_t; +typedef struct { int x; } istype_struct_t; +]] + + local arr_t = ffi.typeof("istype_arr_t") + local carr_t 
= ffi.typeof("istype_carr_t") + assert(ffi.istype(arr_t, arr_t) == true) + assert(ffi.istype("int[10]", arr_t) == true) + + assert(ffi.istype("int[11]", arr_t) == false) + assert(ffi.istype("int[]", arr_t) == false) + assert(ffi.istype("int *", arr_t) == false) + + assert(ffi.istype("const int[10]", arr_t) == true) + assert(ffi.istype("volatile int[10]", arr_t) == true) + assert(ffi.istype(carr_t, arr_t) == true) + + local struct_t = ffi.typeof("istype_struct_t") + local structp_t = ffi.typeof("istype_struct_t *") + assert(ffi.istype(struct_t, struct_t) == true) + assert(ffi.istype("const istype_struct_t", struct_t) == true) + assert(ffi.istype("struct { int x; }", struct_t) == false) + assert(ffi.istype(struct_t, structp_t) == true) -- struct ptr is ok for struct. + assert(ffi.istype(structp_t, struct_t) == false) +end + +do --- 5 + local int_t = ffi.typeof("int") + local t = {} + for i=1,200 do t[i] = int_t() end + t[100] = ffi.new("uint8_t") + local x = 0 + for i=1,200 do if not ffi.istype("int", t[i]) then x = x + i end end + assert(x == 100) + x = 0 + for i=1,200 do if not ffi.istype(int_t, t[i]) then x = x + i end end + assert(x == 100) + for i=1,200 do t[i] = int_t end + t[100] = ffi.typeof("uint8_t") + x = 0 + for i=1,200 do if not ffi.istype(t[i], int_t) then x = x + i end end + assert(x == 100) +end diff --git a/testsuite/test/lib/ffi/jit_array.lua b/testsuite/test/lib/ffi/jit_array.lua new file mode 100644 index 0000000000..e8de4af13c --- /dev/null +++ b/testsuite/test/lib/ffi/jit_array.lua @@ -0,0 +1,104 @@ +local ffi = require("ffi") + +do --- smoke + local types = { + "int8_t", "uint8_t", + "int16_t", "uint16_t", + "int32_t", "uint32_t", + "int64_t", "uint64_t", + "float", "double", + } + for j,tp in ipairs(types) do + local t = ffi.new(tp.."[?]", 301) + for i=1,300 do t[i] = 1 end + for i=1,300 do assert(t[i] == 1) end + for i=1,300 do t[i] = t[i-1] end -- reassoc across PHIs, a[i-1] forwarding + for i=1,300 do assert(t[i] == 0) end + for i=1,300 do 
t[i] = i end + local x = 0 + for i=1,300 do x = x + t[i] end + if tp == "int8_t" then assert(x == 862) + elseif tp == "uint8_t" then assert(x == 33630) + else assert(x == 45150) end + end +end + +do --- int array pointer arithmetic + local a = ffi.new("int[?]", 101) + local p = a+1; + for i=1,100 do + p[0] = i + assert(p - a == i) -- pointer difference + p = p + 1 -- pointer increment by 4 bytes + end + for i=1,100 do assert(a[i] == i) end + for i=1,100 do assert((a+i)[0] == i) end -- pointer arithmetic + for i=1,100 do assert((i+a)[0] == i) end -- pointer arithmetic +end + +do --- double array pointer arithmetic + local a = ffi.new("double[?]", 101) + local p = a+1; + for i=1,100 do + p[0] = i + p = p + 1 -- pointer increment by 8 bytes + end + for i=1,100 do assert(a[i] == i) end + for i=1,100 do assert((a+i)[0] == i) end -- pointer arithmetic +end + +do --- double array pointer comparisons +bit + local a = ffi.new("double[?]", 201) + local p = a+3 + for i=1,200 do local j = bit.band(i, 7); assert((a+j == p) == (j == 3)) end + p = a+100; + for i=1,200 do assert((a+i < p) == (i < 100)) end + for i=1,200 do assert((a+i <= p) == (i <= 100)) end +end + +do --- constant offset in double array index + local a = ffi.new("double[?]", 100) + for i=1,100 do a[i-1LL] = i end + for i=1,100 do assert(a[100LL-i] == 101-i) end +end + +do --- fixed index of minus one + local a = ffi.new("int[10]") + local p = a+1 + local k = ffi.new("int", -1) + a[0] = 42 + for i=1,100 do assert(p[-1] == 42); assert(p[k] == 42) end +end + +do --- uint8_t array element comparisons + local a = ffi.new("uint8_t[?]", 256) + for i=0,255 do a[i] = i end + for i=1,255 do assert(a[i] >= 1) end + for i=0,254 do assert(a[i] <= 254) end +end + +do --- int32_t array bit/bswap tricks +bit + local a = ffi.new("int32_t[?]", 256) + local tobit, bswap, shl = bit.tobit, bit.bswap, bit.lshift + for i=0,255 do a[i] = bswap(i+0x12345600) end + for i=0,255 do assert(a[i] == tobit(shl(i, 24)+0x00563412)) end + for 
i=0,255 do assert(bswap(a[i]) == tobit(i+0x12345600)) end +end + +do --- int32_t shift/rotate/and +bit + local a = ffi.new("int32_t[?]", 256) + local shl, shr, rol, band = bit.lshift, bit.rshift, bit.rol, bit.band + for i=0,255 do a[i] = i + shl(i, 8) + shl(i, 16) end + + for i=0,255 do assert(shl(band(a[i], 0xff), 8) == shl(i, 8)) end + for i=0,255 do assert(band(shl(a[i], 8), 0xff00) == shl(i, 8)) end + + for i=0,255 do assert(shr(band(a[i], 0xff00), 8) == i) end + for i=0,255 do assert(band(shr(a[i], 8), 255) == i) end + + for i=0,255 do assert(rol(band(a[i], 0xff), 8) == shl(i, 8)) end + for i=0,255 do assert(band(rol(a[i], 8), 0xff00) == shl(i, 8)) end + + for i=0,255 do assert(shl(band(a[i], 0x000000ff), 24) == shl(i, 24)) end + for i=0,255 do assert(shr(band(a[i], 0xffff0000), 16) == i) end +end diff --git a/testsuite/test/lib/ffi/jit_complex.lua b/testsuite/test/lib/ffi/jit_complex.lua new file mode 100644 index 0000000000..3296f0cbfa --- /dev/null +++ b/testsuite/test/lib/ffi/jit_complex.lua @@ -0,0 +1,109 @@ +local ffi = require("ffi") + +local cx = ffi.typeof("complex") +local cxf = ffi.typeof("complex float") + +ffi.cdef[[ +typedef struct jit_complex_chain_t { + struct jit_complex_chain_t *next; + complex c; +} jit_complex_chain_t; +]] + +do --- field access + local c = cx(1, 2) + local x + for i=1,100 do + x = c.re + c.im + end + assert(x == 3) +end + +do --- one element circular chain, named indexing + local cp = ffi.new("jit_complex_chain_t") + local p = cp + p.next = p + p.c = cx(1, 2) + local x,y = 0,0 + for i=1,100 do + x = x + p.c.re + y = y + p.c.im + p = p.next + end + assert(x == 100) + assert(y == 200) +end + +do --- one element circular chain, array indexing + local cp = ffi.new("jit_complex_chain_t") + local p = cp + p.next = p + p.c = cx(1, 2) + local x,y = 0,0 + for i=1,100 do + x = x + p.c[0] + y = y + p.c[1] + p = p.next + end + assert(x == 100) + assert(y == 200) +end + +do --- one-arg initialiser + local ca = ffi.new("complex[?]", 
101) + for i=1,100 do + ca[i] = cx(i) -- handled as init single + end + local x,y = 0,0 + for i=1,100 do + x = x + ca[i].re + y = y + ca[i].im + end + assert(x == 5050) + assert(y == 0) +end + +do --- two-arg initialiser + local ca = ffi.new("complex[?]", 101) + for i=1,100 do + ca[i] = cx(i, -i) + end + local x,y = 0,0 + for i=1,100 do + x = x + ca[i].re + y = y + ca[i].im + end + assert(x == 5050) + assert(y == -5050) +end + +do --- float<>double conversions + local ca = ffi.new("complex[?]", 101) + local caf = ffi.new("complex float[?]", 101) + for i=1,100 do + ca[i] = cxf(i, -i) + caf[i] = cx(i, -i) + end + local x,y = 0,0 + for i=1,100 do + x = x + caf[i].re + ca[i].re + y = y + caf[i].im + ca[i].im + end + assert(x == 2*5050) + assert(y == -2*5050) +end + +do --- Complex struct field + local s = ffi.new("struct { complex x;}") + for i=1,100 do + s.x = 12.5i + end + assert(s.x.re == 0) + assert(s.x.im == 12.5) +end + +do --- Index overflow for complex is ignored + local c = cx(1, 2) + local x + for i=1e7,1e7+100 do x = c[i] end +end diff --git a/testsuite/test/lib/ffi/jit_misc.lua b/testsuite/test/lib/ffi/jit_misc.lua new file mode 100644 index 0000000000..41e4737bc7 --- /dev/null +++ b/testsuite/test/lib/ffi/jit_misc.lua @@ -0,0 +1,109 @@ +local ffi = require("ffi") + +do --- errno + ffi.errno(42) + local x = 0 + for i=1,100 do x = x + ffi.errno() end + assert(x == 4200) + ffi.errno(0) +end + +do --- string + local a = ffi.new("uint8_t[?]", 101) + for i=0,99 do a[i] = i end + local s + for i=1,90 do s = ffi.string(a+i, 10) end + assert(s == "Z[\\]^_`abc") + for i=1,90 do s = ffi.string(a+i) end + assert(s == "Z[\\]^_`abc") +end + +do --- fill + local a = ffi.new("uint8_t[?]", 100) + local x = 0 + for i=0,90 do x = x + a[i]; ffi.fill(a+i, 10, i); x = x + a[i] end + assert(x == 8100) + for i=1,100 do ffi.fill(a, 15, 0x1234) end + assert(a[0] == 0x34 and a[14] == 0x34 and a[15] == 15) + local b = ffi.new("uint32_t[?]", 104) + for i=0,100 do ffi.fill(b+i, 15, 
0x1234) end + assert(b[0] == 0x34343434) + assert(b[103] == (ffi.abi("le") and 0x343434 or 0x34343400)) +end + +do --- copy array elements + local a = ffi.new("uint8_t[?]", 100) + local b = ffi.new("uint8_t[?]", 100) + for i=0,99 do b[i] = i end + local x = 0 + for i=0,90 do x = x + a[i]; ffi.copy(a+i, b+i, 1); x = x + a[i] end + assert(x == 4095) + local x = 0 + for i=0,90 do ffi.copy(b+i, a+90-i, 10); x = x + b[i] end + assert(x == 4095) +end + +do --- copy from string + local a = ffi.new("uint8_t[?]", 100, 42) + for i=0,90 do ffi.copy(a+i, "abc") end + local x = 0 + for i=0,99 do x = x + a[i] end + assert(x == 9276) +end + +do --- copy structures + local tp = ffi.typeof("struct { int x, y; }") + local a = tp(1, 2) + local b = tp(3, 4) + local x = 0 + for i=1,100 do a.y = i; ffi.copy(b, a, 8); x = x + b.y end + assert(x == 5050) + local x = 0 + for i=1,100 do a.y = i; local t = tp(a); x = x + t.y end + assert(x == 5050) +end + +do --- init struct from first field, complex + local tp = ffi.typeof("struct { complex x, y; }") + local cx = ffi.typeof("complex") + local a = tp(cx(1, 2), cx(3, 4)) + local x = 0 + for i=1,100 do a.y = i; local t = tp(a); x = x + t.y.re end + assert(x == 5050) +end + +do --- int array as parameterised type + local tp = ffi.typeof("int[10]") + local a = tp(42) + local b = ffi.new(ffi.typeof("struct { $ x; }", tp)) + for i=1,100 do b.x = a end + assert(b.x[0] == 42 and b.x[9] == 42) +end + +do --- double array as parameterised type + local tp = ffi.typeof("double[5]") + local a = tp(42) + local b = ffi.new(ffi.typeof("struct { $ x; }", tp)) + for i=1,100 do b.x = a end + assert(b.x[0] == 42 and b.x[4] == 42) + b.x[0] = 0 + for i=1,100 do ffi.copy(b.x, a, ffi.sizeof(a)) end + assert(b.x[0] == 42 and b.x[4] == 42) +end + +do --- abi + local x, y + for i=1,100 do x = ffi.abi("32bit"); y = ffi.abi("64bit") end + assert(x == ffi.abi("32bit")) + assert(y == ffi.abi("64bit")) + for _,s in ipairs{"64bit", "32bit", "fpu", "softfp", "hardfp", 
"eabi", "win", "le", "be"} do + for i=1,100 do x = ffi.abi(s) end + assert(x == ffi.abi(s)) + end +end + +do --- typeof constructed typeof + local ct = ffi.typeof("struct { int x; }") + local cd = ct() + for i=1,100 do assert(ffi.typeof(cd) == ct) end +end diff --git a/testsuite/test/lib/ffi/jit_struct.lua b/testsuite/test/lib/ffi/jit_struct.lua new file mode 100644 index 0000000000..8aa64c1bca --- /dev/null +++ b/testsuite/test/lib/ffi/jit_struct.lua @@ -0,0 +1,201 @@ +local ffi = require("ffi") + +ffi.cdef[[ +typedef struct { int a, b, c; } jit_struct_foo_t; +typedef struct { int a, b, c; } jit_struct_foo2_t; +typedef struct { int a[10]; int b[10]; } jit_struct_sarr_t; +typedef struct jit_struct_chain_t { + struct jit_struct_chain_t *next; + int v; +} jit_struct_chain_t; +]] + +do --- iteration variable as field name + local s = ffi.new("jit_struct_foo_t") + for j,k in ipairs{ "a", "b", "c" } do + for i=1,100 do s[k] = s[k] + j end + end + assert(s.a == 100) + assert(s.b == 200) + assert(s.c == 300) +end + +do --- constant field names + local s = ffi.new("jit_struct_foo_t") + for i=1,100 do + s.a = s.a + 1 + s.b = s.b + 2 + s.c = s.c + 3 + end + assert(s.a == 100) + assert(s.b == 200) + assert(s.c == 300) +end + +do --- constants from structure + local s = ffi.new("jit_struct_foo_t") + local s2 = ffi.new("jit_struct_foo2_t", 1, 2, 3) + for i=1,100 do + s.a = s.a + s2.a + s.b = s.b + s2.b + s.c = s.c + s2.c + end + assert(s.a == 100) + assert(s.b == 200) + assert(s.c == 300) +end + +do --- adding to array elements + local s = ffi.new("jit_struct_sarr_t") + for i=1,100 do + s.a[5] = s.a[5] + 1 + s.b[5] = s.b[5] + 2 + end + assert(s.a[5] == 100) + assert(s.b[5] == 200) +end + +do --- double indexing + local s = ffi.new([[ + struct { + struct { + int x; + int b[10]; + } a[100]; + }]]) + s.a[10].b[4] = 10 + s.a[95].b[4] = 95 + local x = 0 + for i=1,100 do + x = x + s.a[i-1].b[4] -- reassociate offsets for base and index + end + assert(x == 105) +end + +do --- 
structurally identical + local s1 = ffi.new("struct { int a; }") + local s2 = ffi.new("struct { int a; }") + local x = 0 + for j=1,2 do + for i=1,100 do + s2.a = i + s1.a = 1 + x = x + s2.a -- cannot forward across aliasing store + end + if j == 1 then + assert(x == 5050) + s2 = s1 + x = 0 + else + assert(x == 100) + end + end +end + +do --- structurally different + local s1 = ffi.new("struct { int a; }") + local s2 = ffi.new("struct { char a; }") + local x = 0 + for j=1,2 do + for i=1,100 do + s2.a = i + s1.a = 1 + x = x + s2.a -- can forward across aliasing store + end + if j == 1 then + assert(x == 5050) + s2 = s1 -- this will cause a side trace + x = 0 + else + assert(x == 100) + end + end +end + +do --- union + local s = ffi.new("union { uint8_t a; int8_t b; }") + local x = 0 + for i=1,200 do + s.a = i + x = x + s.b -- same offset, but must not alias (except if sign-extended) + end + assert(x == 1412) +end + +do --- circular chain + local s1 = ffi.new("jit_struct_chain_t") + local s2 = ffi.new("jit_struct_chain_t") + local s3 = ffi.new("jit_struct_chain_t") + s1.next = s2 + s2.next = s3 + s3.next = s1 + local p = s1 + for i=1,99 do + p.v = i + p = p.next + end + assert(s1.v == 97) + assert(s2.v == 98) + assert(s3.v == 99) +end + +do --- int struct initialiser + local ct = ffi.typeof("struct { int a,b,c; }") + local x,y,z = 0,0,0 + for i=1,100 do + local s = ct(i, i+1) + x = x + s.a + y = y + s.b + z = z + s.c + end + assert(x == 5050) + assert(y == 5150) + assert(z == 0) +end + +do --- double struct initialiser + local ct = ffi.typeof("struct { double a,b,c; }") + local x,y,z = 0,0,0 + for i=1,100 do + local s = ct(i, i+1) + x = x + s.a + y = y + s.b + z = z + s.c + end + assert(x == 5050) + assert(y == 5150) + assert(z == 0) +end + +do --- pointer / int struct initialiser + local s1 = ffi.new("jit_struct_chain_t") + local s + for i=1,100 do + s = ffi.new("jit_struct_chain_t", s1, i) + end + assert(tonumber(ffi.cast("int", s.next)) == + 
tonumber(ffi.cast("int", ffi.cast("jit_struct_chain_t *", s1)))) + assert(s.v == 100) +end + +do --- unstable pointer/int type struct initialiser + local ct = ffi.typeof("struct { int *p; int y; }") + local s + for i=1,200 do + if i == 100 then ct = ffi.typeof("jit_struct_chain_t") end + s = ct(nil, 10) + end + assert(s.v == 10) +end + +do --- upvalued int box + local s = ffi.new("struct { int x; }", 42) + local function f() + for i=1,100 do + s.x = i + assert(s.x == i) + end + end + f() +end + diff --git a/testsuite/test/lib/ffi/meta_tostring.lua b/testsuite/test/lib/ffi/meta_tostring.lua new file mode 100644 index 0000000000..bb065e16ab --- /dev/null +++ b/testsuite/test/lib/ffi/meta_tostring.lua @@ -0,0 +1,55 @@ +local ffi = require("ffi") + +ffi.cdef[[ +typedef union meta_tostring_foo_t { + int64_t i64; + uint64_t u64; + complex cd; + double d[2]; + complex float cf; + float f[2]; +} meta_tostring_foo_t; +]] + +do --- tostring/typeof semi-roundtrip + assert(tostring(ffi.typeof("int (*(*[1][2])[3][4])[5][6]")) == + "ctype<int (*(*[1][2])[3][4])[5][6]>") + assert(tostring(ffi.typeof("int (*const)(void)")) == + "ctype<int (*const)()>") + assert(tostring(ffi.typeof("complex float(*(void))[2]")) == + "ctype<complex float (*())[2]>") + assert(tostring(ffi.typeof("complex*")) == "ctype<complex *>") +end + +do --- assorted union fields + local foo_t = ffi.typeof("meta_tostring_foo_t") + local x = foo_t() + + assert(tostring(foo_t) == "ctype<union meta_tostring_foo_t>") + assert(string.find(tostring(x), "^cdata<union meta_tostring_foo_t>: ")) + + x.i64 = -1; + assert(tostring(x.i64) == "-1LL") + assert(tostring(x.u64) == "18446744073709551615ULL") + + x.d[0] = 12.5 + x.d[1] = -753.125 + assert(tostring(x.cd) == "12.5-753.125i") + x.d[0] = -12.5 + x.d[1] = 753.125 + assert(tostring(x.cd) == "-12.5+753.125i") + x.d[0] = 0/-1 + x.d[1] = 0/-1 + assert(tostring(x.cd) == "-0-0i") + x.d[0] = 1/0 + x.d[1] = -1/0 + assert(tostring(x.cd) == "inf-infI") + x.d[0] = -1/0 + x.d[1] = 0/0 + assert(tostring(x.cd) == "-inf+nanI") + + x.f[0] = 12.5 + x.f[1] = -753.125 + assert(tostring(x.cf) == "12.5-753.125i") +end + 
diff --git a/testsuite/test/lib/ffi/redir.lua b/testsuite/test/lib/ffi/redir.lua new file mode 100644 index 0000000000..c492055adc --- /dev/null +++ b/testsuite/test/lib/ffi/redir.lua @@ -0,0 +1,19 @@ +local ffi = require("ffi") + +do --- function + ffi.cdef[[ + int redir_foo(const char *s) asm("strlen"); + ]] + + assert(ffi.C.redir_foo("abcd") == 4) +end + +do --- variable -windows + ffi.cdef[[ + int redir_bar asm("errno"); + ]] + + ffi.C.redir_bar = 14 + assert(ffi.C.redir_bar == 14) + ffi.C.redir_bar = 0 +end diff --git a/testsuite/test/lib/ffi/type_punning.lua b/testsuite/test/lib/ffi/type_punning.lua new file mode 100644 index 0000000000..ac70b4b4cb --- /dev/null +++ b/testsuite/test/lib/ffi/type_punning.lua @@ -0,0 +1,138 @@ +local ffi = require("ffi") + +local u = ffi.new([[ +union { + int8_t i8[8]; + uint8_t u8[8]; + int16_t i16[4]; + uint16_t u16[4]; + int32_t i32[2]; + uint32_t u32[2]; + int64_t i64[1]; + uint64_t u64[1]; + void *v[2]; + float f[2]; + double d[1]; +} +]]) + +do --- float -> u32 type punning at same offset + local x = 0LL + for i=1,100 do + u.f[0] = i + x = x + u.u32[0] + end + assert(x == 110888222720LL) +end + +do --- double -> u64 type punning at same offset + local x = 0LL + for i=1,100 do + u.d[0] = i + x = x + u.u64[0] + end + assert(x == 1886586031403171840ULL) +end + +do --- i8 -> u8 type punning at same offset (fwd -> CONV.int.u8) + local x = 0 + for i=-100,100 do + u.i8[0] = i + x = x + u.u8[0] + end + assert(x == 25600) +end + +do --- p32/p64 -> u64 type punning at same offset (32 bit: different size) + local x = 0LL + u.u64[0] = 0 + for i=-100,150 do + u.v[0] = ffi.cast("void *", ffi.cast("ptrdiff_t", i)) + x = x + u.u64[0] + end + assert(x == (ffi.abi"64bit" and 6275ULL or + (ffi.abi"le" and 0x6400001883ULL or 0x188300000000ULL))) +end + +do --- u16 -> u8 type punning at overlapping offsets [0] + local x = 0 + for i=255,520 do + u.u16[0] = i + x = x + u.u8[0] + end + assert(x == (ffi.abi"be" and 274 or 32931)) +end + +do --- 
u16 -> u8 type punning at overlapping offsets [1] + local x = 0 + for i=255,520 do + u.u16[0] = i + x = x + u.u8[1] + end + assert(x == (ffi.abi"le" and 274 or 32931)) +end + +do --- i16 -> i32 type punning at overlapping offsets [0] + local x = 0 + u.i32[0] = 0 + for i=-100,150 do + u.i16[0] = i + x = x + u.i32[0] + end + assert(x == (ffi.abi"be" and 411238400 or 6559875)) +end + +do --- i16 -> i32 type punning at overlapping offsets [1] + local x = 0 + u.i32[0] = 0 + for i=-100,150 do + u.i16[1] = i + x = x + u.i32[0] + end + assert(x == (ffi.abi"le" and 411238400 or 6559875)) +end + +do --- double -> i32 type punning at overlapping offsets [0] + local x = 0 + for i=1.5,120,1.1 do + u.d[0] = i + x = x + u.i32[0] + end + assert(x == (ffi.abi"be" and 116468870297 or -858993573)) +end + +do --- double -> i32 type punning at overlapping offsets [1] + local x = 0 + for i=1.5,120,1.1 do + u.d[0] = i + x = x + u.i32[1] + end + assert(x == (ffi.abi"le" and 116468870297 or -858993573)) +end + +do --- u32 -> u64 type punning, constify u, 32 bit SPLIT: fold KPTR + local u = ffi.new("union { struct { uint32_t lo, hi; }; uint64_t u64; }") + + local function conv(lo, hi) + u.lo = lo + u.hi = hi + return u.u64 + end + + local x = 0ll + for i=1,100 do + x = x + conv(i, i) + end + assert(x == 21689584849850ULL) +end + +do --- u64 -> u32 -> u64 type punning with KPTR + local s = ffi.new("union { int64_t q; int32_t i[2]; }") + local function f() + s.q = 0 + s.i[1] = 1 + return s.q + end + for i=1,50 do f() f() f() end + assert(f() ~= 0) +end diff --git a/testsuite/test/lib/index b/testsuite/test/lib/index new file mode 100644 index 0000000000..cc9d7d7301 --- /dev/null +++ b/testsuite/test/lib/index @@ -0,0 +1,8 @@ +base +bit.lua +bit +math +string +table +coroutine +ffi +ffi +contents.lua \ No newline at end of file diff --git a/testsuite/test/lib/math/abs.lua b/testsuite/test/lib/math/abs.lua new file mode 100644 index 0000000000..4223a78062 --- /dev/null +++ 
b/testsuite/test/lib/math/abs.lua @@ -0,0 +1,16 @@ +local abs = math.abs +local expect_error = require"common.expect_error" + +do --- smoke + assert(abs(-1.5) == 1.5) + assert(abs("-1.5") == 1.5) +end + +do --- argcheck + expect_error(function() abs() end, + "bad argument #1 to 'abs' (number expected, got no value)") + expect_error(function() abs(false) end, + "bad argument #1 to 'abs' (number expected, got boolean)") + expect_error(function() abs("a") end, + "bad argument #1 to 'abs' (number expected, got string)") +end diff --git a/testsuite/test/lib/math/constants.lua b/testsuite/test/lib/math/constants.lua new file mode 100644 index 0000000000..ec35b4cecb --- /dev/null +++ b/testsuite/test/lib/math/constants.lua @@ -0,0 +1,8 @@ +do --- pi + assert(math.pi == 3.141592653589793) +end + +do --- huge + assert(math.huge > 0) + assert(1/math.huge == 0) +end diff --git a/testsuite/test/lib/math/index b/testsuite/test/lib/math/index new file mode 100644 index 0000000000..944e1aebd4 --- /dev/null +++ b/testsuite/test/lib/math/index @@ -0,0 +1,3 @@ +abs.lua +constants.lua +random.lua diff --git a/testsuite/test/lib/math/random.lua b/testsuite/test/lib/math/random.lua new file mode 100644 index 0000000000..dc2ca00bda --- /dev/null +++ b/testsuite/test/lib/math/random.lua @@ -0,0 +1,47 @@ +local random = math.random +local MAX_SEED = 10 + +do --- generally uniform in range [0, 1) + local N = 100 + local min, max = math.min, math.max + for j=1,MAX_SEED do + math.randomseed(j) + local lo, hi, sum = math.huge, -math.huge, 0 + for i=1,N do + local x = random() + assert(0 <= x and x < 1) + sum = sum + x + lo = min(lo, x) + hi = max(hi, x) + end + assert(lo*N < 15 and (1-hi)*N < 15) + assert(sum > N*0.45 and sum < N*0.55) + end +end + +do --- all in range [1, 10] + math.randomseed(1) + local counts = setmetatable({}, {__index = function() return 0 end}) + for i = 1, 100 do + local n = random(10) + counts[n] = counts[n] + 1 + end + for i = 1, 10 do + assert(counts[i]) + counts[i] 
= nil + end + assert(not next(counts)) +end + +do --- all in range [-3, 11] + math.randomseed(1) + local seen = setmetatable({}, {__index = function() return false end}) + for i = 1, 120 do + seen[random(-3, 11)] = true + end + for i = -3, 11 do + assert(seen[i]) + seen[i] = nil + end + assert(not next(seen)) +end diff --git a/testsuite/test/lib/string/byte.lua b/testsuite/test/lib/string/byte.lua new file mode 100644 index 0000000000..697a2c2e72 --- /dev/null +++ b/testsuite/test/lib/string/byte.lua @@ -0,0 +1,92 @@ +local band, bor = bit and bit.band, bit and bit.bor +local byte = string.byte + +do --- simple + local a, b = ("foo"):byte(1) + assert(type(a) == "number") + assert(b == nil) + local c, d = ("foo"):byte(2, 3) + assert(type(c) == "number") + assert(c == d) + assert(c ~= a) +end + +do --- Fixed slice [i,i+k] or overflow +bit + local s = "abcdefg" + local x,y,z + for j=100,107 do + for i=1,j do x,y,z = byte("abcdefg", band(i, 7), band(i+2, 7)) end + local a,b,c = byte("abcdefg", band(j, 7), band(j+2, 7)) + assert(x == a and y == b and z == c) + end + for j=100,107 do + for i=1,j do x,y,z = byte(s, band(i, 7), band(i+2, 7)) end + local a,b,c = byte(s, band(j, 7), band(j+2, 7)) + assert(x == a and y == b and z == c) + end +end + +do --- Positive slice [i,len] or overflow +bit + local s = "abc" + local x,y,z + for j=100,107 do + for i=1,j do x,y,z = byte("abc", band(i, 7), -1) end + local a,b,c = byte("abc", band(j, 7), -1) + assert(x == a and y == b and z == c) + end + for j=100,107 do + for i=1,j do x,y,z = byte(s, band(i, 7), -1) end + local a,b,c = byte(s, band(j, 7), -1) + assert(x == a and y == b and z == c) + end +end + +do --- Negative slice [-i,len] or underflow +bit + local s = "abc" + local x,y,z + for j=-100,-107,-1 do + for i=-1,j,-1 do x,y,z = byte("abc", bor(i, -8), -1) end + local a,b,c = byte("abc", bor(j, -8), -1) + assert(x == a and y == b and z == c) + end + for j=-100,-107,-1 do + for i=-1,j,-1 do x,y,z = byte(s, bor(i, -8), -1) end + 
local a,b,c = byte(s, bor(j, -8), -1) + assert(x == a and y == b and z == c) + end +end + +do --- Positive slice [1,i] or overflow +bit + local s = "abc" + local x,y,z + for j=100,107 do + for i=1,j do x,y,z = byte("abc", 1, band(i, 7)) end + local a,b,c = byte("abc", 1, band(j, 7)) + assert(x == a and y == b and z == c) + end + for j=100,107 do + for i=1,j do x,y,z = byte(s, 1, band(i, 7)) end + local a,b,c = byte(s, 1, band(j, 7)) + assert(x == a and y == b and z == c) + end +end + +do --- Negative slice [1,-i] or underflow +bit + local s = "abc" + local x,y,z + for j=-100,-107,-1 do + for i=-1,j,-1 do x,y,z = byte("abc", 1, bor(i, -8)) end + local a,b,c = byte("abc", 1, bor(j, -8)) + assert(x == a and y == b and z == c) + end + for j=-100,-107,-1 do + for i=-1,j,-1 do x,y,z = byte(s, 1, bor(i, -8)) end + local a,b,c = byte(s, 1, bor(j, -8)) + assert(x == a and y == b and z == c) + end +end + +do --- Check for slot stack overflow + local s = string.rep("x", 500) + for i=1,100 do byte(s, 1, 500) end +end diff --git a/testsuite/test/lib/string/char.lua b/testsuite/test/lib/string/char.lua new file mode 100644 index 0000000000..544767de03 --- /dev/null +++ b/testsuite/test/lib/string/char.lua @@ -0,0 +1,29 @@ +local char = string.char + +do --- jit one char + local y + for i=1,100 do y = char(65) end + assert(y == "A") + local x = 97 + for i=1,100 do y = char(x) end + assert(y == "a") + x = "98" + for i=1,100 do y = char(x) end + assert(y == "b") + for i=1,100 do y = char(32+i) end + assert(y == "\132") +end + +do --- jit until out of bounds + local y + assert(not pcall(function() + for i=1,200 do y = char(100+i) end + end)) + assert(y == "\255") +end + +do --- jit five chars + local y + for i=1,100 do y = char(65, 66, i, 67, 68) end + assert(y == "ABdCD") +end diff --git a/testsuite/test/lib/string/dump.lua b/testsuite/test/lib/string/dump.lua new file mode 100644 index 0000000000..216c6eb8bf --- /dev/null +++ b/testsuite/test/lib/string/dump.lua @@ -0,0 +1,31 @@ 
+local loadstring = loadstring or load + +do --- Must unpatch modified bytecode with ILOOP/JLOOP etc. + local function foo() + local t = {} + for i=1,100 do t[i] = i end + for a,b in ipairs(t) do end + local m = 0 + while m < 100 do m = m + 1 end + end + + local d1 = string.dump(foo) + foo() + assert(string.dump(foo) == d1) + if jit then jit.off(foo) end + foo() + assert(string.dump(foo) == d1) + local d2 = string.dump(loadstring(d1, ""), true) + local d3 = string.dump(assert(loadstring(d2, "")), true) + assert(d2 == d3) + assert(loadstring(string.dump(assert(loadstring(d2, ""))))) +end + +do --- roundtrip constants + local function f1() return -0x80000000 end + local function f2() return 0.971234567 end + assert(f1() == -0x80000000) + assert(loadstring(string.dump(f1), "")() == -0x80000000) + assert(f2() == 0.971234567) + assert(loadstring(string.dump(f2), "")() == 0.971234567) +end diff --git a/testsuite/test/lib/string/format/index b/testsuite/test/lib/string/format/index new file mode 100644 index 0000000000..4408853633 --- /dev/null +++ b/testsuite/test/lib/string/format/index @@ -0,0 +1 @@ +num.lua diff --git a/testsuite/test/lib/string/format/num.lua b/testsuite/test/lib/string/format/num.lua new file mode 100644 index 0000000000..e8cb33f3ce --- /dev/null +++ b/testsuite/test/lib/string/format/num.lua @@ -0,0 +1,184 @@ +local format, type, tonumber = string.format, type, tonumber + +local function check(input, fstr, output, inputN) + local actual = format(fstr, inputN or tonumber(input)) + if actual == output then return end + local t = type(output) + if t == "string" then + if output:find"[[%]]" then + local s, e = actual:find((output:gsub("%.", "%%."))) + if s == 1 and e == #actual then return end + end + end + error(format("expected string.format(%q, %q) == %q, but got %q", + fstr, input, output, actual)) +end + +do --- small denormals at low precision +hexfloat !lex + assert(("%.9e"):format(0x1.0E00D1p-1050) == "8.742456525e-317") + 
assert(("%.13e"):format(0x1.1Cp-1068) == "3.5078660854729e-322") +end + +do --- smoke + local cases = { + -- input, %e, %f, %g + { "0", "0.000000e+00", "0.000000", "0"}, + { "1", "1.000000e+00", "1.000000", "1"}, + { "0.5", "5.000000e-01", "0.500000", "0.5"}, + { "123", "1.230000e+02", "123.000000", "123"}, + {"0.0078125", "7.812500e-03", "0.00781[23]", "0.0078125"}, + { "1.109375", "1.109375e+00", "1.109375", "1.1093[78]"}, + { "0.999995", "9.999950e-01", "0.999995", "0.999995"}, + {"0.9999995", "9.999995e-01", "1.000000", "1"}, + { "99999.95", "9.999995e+04", "99999.950000", "99999.9"}, + {"999999.95", "9.999999e+05", "999999.950000", "1e+06"}, + {"123456978", "1.234570e+08", "123456978.000000", "1.23457e+08"}, + { "33.3", "3.330000e+01", "33.300000", "33.3"}, + } + for _, t in ipairs(cases) do + local n = tonumber(t[1]) + check(t[1], "%e", t[2], n) + check(t[1], "%f", t[3], n) + check(t[1], "%g", t[4], n) + end +end + +do --- easily enumerable cases of %a, %A +hexfloat + for i = 1, 16 do + check(1+(i-1)/16, "%.1a", "0x1.".. ("0123456789abcdef"):sub(i,i) .."p+0") + check(16+(i-1), "%.1A", "0X1.".. ("0123456789ABCDEF"):sub(i,i) .."P+4") + end +end + +do --- easily enumerable cases of %f + for i = 1, 16 do + check(("1"):rep(i), "%#2.0f", ("1"):rep(i)..".") + end +end + +do --- easily enumerable cases of %e + local z, f, c = ("0"):byte(), math.floor, string.char + for p = 0, 14 do + local head = "1.".. ("0"):rep(p) + local fmt = "%#.".. c(z + f(p / 10), z + (p % 10)) .."e" + for i = 1, 99 do + local istr = c(z + f(i / 10), z + (i % 10)) + check("1e-".. istr, fmt, head .."e-".. istr) + check("1e+".. istr, fmt, head .."e+".. istr) + end + for i = 100, 308 do + local istr = c(z + f(i / 100), z + f(i / 10) % 10, z + (i % 10)) + check("1e-".. istr, fmt, head .."e-".. istr) + check("1e+".. istr, fmt, head .."e+".. 
istr) + end + end +end + +do --- assorted + check("0", "%.14g", "0") + check("1e-310", "%.0g", "1e-310") + check("1e8", "%010.5g", "000001e+08") + check("1e8", "% -10.5g", " 1e+08 ") + check("4e123", "%+#.0e", "+4.e+123") + check("1e49", "%.0f", "9999999999999999464902769475481793196872414789632") + check("1e50", "%.0f", "100000000000000007629769841091887003294964970946560") + check("1e50", "%.35g", "1.00000000000000007629769841091887e+50") + check("1e50", "%40.35g", " 1.00000000000000007629769841091887e+50") + check("1e50", "%#+40.34g", "+1.000000000000000076297698410918870e+50") + check("1e50", "%-40.35g", "1.00000000000000007629769841091887e+50 ") + check("0.5", "%.0f", "[01]") + check("0.25", "%.1f", "0.[23]") + check("999999.95", "%.7g", "999999.9") + check("999.99995", "%.7g", "1000") + check("6.9039613742e-314", "%.3e", "6.904e-314") + + check(1e-323, "%.99g", "9.8813129168249308835313758573644274473011960522864".. + "9528851171365001351014540417503730599672723271985e-324") + check(1e308, "%.99f", "1000000000000000010979063629440455417404923096773118".. + "463368106829031575854049114915371633289784946888990612496697211725".. + "156115902837431400883283070091981460460312716645029330271856974896".. + "995885590433383844661650011784268976262129451776280911957867074581".. + "22783970171784415105291802893207873272974885715430223118336.000000".. + "000000000000000000000000000000000000000000000000000000000000000000".. + "000000000000000000000000000") + check("1", "%.99f", "1."..("0"):rep(99)) + check("5", "%99g", (" "):rep(98).."5") + check("5", "%099g", ("0"):rep(98).."5") + check("5", "%-99g", "5".. (" "):rep(98)) + check("5", "%0-99g", "5".. 
(" "):rep(98)) + + check((2^53-1)*2^971, "%e", "1.797693e+308") + check((2^53-1)*2^971, "%.0e", "2e+308") + + check("0", "%.14g", "0") + + check("0.15", "%.1f", "0.1") + check("0.45", "%.1f", "0.5") + check("0.55", "%.1f", "0.6") + check("0.85", "%.1f", "0.8") +end + +do --- assorted %a +luajit>=2.1 + check((2^53-1)*2^971, "%a", "0x1.fffffffffffffp+1023") + check((2^53-1)*2^971, "%.0a", "0x2p+1023") + check("0", "%a", "0x0p+0") + check("1.53173828125", "%1.8a", "0x1.88200000p+0") + check("1.53173828125", "%8.1a", "0x1.9p+0") -- libc on OSX gets this wrong + check("1.5317", "%8.1a", "0x1.9p+0") + check("1.53", "%8.1a", "0x1.8p+0") + check("-1.5", "%11.2a", " -0x1.80p+0") + check("3.14159265358", "%a", "0x1.921fb5443d6f4p+1") + check("3.14159265358", "%A", "0X1.921FB5443D6F4P+1") +end + +do --- Cases where inprecision can easily affect rounding + check("2.28579528986935e-262", "%.14g", "2.2857952898694e-262") + check("4.86009084710405e+243", "%.14g", "4.8600908471041e+243") + check("6.28108398359615e+258", "%.14g", "6.2810839835962e+258") + check("4.29911075733405e+250", "%.14g", "4.2991107573341e+250") + check("8.5068432121065e+244", "% .13g", " 8.506843212107e+244") + check("8.1919113161235899e+233", "%.40g", "8.191911316123589934222156598061".. + "949037266e+233") + check("7.1022381748280933e+272", "%.40g", "7.102238174828093393858336547341".. + "897013319e+272") + check("5.8018368514358030e+261", "%.67g", "5.801836851435803025936253580958".. + "042578728799220447411839451694590343e+261") + check("7.9225909325493999e-199", "%.26g", "7.922590932549399935196127e-199") + check("2.4976643533685383e-153", "%.43g", "2.497664353368538321643894302495".. 
+ "469512999562e-153") + check("9.796500001282779e+222", "%.4g", "9.797e+222") + check("7.7169235e-227", "%e", "7.716923e-227") + check("7.7169235000000044e-227", "%e", "7.716924e-227") + check("5.3996444915000004e+87", "%.9e", "5.399644492e+87") + check("2.03037546395e-49", "%.10e", "2.0303754640e-49") + check("3.38759425741500027e+65", "%.11e", "3.38759425742e+65") + check("1.013960434983135e-66", "%.0e", "1e-66") + check("1.32423054454835e-204", "%.13e", "1.3242305445484e-204") + check("5.9005060812045502e+100", "%.13e", "5.9005060812046e+100") +end + +do --- ExploringBinary.com/print-precision-of-dyadic-fractions-varies-by-language/ + check(5404319552844595/2^53, "%.53g", "0.5999999999999999777955395074968691".. + "9152736663818359375") + check(2^-1074, "%.99e", "4.940656458412465441765687928682213723650598026143".. + "247644255856825006755072702087518652998363616359924e-324") + check(1-2^-53, "%1.53f", "0.99999999999999988897769753748434595763683319091".. + "796875") +end + +do --- ExploringBinary.com/incorrect-floating-point-to-decimal-conversions/ + check("1.0551955", "%.7g", "1.055195") + check("8.330400913327153", "%.15f", "8.330400913327153") + check("9194.25055964485", "%.14g", "9194.2505596449") + check("816.2665949149578", "%.16g", "816.2665949149578") + check("95.47149571505499", "%.16g", "95.47149571505498") +end + +do --- big f +luajit>=2.1 + check("9.522938016739373", "%.15F", "9.522938016739372") +end + +do --- RandomASCII.wordpress.com/2013/02/07/ + check("6.10351562e-05", "%1.8e", "6.1035156[23]e%-05") + check("4.3037358649999999e-15", "%1.8e", "4.30373586e-15") +end diff --git a/testsuite/test/lib/string/index b/testsuite/test/lib/string/index new file mode 100644 index 0000000000..c0638e9c34 --- /dev/null +++ b/testsuite/test/lib/string/index @@ -0,0 +1,11 @@ +metatable.lua +byte.lua +char.lua +dump.lua +format +len.lua +lower_upper.lua +multiple_functions.lua +rep.lua +reverse.lua +sub.lua diff --git a/testsuite/test/lib/string/len.lua 
b/testsuite/test/lib/string/len.lua new file mode 100644 index 0000000000..8ed7e8ae49 --- /dev/null +++ b/testsuite/test/lib/string/len.lua @@ -0,0 +1,14 @@ +local len = string.len +local expect_error = require"common.expect_error" + +do --- smoke + assert(len("abc") == 3) + assert(len(123) == 3) +end + +do --- argcheck + expect_error(function() len() end, + "bad argument #1 to 'len' (string expected, got nil)") + expect_error(function() len(false) end, + "bad argument #1 to 'len' (string expected, got boolean)") +end diff --git a/testsuite/test/lib/string/lower_upper.lua b/testsuite/test/lib/string/lower_upper.lua new file mode 100644 index 0000000000..7370c44ce1 --- /dev/null +++ b/testsuite/test/lib/string/lower_upper.lua @@ -0,0 +1,51 @@ +do --- smoke + assert(("abc123DEF_<>"):lower() == "abc123def_<>") + assert(("abc123DEF_<>"):upper() == "ABC123DEF_<>") +end + +do --- repeated + local l = "the quick brown fox..." + local u = "THE QUICK BROWN FOX..." + local s = l + for i = 1, 75 do + s = s:upper() + assert(s == u) + s = s:lower() + assert(s == l) + end +end + +do --- repeated with growing string + local y, z + local x = "aBcDe" + for i=1,100 do + y = string.upper(x) + z = y.."fgh" + end + assert(y == "ABCDE") + assert(z == "ABCDEfgh") +end + +do --- misc upper + local y + for i=1,100 do y = string.upper("aBc9") end + assert(y == "ABC9") + local x = ":abCd+" + for i=1,100 do y = string.upper(x) end + assert(y == ":ABCD+") + x = 1234 + for i=1,100 do y = string.upper(x) end + assert(y == "1234") +end + +do --- misc lower + local y + for i=1,100 do y = string.lower("aBc9") end + assert(y == "abc9") + local x = ":abcd+" + for i=1,100 do y = string.lower(x) end + assert(y == ":abcd+") + x = 1234 + for i=1,100 do y = string.lower(x) end + assert(y == "1234") +end diff --git a/testsuite/test/lib/string/metatable.lua b/testsuite/test/lib/string/metatable.lua new file mode 100644 index 0000000000..d39ed43264 --- /dev/null +++ b/testsuite/test/lib/string/metatable.lua 
@@ -0,0 +1,3 @@ +do --- __index metamethod is string library + assert(debug.getmetatable("").__index == string) +end diff --git a/testsuite/test/lib/string/multiple_functions.lua b/testsuite/test/lib/string/multiple_functions.lua new file mode 100644 index 0000000000..7b9d0f1383 --- /dev/null +++ b/testsuite/test/lib/string/multiple_functions.lua @@ -0,0 +1,16 @@ +do --- string_op + local t, y = {}, {} + for i=1,100 do t[i] = string.char(i, 16+i, 32+i) end + for i=1,100 do t[i] = string.reverse(t[i]) end + assert(t[100] == "\132\116\100") + for i=1,100 do t[i] = string.reverse(t[i]) end + for i=1,100 do assert(t[i] == string.char(i, 16+i, 32+i)) end + for i=1,100 do y[i] = string.upper(t[i]) end + assert(y[65] == "AQA") + assert(y[97] == "AQ\129") + assert(y[100] == "DT\132") + for i=1,100 do y[i] = string.lower(t[i]) end + assert(y[65] == "aqa") + assert(y[97] == "aq\129") + assert(y[100] == "dt\132") +end diff --git a/testsuite/test/lib/string/rep.lua b/testsuite/test/lib/string/rep.lua new file mode 100644 index 0000000000..550c15b8a8 --- /dev/null +++ b/testsuite/test/lib/string/rep.lua @@ -0,0 +1,68 @@ +local rep = string.rep + +do --- smoke + assert(("p"):rep(0) == "") + assert(("a"):rep(3) == "aaa") + assert(("x\0z"):rep(4) == "x\0zx\0zx\0zx\0z") +end + +do --- versus concat + local s = "" + for i = 1, 75 do + s = s .. 
"{}" + assert(s == ("{}"):rep(i)) + end +end + +do --- misc + local y + for i=1,100 do y = rep("a", 10) end + assert(y == "aaaaaaaaaa") + for i=1,100 do y = rep("ab", 10) end + assert(y == "abababababababababab") + local x = "a" + for i=1,100 do y = rep(x, 10) end + assert(y == "aaaaaaaaaa") + local n = 10 + for i=1,100 do y = rep(x, n) end + assert(y == "aaaaaaaaaa") + x = "ab" + for i=1,100 do y = rep(x, n) end + assert(y == "abababababababababab") + x = 12 + n = "10" + for i=1,100 do y = rep(x, n) end + assert(y == "12121212121212121212") +end + +do --- separator +goto + local y + for i=1,100 do y = rep("ab", 10, "c") end + assert(y == "abcabcabcabcabcabcabcabcabcab") +end + +do --- iterate to table + local t = {} + for i=1,100 do t[i] = rep("ab", i-85) end + assert(t[100] == "ababababababababababababababab") +end + +do --- iterate to table with sep +goto + local t = {} + for i=1,100 do t[i] = rep("ab", i-85, "c") end + assert(t[85] == "") + assert(t[86] == "ab") + assert(t[87] == "abcab") + assert(t[100] == "abcabcabcabcabcabcabcabcabcabcabcabcabcabcab") +end + +do --- iterate and concat + local y, z + local x = "ab" + for i=1,100 do + y = rep(x, i-90) + z = y.."fgh" + end + assert(y == "abababababababababab") + assert(z == "ababababababababababfgh") +end diff --git a/testsuite/test/lib/string/reverse.lua b/testsuite/test/lib/string/reverse.lua new file mode 100644 index 0000000000..deaade7cef --- /dev/null +++ b/testsuite/test/lib/string/reverse.lua @@ -0,0 +1,13 @@ +local reverse = string.reverse + +do --- misc + local y + for i=1,100 do y = reverse("abc") end + assert(y == "cba") + local x = "abcd" + for i=1,100 do y = reverse(x) end + assert(y == "dcba") + x = 1234 + for i=1,100 do y = reverse(x) end + assert(y == "4321") +end diff --git a/testsuite/test/lib/string/sub.lua b/testsuite/test/lib/string/sub.lua new file mode 100644 index 0000000000..ecb80216c9 --- /dev/null +++ b/testsuite/test/lib/string/sub.lua @@ -0,0 +1,189 @@ +local band, bor = bit and 
bit.band, bit and bit.bor +local sub = string.sub +local expect_error = require"common.expect_error" + +do --- smoke + assert(sub("abc", 2) == "bc") + assert(sub(123, "2") == "23") +end + +do --- argcheck + expect_error(function() sub("abc", false) end, + "bad argument #2 to 'sub' (number expected, got boolean)") + expect_error(function() ("abc"):sub(false) end, + "bad argument #1 to 'sub' (number expected, got boolean)") +end + +do --- all bar substrings + local subs = { + {"b", "ba", "bar"}, + { "", "a", "ar"}, + { "", "", "r"} + } + for i = 1, 3 do + for j = 1, 3 do + assert(sub("bar", i, j) == subs[i][j]) + assert(sub("bar", -4+i, j) == subs[i][j]) + assert(sub("bar", i, -4+j) == subs[i][j]) + assert(sub("bar", -4+i, -4+j) == subs[i][j]) + end + end +end + +do --- Positive slice [i,len] or overflow +bit + local s = "abc" + local x + for j=100,107 do + for i=1,j do x = sub("abc", band(i, 7)) end + assert(x == sub("abc", band(j, 7))) + end + for j=100,107 do + for i=1,j do x = sub(s, band(i, 7)) end + assert(x == sub(s, band(j, 7))) + end +end + +do --- Negative slice [-i,len] or underflow +bit + local s = "abc" + local x + for j=-100,-107,-1 do + for i=-1,j,-1 do x = sub("abc", bor(i, -8)) end + assert(x == sub("abc", bor(j, -8))) + end + for j=-100,-107,-1 do + for i=-1,j,-1 do x = sub(s, bor(i, -8)) end + assert(x == sub(s, bor(j, -8))) + end +end + +do --- Positive slice [1,i] or overflow +bit + local s = "abc" + local x + for j=100,107 do + for i=1,j do x = sub("abc", 1, band(i, 7)) end + assert(x == sub("abc", 1, band(j, 7))) + end + for j=100,107 do + for i=1,j do x = sub(s, 1, band(i, 7)) end + assert(x == sub(s, 1, band(j, 7))) + end +end + +do --- Negative slice [1,-i] or underflow +bit + local s = "abc" + local x + for j=-100,-107,-1 do + for i=-1,j,-1 do x = sub("abc", 1, bor(i, -8)) end + assert(x == sub("abc", 1, bor(j, -8))) + end + for j=-100,-107,-1 do + for i=-1,j,-1 do x = sub(s, 1, bor(i, -8)) end + assert(x == sub(s, 1, bor(j, -8))) + end 
+end + +do --- jit sub 1 eq + local s = "abcde" + local x = 0 + for i=1,100 do + if sub(s, 1, 1) == "a" then x = x + 1 end + end + assert(x == 100) +end + +do --- jit sub 1 ne (contents) + local s = "abcde" + local x = 0 + for i=1,100 do + if sub(s, 1, 1) == "b" then x = x + 1 end + end + assert(x == 0) +end + +do --- jit sub 1 ne (rhs too long) + local s = "abcde" + local x = 0 + for i=1,100 do + if sub(s, 1, 1) == "ab" then x = x + 1 end + end + assert(x == 0) +end + +do --- jit sub 1,2 ne + local s = "abcde" + local x = 0 + for i=1,100 do + if sub(s, 1, 2) == "a" then x = x + 1 end + end + assert(x == 0) +end + +do --- jit sub 1,k eq + local s = "abcde" + local x = 0 + local k = 1 + for i=1,100 do + if sub(s, 1, k) == "a" then x = x + 1 end + end + assert(x == 100) +end + +do --- jit sub 1,k ne (contents) + local s = "abcde" + local x = 0 + local k = 1 + for i=1,100 do + if sub(s, 1, k) == "b" then x = x + 1 end + end + assert(x == 0) +end + +do --- jit sub 1,k ne (rhs too long) + local s = "abcde" + local x = 0 + local k = 1 + for i=1,100 do + if sub(s, 1, k) == "ab" then x = x + 1 end + end + assert(x == 0) +end + +do --- jit sub 1,2 eq + local s = "abcde" + local x = 0 + for i=1,100 do + if sub(s, 1, 2) == "ab" then x = x + 1 end + end + assert(x == 100) +end + +do --- jit sub 1,3 eq + local s = "abcde" + local x = 0 + for i=1,100 do + if sub(s, 1, 3) == "abc" then x = x + 1 end + end + assert(x == 100) +end + +do --- jit sub 1,4 eq + local s = "abcde" + local x = 0 + for i=1,100 do + if sub(s, 1, 4) == "abcd" then x = x + 1 end + end + assert(x == 100) +end + +do --- jit sub i,i + local t = {} + local line = string.rep("..XX", 100) + local i = 1 + local c = line:sub(i, i) + while c ~= "" and c ~= "Z" do + t[i] = c == "X" and "Y" or c + i = i + 1 + c = line:sub(i, i) + end + assert(table.concat(t) == string.rep("..YY", 100)) +end diff --git a/testsuite/test/lib/table/concat.lua b/testsuite/test/lib/table/concat.lua new file mode 100644 index 
0000000000..1f2a2f924d --- /dev/null +++ b/testsuite/test/lib/table/concat.lua @@ -0,0 +1,55 @@ +local concat, assert, pcall = table.concat, assert, pcall + +do --- table.concat + local t = {a=1,b=2,c=3,d=4,e=5} + t[1] = 4 + t[3] = 6 + local ok, err = pcall(concat, t, "", 1, 3) + assert(not ok and err:match("index 2 ")) + local q = {} + for i=1,100 do q[i] = {9,8,7} end + q[90] = t + for i=1,100 do + assert(pcall(concat, q[i], "", 1, 3) == (i ~= 90)) + end + t[2] = 5 -- index 1 - 3 in hash part + q[91] = {} + q[92] = {9} + for i=1,100 do q[i] = concat(q[i], "x") end + assert(q[90] == "4x5x6") + assert(q[91] == "") + assert(q[92] == "9") + assert(q[93] == "9x8x7") +end + +do --- table.concat must inhibit CSE and DSE + local t = {1,2,3} + local y, z + for i=1,100 do + y = concat(t, "x", 1, 3) + t[2] = i + z = concat(t, "x", 1, 3) + end + assert(y == "1x99x3") + assert(z == "1x100x3") +end + +do --- table.concat must inhibit CSE and DSE 2 + local y + for i=1,100 do + local t = {1,2,3} + t[2] = 4 + y = concat(t, "x") + t[2] = 9 + end + assert(y == "1x4x3") +end + +do --- table.concat must inhibit CSE and DSE 3 + local t = {[0]={}, {}, {}, {}} + for i=1,30 do + for j=3,0,-1 do + t[j].x = t[j-1] + end + end +end diff --git a/testsuite/test/lib/table/index b/testsuite/test/lib/table/index new file mode 100644 index 0000000000..bd3af0bef1 --- /dev/null +++ b/testsuite/test/lib/table/index @@ -0,0 +1,6 @@ +concat.lua +insert.lua +new.lua +table.new +pack.lua +compat5.2 +remove.lua +sort.lua diff --git a/testsuite/test/lib/table/insert.lua b/testsuite/test/lib/table/insert.lua new file mode 100644 index 0000000000..91d4dd8767 --- /dev/null +++ b/testsuite/test/lib/table/insert.lua @@ -0,0 +1,17 @@ +local tinsert = table.insert +local assert = assert + +do --- table.insert(t,i) + local t = {} + for i=1,100 do t[i] = i end + for i=1,100 do tinsert(t, i) end + assert(#t == 200 and t[100] == 100 and t[200] == 100) +end + +do --- table.insert(t,i,i) + local t = {} + for i=1,200 
do t[i] = i end + for i=101,200 do tinsert(t, i, i) end + assert(#t == 300 and t[101] == 101 and t[200] == 200 and t[300] == 200) +end + diff --git a/testsuite/test/lib/table/misc.lua b/testsuite/test/lib/table/misc.lua new file mode 100644 index 0000000000..e0e2fc592f --- /dev/null +++ b/testsuite/test/lib/table/misc.lua @@ -0,0 +1,58 @@ +-- TODO: Organise + +-- ABC elim +-- +opt +abc +do + local s, t = {}, {} + for i=1,100 do t[i] = 1 end + for i=1,100 do s[i] = t end + s[90] = {} + local n = 100 + for i=1,n do s[i][i] = i end +end + +--- TSETM +-- Initialize table with multiple return values +do + local function f(a,b,c) + return a,b,c + end + + local t + + t = {(f(1,2,3))} + assert(t[1] == 1 and t[2] == nil and t[3] == nil) + + t = {f(1,2,3)} + assert(t[1] == 1 and t[2] == 2 and t[3] == 3 and t[4] == nil) + t = {f(1,2,3),} + assert(t[1] == 1 and t[2] == 2 and t[3] == 3 and t[4] == nil) + + t = {f(1,2,3), f(4,5,6)} + assert(t[1] == 1 and t[2] == 4 and t[3] == 5 and t[4] == 6 and t[5] == nil) + + t = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + f(2,3,4)} + assert(t[255] == 1 and t[256] == 2 and t[257] == 3 and t[258] == 4 and t[259] == nil) +end + +--- TSETM 2 +-- Initialize table with function returning 2 constant return values +do + local function f() return 9, 10 end + local t + for i=1,100 do t = { 1, 2, 3, f() } end + assert(t[1] == 1 and t[2] == 2 and t[3] == 3 and t[4] == 9 and t[5] == 10 and + t[6] == nil) +end + + + diff --git a/testsuite/test/lib/table/new.lua 
b/testsuite/test/lib/table/new.lua new file mode 100644 index 0000000000..483c1298fe --- /dev/null +++ b/testsuite/test/lib/table/new.lua @@ -0,0 +1,11 @@ +local tnew = require"table.new" + +do --- table.new + local x, y + for i=1,100 do + x = tnew(100, 30) + assert(type(x) == "table") + if i == 90 then y = x end + end + assert(x ~= y) +end diff --git a/testsuite/test/lib/table/pack.lua b/testsuite/test/lib/table/pack.lua new file mode 100644 index 0000000000..5bd6ecbe0e --- /dev/null +++ b/testsuite/test/lib/table/pack.lua @@ -0,0 +1,7 @@ +do --- empty + local t = table.pack() + assert(type(t) == "table") + assert(t.n == 0) + assert(t[0] == nil) + assert(t[1] == nil) +end diff --git a/testsuite/test/lib/table/remove.lua b/testsuite/test/lib/table/remove.lua new file mode 100644 index 0000000000..1b24a4fb8c --- /dev/null +++ b/testsuite/test/lib/table/remove.lua @@ -0,0 +1,42 @@ +local tremove = table.remove +local assert = assert + +do --- table.remove(t) removes correct entries + local t = {} + for i=1,200 do t[i] = i end + for i=1,100 do tremove(t) end + assert(#t == 100 and t[100] == 100) +end + +do --- table.remove(t) returns the removed entry + local t = {} + for i=1,200 do t[i] = i end + for i=1,100 do assert(tremove(t) == 201-i) end + assert(#t == 100 and t[100] == 100) +end + +do --- table.remove(t, 1) removes and returns the first entry + local t = {} + for i=1,200 do t[i] = i end + for i=1,100 do assert(tremove(t, 1) == i) end + assert(#t == 100 and t[100] == 200) +end + +do --- TSETR hash part +table.new + local tnew = require"table.new" + local t = tnew(0, 16) + for i=10,1,-1 do t[i] = i+3 end + for i=10,1,-1 do assert(tremove(t) == i+3) end + assert(#t == 0) +end + +do --- TSETR write barrier +table.new + local tnew = require"table.new" + for _, t in ipairs{{}, tnew(0, 16)} do + for i = 1, 10 do t[i] = {i} end + for i = 1, 10 do + collectgarbage() + assert(tremove(t, 1)[1] == i) + end + end +end diff --git a/testsuite/test/lib/table/sort.lua 
b/testsuite/test/lib/table/sort.lua new file mode 100644 index 0000000000..6a86fcf382 --- /dev/null +++ b/testsuite/test/lib/table/sort.lua @@ -0,0 +1,27 @@ +-- Really a test for lua_lessthan() +local N = 1000 + +do --- numbers + math.randomseed(42) + local t = {} + for i=1,N do t[i] = math.random(N) end + table.sort(t) + for i=2,N do assert(t[i-1] <= t[i]) end +end + +do --- strings + math.randomseed(42) + local t = {} + for i=1,N do t[i] = math.random(1, N/10).."" end + table.sort(t) + for i=2,N do assert(t[i-1] <= t[i]) end +end + +do --- tables + math.randomseed(42) + local mt = { __lt = function(a,b) return a[1] < b[1] end } + local t = {} + for i=1,N do t[i] = setmetatable({ math.random(N) }, mt) end + table.sort(t) + for i=2,N do assert(t[i-1][1] <= t[i][1]) end +end diff --git a/testsuite/test/misc/alias_alloc.lua b/testsuite/test/misc/alias_alloc.lua new file mode 100644 index 0000000000..02fe618d81 --- /dev/null +++ b/testsuite/test/misc/alias_alloc.lua @@ -0,0 +1,54 @@ + +do + local t = {1} + local x + for i=1,100 do + local v = {i} + t[1] = v[1] + x = v[1] + end + assert(x == 100 and t[1] == 100) +end + +do + local t = {1} + local x,y + for i=1,100 do + local v = {i} + local w = {i+1} + x = v[1] + y = w[1] + end + assert(x == 100 and y == 101) +end + +do + local mt = {} + local t = setmetatable({}, mt) + local x + for i=1,100 do + local v = {} + setmetatable(v, getmetatable(t)) + assert(getmetatable(v) == mt) + end +end + +-- See also sink_alloc.lua +do + local x,k={1,2},{3,4} + for i=1,100 do x = {x[1]+k[1], x[2]+k[2]} end + assert(x[1] == 301) + assert(x[2] == 402) +end + +-- FLOAD for tab.asize/tab.array crossing NEWREF. 
+do + local t = {1} + for i=1,100 do + local v = {} + local w = {} + v[1] = t[1] + w[1] = t[1] + end +end + diff --git a/testsuite/test/misc/api_call.lua b/testsuite/test/misc/api_call.lua new file mode 100644 index 0000000000..7dbd5e4011 --- /dev/null +++ b/testsuite/test/misc/api_call.lua @@ -0,0 +1,98 @@ +local ctest = require("ctest") + +local function ret0() end +local function ret1() return 1 end +local function ret2() return 1,2 end +local function ret3() return 1,2,3 end +local function retva(...) return ... end +local function ret1va(...) return 1,... end + +local function pack(...) + return { n = select('#', ...), ... } +end + +local function ck(res, ...) + local ok = pack(...) + if res.n ~= ok.n then error("nresults wrong: "..res.n.." ~= "..ok.n, 2) end + for i=1,res.n do + if res[i] ~= ok[i] then + error("result["..i.."] wrong: "..tostring(res[i]).." ~= "..tostring(ok[i]), 2) + end + end +end + +local function test_adjust_results(testfunc) + + local function cc(nres, f, ...) + return pack(testfunc(nres, f, ...)) + end + + ck(cc(0, ret0)) + ck(cc(0, ret1)) + ck(cc(0, ret2)) + ck(cc(0, ret3)) + ck(cc(0, retva)) + + ck(cc(1, ret0), nil) + ck(cc(1, ret1), 1) + ck(cc(1, ret2), 1) + ck(cc(1, ret3), 1) + ck(cc(1, retva), nil) + ck(cc(1, retva, 1), 1) + + ck(cc(2, ret0), nil, nil) + ck(cc(2, ret1), 1, nil) + ck(cc(2, ret2), 1, 2) + ck(cc(2, ret3), 1, 2) + ck(cc(2, retva), nil, nil) + ck(cc(2, retva, 1), 1, nil) + ck(cc(2, retva, 1, 2), 1, 2) + + ck(cc(-1, ret0)) + ck(cc(-1, ret1), 1) + ck(cc(-1, ret2), 1, 2) + ck(cc(-1, ret3), 1, 2, 3) + ck(cc(-1, retva)) + ck(cc(-1, retva, 1), 1) + ck(cc(-1, retva, 1, 2), 1, 2) +end + +test_adjust_results(ctest.call) +test_adjust_results(ctest.pcall_err) + + +local function gcshrink() + for i=1,10 do collectgarbage() end +end + +assert(select('#', ctest.call(2000, gcshrink)) == 2000) +gcshrink() +assert(select('#', ctest.call(7000, gcshrink)) == 7000) +gcshrink() + +local function test_yield(resume, yield) + local function 
inpcall() + ck(pack(yield(6, 7)), 18, 19) + end + local co = coroutine.create(function(...) + ck(pack(...), 11, 12) + ck(pack(yield(1, 2))) + ck(pack(yield()), 13, 14, 15) + ck(pack(yield(3, 4, 5)), 16, 17) + assert(pcall(inpcall) == true) + return 8, 9 + end) + + ck(pack(resume(co, 11, 12)), true, 1, 2) + ck(pack(resume(co)), true) + ck(pack(resume(co, 13, 14, 15)), true, 3, 4, 5) + ck(pack(resume(co, 16, 17)), true, 6, 7) + ck(pack(resume(co, 18, 19)), true, 8, 9) + assert(resume(co) == false) +end + +test_yield(coroutine.resume, coroutine.yield) +test_yield(ctest.resume, coroutine.yield) +test_yield(coroutine.resume, ctest.yield) +test_yield(ctest.resume, ctest.yield) + diff --git a/testsuite/test/misc/catch_wrap.lua b/testsuite/test/misc/catch_wrap.lua new file mode 100644 index 0000000000..7f656bcc26 --- /dev/null +++ b/testsuite/test/misc/catch_wrap.lua @@ -0,0 +1,45 @@ + +local cp = require("cpptest") +cp.wrapon() + +do + local a, b = pcall(cp.catch, function() return "x" end) + assert(a == true and b == "x") +end + +do + local a, b = pcall(function() cp.throw("foo") end) + assert(a == false and b == "foo") +end + +local unwind +do + local a, b = pcall(cp.catch, function() cp.throw("foo") end) + unwind = a + assert((a == false and b == "foo") or (a == true and b == "catch ...")) +end + +do + local st = cp.alloc(function() return cp.isalloc() end) + assert(st == true) + assert(cp.isalloc() == false) +end + +do + local a, b = pcall(cp.alloc, function() + assert(cp.isalloc() == true) + return "foo", cp.throw + end) + assert(a == false and b == "foo") + assert(cp.isalloc() == false) +end + +do + local a, b = pcall(cp.alloc, function() + assert(cp.isalloc() == true) + return "foo", error + end) + assert(a == false and b == "foo") + if unwind then assert(cp.isalloc() == false) end +end + diff --git a/testsuite/test/misc/coro_traceback.lua b/testsuite/test/misc/coro_traceback.lua new file mode 100644 index 0000000000..2676d2c68e --- /dev/null +++ 
b/testsuite/test/misc/coro_traceback.lua @@ -0,0 +1,8 @@ + +local co = coroutine.create(function() + local x = nil + local y = x.x +end) +assert(coroutine.resume(co) == false) +debug.traceback(co) + diff --git a/testsuite/test/misc/coro_yield.lua b/testsuite/test/misc/coro_yield.lua new file mode 100644 index 0000000000..ae3206e05b --- /dev/null +++ b/testsuite/test/misc/coro_yield.lua @@ -0,0 +1,111 @@ +local create = coroutine.create +local wrap = coroutine.wrap +local resume = coroutine.resume +local yield = coroutine.yield + +-- Test stack overflow handling on return from coroutine. +do + wrap(function() + local co = create(function() + yield(string.byte(string.rep(" ", 100), 1, 100)) + end) + assert(select('#', resume(co)) == 101) + end)() +end + +do + wrap(function() + local f = wrap(function() + yield(string.byte(string.rep(" ", 100), 1, 100)) + end) + assert(select('#', f()) == 100) + end)() +end + +do + local function cogen(x) + return wrap(function(n) repeat x = x+n; n = yield(x) until false end), + wrap(function(n) repeat x = x*n; n = yield(x) until false end) + end + + local a,b=cogen(3) + local c,d=cogen(5) + assert(d(b(c(a(d(b(c(a(1)))))))) == 168428160) +end + +do + local function verify(what, expect, ...) + local got = {...} + for i=1,100 do + if expect[i] ~= got[i] then + error("FAIL " .. what) + end + if expect[i] == nil then + break + end + end + end + + local function cofunc(...) + verify("call", { 1, "foo" }, ...) + verify("yield", { "bar" }, yield(2, "test")) + verify("pcall yield", { true, "again" }, pcall(yield, "from pcall")) + return "end" + end + + local co = create(cofunc) + verify("resume", { true, 2, "test" }, resume(co, 1, "foo")) + verify("resume pcall", { true, "from pcall" }, resume(co, "bar")) + verify("resume end", { true, "end" }, resume(co, "again")) +end + +do + local function verify(expect, func, ...) + local co = create(func) + for i=1,100 do + local ok, res = resume(co, ...) 
+ if not ok then + if expect[i] ~= nil then + error("too few results: ["..i.."] = "..tostring(expect[i]).." (got: "..tostring(res)..")") + end + break + end + if expect[i] ~= res then + error("bad result: ["..i.."] = "..tostring(res).." (should be: "..tostring(expect[i])..")") + end + end + end + + verify({ 42, 99 }, + function(x) pcall(yield, x) return 99 end, + 42) + + verify({ 42, 99 }, + function(x) pcall(function(y) yield(y) end, x) return 99 end, + 42) + + verify({ 42, 99 }, + function(x) xpcall(yield, debug.traceback, x) return 99 end, + 42) + + verify({ 45, 44, 43, 42, 99 }, + function(x, y) + for i in + function(o, k) + yield(o+k) + if k ~= 0 then return k-1 end + end,x,y do + end + return 99 + end, + 42, 3) + + verify({ 84, 99 }, + function(x) + local o = setmetatable({ x }, + {__add = function(a, b) yield(a[1]+b[1]) return 99 end }) + return o+o + end, + 42) +end + diff --git a/testsuite/test/misc/debug_gc.lua b/testsuite/test/misc/debug_gc.lua new file mode 100644 index 0000000000..30fb2b99b9 --- /dev/null +++ b/testsuite/test/misc/debug_gc.lua @@ -0,0 +1,47 @@ + +-- Do not run this test unless the JIT compiler is turned off. +if jit and jit.status and jit.status() then return end + +local caught, caught_line, caught_mm + +local function gcmeta() + if caught ~= "end" then +-- print(debug.traceback()) + -- This may point to the wrong instruction if in a JIT trace. + -- But there's no guarantee if, when or where any GC steps occur. + local dbg = debug.getinfo(2) + caught_line = dbg.currentline + caught_mm = debug.getinfo(1).name + caught = true + end +end + +local function testgc(mm, f) + collectgarbage() + caught = false + local u = newproxy(true) + getmetatable(u).__gc = gcmeta + u = nil + for i=1,100000 do + f(i) + -- This check may be hoisted. __gc is not supposed to have side-effects. + if caught then break end + end + if not caught then + error(mm.." 
metamethod not called", 2) + end + if type(caught_line) ~= "number" or caught_line < 0 then + error("bad linenumber in debug info", 2) + end + if caught_mm ~= mm then + error("bad name for metamethod in debug info", 2) + end +end + +local x +testgc("__gc", function(i) x = {} end) +testgc("__gc", function(i) x = {1} end) +testgc("__gc", function(i) x = function() end end) +testgc("__concat", function(i) x = i.."" end) + +caught = "end" diff --git a/testsuite/test/misc/dualnum.lua b/testsuite/test/misc/dualnum.lua new file mode 100644 index 0000000000..5f1288c8df --- /dev/null +++ b/testsuite/test/misc/dualnum.lua @@ -0,0 +1,47 @@ + +-- Positive overflow +do + local x = 0 + for i=2147483446,2147483647,2 do x = x + 1 end + assert(x == 101) +end + +-- Negative overflow +do + local x = 0 + for i=-2147483447,-2147483648,-2 do x = x + 1 end + assert(x == 101) +end + +-- SLOAD with number to integer conversion. +do + local k = 1 + local a, b, c = 1/k, 20/k, 1/k + for i=1,20 do + for j=a,b,c do end + end +end + +do + local function fmin(a, b) + for i=1,100 do a = math.min(a, b) end + return a + end + local function fmax(a, b) + for i=1,100 do a = math.max(a, b) end + return a + end + assert(fmin(1, 3) == 1) + assert(fmin(3, 1) == 1) + assert(fmin(-1, 3) == -1) + assert(fmin(3, -1) == -1) + assert(fmin(-1, -3) == -3) + assert(fmin(-3, -1) == -3) + assert(fmax(1, 3) == 3) + assert(fmax(3, 1) == 3) + assert(fmax(-1, 3) == 3) + assert(fmax(3, -1) == 3) + assert(fmax(-1, -3) == -1) + assert(fmax(-3, -1) == -1) +end + diff --git a/testsuite/test/misc/for_dir.lua b/testsuite/test/misc/for_dir.lua new file mode 100644 index 0000000000..4dd38dee0d --- /dev/null +++ b/testsuite/test/misc/for_dir.lua @@ -0,0 +1,13 @@ + +local a,b,c = 10,1,-1 +for i=1,20 do + if c == -1 then + a,b,c = 1,10,1 + else + a,b,c = 10,1,-1 + end + local x = 0 + for i=a,b,c do for j=1,10 do end x=x+1 end + assert(x == 10) +end + diff --git a/testsuite/test/misc/fori_coerce.lua 
b/testsuite/test/misc/fori_coerce.lua new file mode 100644 index 0000000000..7330943bd8 --- /dev/null +++ b/testsuite/test/misc/fori_coerce.lua @@ -0,0 +1,33 @@ + +do + local n = 1 + local x = 0 + for i=1,20 do + for j=n,100 do x = x + 1 end + if i == 13 then n = "2" end + end + assert(x == 1993) +end + +do + local n = 1 + local x = 0 + for i=1,20 do + for j=n,100 do x = x + 1 end + if i == 10 then n = "2" end + end + assert(x == 1990) +end + +do + local function f() + local n = 1 + local x = 0 + for i=1,20 do + for j=n,100 do x = x + 1 end + if i == 10 then n = "x" end + end + end + assert(not pcall(f)) +end + diff --git a/testsuite/test/misc/gc_rechain.lua b/testsuite/test/misc/gc_rechain.lua new file mode 100644 index 0000000000..285f408671 --- /dev/null +++ b/testsuite/test/misc/gc_rechain.lua @@ -0,0 +1,32 @@ + +do + local k + + collectgarbage() + + local t = {} + t.ac = 1 + + t.nn = 1 + t.mm = 1 + t.nn = nil + t.mm = nil + + k = "a".."i" + t[k] = 2 + + t.ad = 3 + + t[k] = nil + k = nil + + collectgarbage() + + k = "a".."f" + t[k] = 4 + + t.ak = 5 + + assert(t[k] == 4) +end + diff --git a/testsuite/test/misc/gc_trace.lua b/testsuite/test/misc/gc_trace.lua new file mode 100644 index 0000000000..bc38ce0ca3 --- /dev/null +++ b/testsuite/test/misc/gc_trace.lua @@ -0,0 +1,37 @@ + +if not jit or not jit.status or not jit.status() then return end + +collectgarbage() +for j=1,100 do + loadstring("for i=1,100 do end")() +end +local jutil = require("jit.util") +assert(jutil.traceinfo(90) == nil) +collectgarbage() +assert(jutil.traceinfo(1) == nil) +assert(jutil.traceinfo(2) == nil) +assert(jutil.traceinfo(3) == nil) + +do + local f + local function reccb(tr) + if f == nil then + collectgarbage() + local info = jutil.traceinfo(tr) + jutil.tracek(tr, -info.nk) + -- Error in lj_ir_kvalue() if KGC not marked. + -- Only caught with assertions or Valgrind. 
+ end + end + jit.attach(reccb, "record") + for i=1,200 do + if i % 5 == 0 then + f = function() end + elseif f then + f() + f = nil + end + end + jit.attach(reccb) +end + diff --git a/testsuite/test/misc/gcstep.lua b/testsuite/test/misc/gcstep.lua new file mode 100644 index 0000000000..533356b76e --- /dev/null +++ b/testsuite/test/misc/gcstep.lua @@ -0,0 +1,33 @@ + +local function testgc(what, func) + collectgarbage() + local oc = gcinfo() + func() + local nc = gcinfo() + assert(nc < oc*4, "GC step missing for "..what) +end + +testgc("TNEW", function() + for i=1,10000 do + local t = {} + end +end) + +testgc("TDUP", function() + for i=1,10000 do + local t = {1} + end +end) + +testgc("FNEW", function() + for i=1,10000 do + local function f() end + end +end) + +testgc("CAT", function() + for i=1,10000 do + local s = "x"..i + end +end) + diff --git a/testsuite/test/misc/hook_active.lua b/testsuite/test/misc/hook_active.lua new file mode 100644 index 0000000000..37dfc37937 --- /dev/null +++ b/testsuite/test/misc/hook_active.lua @@ -0,0 +1,95 @@ +local ctest = require("ctest") + +local called = 0 +local function clearhook() debug.sethook(nil, "", 0) end + +-- Return from pcall with active hook must prepend true. FF pcall. +called = 0 +debug.sethook(function() called=called+1; assert(pcall(function() end) == true); clearhook() end, "", 1) +do local x = 1 end +assert(called == 1) + +-- Hook with special caught error must not unblock hooks. FF pcall. +called = 0 +debug.sethook(function() called=called+1; pcall(nil); clearhook() end, "", 1) +do local x = 1 end +assert(called == 1) + +-- Hook with caught error must not unblock hooks. FF pcall. +called = 0 +local function p2() error("") end +debug.sethook(function() called=called+1; pcall(p2); clearhook() end, "", 1) +do local x = 1 end +assert(called == 1) + +-- Hook with special caught error must not unblock hooks. C pcall. 
+called = 0 +debug.sethook(function() called=called+1; ctest.pcall(nil); clearhook() end, "", 1) +do local x = 1 end +assert(called == 1) + +-- Hook with caught error must not unblock hooks. C pcall +called = 0 +local function p2() error("") end +debug.sethook(function() called=called+1; ctest.pcall(p2); clearhook() end, "", 1) +do local x = 1 end +assert(called == 1) + +-- Regular pcall must not block hooks. +debug.sethook(function() called=called+1 end, "", 1) +pcall(function() end) +called = 0 +do local x = 1 end +assert(called > 0) +pcall(function() error("") end) +called = 0 +do local x = 1 end +assert(called > 0) +ctest.pcall(function() end) +called = 0 +do local x = 1 end +assert(called > 0) +ctest.pcall(function() error("") end) +called = 0 +do local x = 1 end +assert(called > 0) +clearhook() + +-- Hook with uncaught error must unblock hooks. FF pcall +called = 0 +pcall(function() + debug.sethook(function() + local old = called + called = 1 + if old == 0 then error("") end + end, "", 1) + do local x = 1 end +end) +assert(called == 1) +called = 2 +do local x = 1 end +assert(called == 1, "hook not unblocked after uncaught error") +clearhook() +called = 2 +do local x = 1 end +assert(called == 2) + +-- Hook with uncaught error must unblock hooks. 
C pcall +called = 0 +ctest.pcall(function() + debug.sethook(function() + local old = called + called = 1 + if old == 0 then error("") end + end, "", 1) + do local x = 1 end +end) +assert(called == 1) +called = 2 +do local x = 1 end +assert(called == 1, "hook not unblocked after uncaught error") +clearhook() +called = 2 +do local x = 1 end +assert(called == 2) + diff --git a/testsuite/test/misc/hook_line.lua b/testsuite/test/misc/hook_line.lua new file mode 100644 index 0000000000..36f710807a --- /dev/null +++ b/testsuite/test/misc/hook_line.lua @@ -0,0 +1,41 @@ +local lines = {} +local function hook() + lines[#lines+1] = debug.getinfo(2).currentline +end + +local function dummy() +end -- <-- line 7 + +debug.sethook(hook, "l", 0) +-- <-- line 10 +local x +dummy() +local y = 1 +dummy() dummy() +local z = 2; local r = true +while y < 4 do y = y + 1 end +while z < 4 do + z = z + 1 +end +-- <-- line 20 +local v +debug.sethook(nil, "", 0) + +assert(#lines > 0) +while lines[1] < 10 do table.remove(lines, 1) end +while lines[#lines] > 20 do table.remove(lines) end + +local s = table.concat(lines, " ") +assert(s == "11 12 7 13 14 7 7 15 16 16 16 16 17 18 17 18 17" or + s == "11 12 7 13 14 7 14 7 15 16 16 16 16 17 18 17 18 17") + +lines = {} +local function f() + if true then return end + local function x() end +end -- <-- line 36 +debug.sethook(hook, "l", 0) +f() +debug.sethook(nil, "", 0) +for i=1,#lines do assert(lines[i] ~= 36) end + diff --git a/testsuite/test/misc/hook_norecord.lua b/testsuite/test/misc/hook_norecord.lua new file mode 100644 index 0000000000..8e7cba05ea --- /dev/null +++ b/testsuite/test/misc/hook_norecord.lua @@ -0,0 +1,12 @@ + +if not jit or not jit.status or not jit.status() then return end + +local called = false +local function f() local x = "wrong"; called = true end +jit.off(f) +debug.sethook(f, "", 5) +for i=1,1000 do local a,b,c,d,e,f=1,2,3,4,5,6 end +assert(called) +-- Check that no trace was generated. 
+assert(require("jit.util").traceinfo(1) == nil) + diff --git a/testsuite/test/misc/hook_record.lua b/testsuite/test/misc/hook_record.lua new file mode 100644 index 0000000000..6f1646dead --- /dev/null +++ b/testsuite/test/misc/hook_record.lua @@ -0,0 +1,8 @@ + +if not jit or not jit.status or not jit.status() then return end + +debug.sethook(function() for i=1,100 do end end, "", 10) +for i=1,10 do end +debug.sethook() +assert((require("jit.util").traceinfo(1))) + diff --git a/testsuite/test/misc/hook_top.lua b/testsuite/test/misc/hook_top.lua new file mode 100644 index 0000000000..f809fcea64 --- /dev/null +++ b/testsuite/test/misc/hook_top.lua @@ -0,0 +1,55 @@ + +local t = {} +for i=1,26 do t[i] = string.char(96+i) end + +local function tcheck(t1, t2) + assert(#t1 == #t2) + for i=1,#t1 do assert(t1[i] == t2[i]) end +end + +local function foo1(...) -- VARG RETM + return ... +end + +local function foo2(...) -- VARG UCLO RETM + local function dummy() end + return ... +end + +local function foo3(...) -- VARG UCLO -> RETM + do return ... 
end + local function dummy() end +end + +local function foo4() -- UCLO UCLO RET + do + local x + local function dummy() return x end + end +end + +called = false +debug.sethook(function() local x = "wrong"; called = true end, "", 1) +tcheck(t, {foo1(unpack(t))}) -- CALLM TSETM +assert(called) +called = false +tcheck(t, {foo2(unpack(t))}) +assert(called) +called = false +tcheck(t, {foo3(unpack(t))}) -- VARG UCLO -> RETM +assert(called) +called = false +foo4() +assert(called) + +debug.sethook(function() + local name, val = debug.getlocal(2, 1) + assert(name == "a" and val == nil) + debug.setlocal(2, 1, "bar") + debug.sethook(nil) +end, "c") +local function foo5(a) + assert(a == "bar") +end +foo5() + diff --git a/testsuite/test/misc/jit_flush.lua b/testsuite/test/misc/jit_flush.lua new file mode 100644 index 0000000000..ead1e4e991 --- /dev/null +++ b/testsuite/test/misc/jit_flush.lua @@ -0,0 +1,50 @@ + +if not jit or not jit.status or not jit.status() then return end + +for i=1,100 do + if i==50 then jit.flush(2) end + for j=1,100 do end + for j=1,100 do end +end + +jit.flush() + +local function f() for i=1,100 do end end +for i=1,100 do local x = gcinfo(); f() end + +jit.flush() + +local function fib(n) + if n < 2 then return 1 end + return fib(n-2) + fib(n-1) +end + +fib(11) + +jit.flush() + +local names = {} +for i=1,100 do names[i] = i end + +function f() + for k,v in ipairs(names) do end +end + +f() + +for i=1,2 do + f() + f() + jit.flush() +end + +jit.flush() + +jit.flush(1) -- ignored +jit.flush(2) -- ignored +for i=1,1e7 do end -- causes trace #1 + +jit.flush(2) -- ignored +jit.flush(1) -- ok +jit.flush(1) -- crashes + diff --git a/testsuite/test/misc/lightud.lua b/testsuite/test/misc/lightud.lua new file mode 100644 index 0000000000..4974d50fcb --- /dev/null +++ b/testsuite/test/misc/lightud.lua @@ -0,0 +1,88 @@ +local ctest = require("ctest") + +local lightud = ctest.lightud +local assert = assert + +-- x64 lightud tests +if jit and jit.arch == "x64" then + do + local ud1 = 
lightud(0x12345678) + local ud2 = lightud(0x12345678) + assert(ud1 == ud2) + assert(tostring(ud1) == "userdata: 0x12345678") + end + do + local ud1 = lightud(1) + local ud2 = lightud(2) + assert(ud1 ~= ud2) + end + do + local ud1 = lightud(2^47-1) + local ud2 = lightud(2^47-1) + assert(ud1 == ud2) + assert(tostring(ud1) == "userdata: 0x7fffffffffff") + end + do + local ud1 = lightud(0x12345678+123*2^32) + local ud2 = lightud(0x12345678+456*2^32) + for i=1,100 do assert(ud1 ~= ud2) end + end + assert(tostring(lightud(0x5abc*2^32 + 0xdef01234)) == "userdata: 0x5abcdef01234") + assert(pcall(lightud, 2^47) == false) + assert(pcall(lightud, 2^64-2048) == false) +end + +assert(getmetatable(lightud(1)) == nil) + +-- lightuserdata SLOAD value and HREF key +do + local ud = lightud(12345) + local t = {[ud] = 42} + for i=1,100 do + assert(t[ud] == 42) + end +end + +-- lightuserdata NEWREF key +do + local ud = lightud(12345) + for i=1,100 do + local t = {[ud] = 42} + assert(t[ud] == 42) + end +end + +-- lightuserdata ASTORE/HSTORE value +do + local ud = lightud(12345) + local t = {} + for i=1,100 do + t[i] = ud + end + assert(t[100] == ud) +end + +-- lightuserdata sync to stack +do + local ud = lightud(12345) + local x = nil + for j=1,20 do + for i=1,50 do + x = ud + end + assert(x == ud) + end +end + +-- lightuserdata vs. 
number type check +do + local t = {} + for i=1,200 do t[i] = i end + t[180] = lightud(12345) + local x = 0 + assert(not pcall(function(t) + for i=1,200 do x = x + t[i] end + end, t)) + assert(x == 16110) +end + diff --git a/testsuite/test/misc/loop_unroll.lua b/testsuite/test/misc/loop_unroll.lua new file mode 100644 index 0000000000..1700fac910 --- /dev/null +++ b/testsuite/test/misc/loop_unroll.lua @@ -0,0 +1,35 @@ + +-- type instability on loop unroll -> record unroll +do + local flip = true + for i=1,100 do flip = not flip end + assert(flip == true) +end + +do + local t = {} + local a, b, c = 1, "", t + for i=1,100 do a,b,c=b,c,a end + assert(c == 1 and a == "" and b == t) +end + +-- FAILFOLD on loop unroll -> LJ_TRERR_GFAIL -> record unroll +do + local t = { 1, 2 } + local k = 2 + local x = 0 + for i=1,200 do + x = x + t[k] + k = k == 1 and 2 or 1 + end + assert(x == 300 and k == 2) +end + +-- Unroll if inner loop aborts. +local j = 0 +for i = 1,100 do + repeat + j = j+1 + until true +end + diff --git a/testsuite/test/misc/parse_comp.lua b/testsuite/test/misc/parse_comp.lua new file mode 100644 index 0000000000..5e1948da80 --- /dev/null +++ b/testsuite/test/misc/parse_comp.lua @@ -0,0 +1,13 @@ + +do + local f = {{n=5}} + local a = f[1].n + assert(1 < a) + assert(1 < (f[1].n)) + assert(1 < f[1].n) +end + +do + tt = { a = 1 } + assert(not(0 >= tt.a)) +end diff --git a/testsuite/test/misc/parse_esc.lua b/testsuite/test/misc/parse_esc.lua new file mode 100644 index 0000000000..4bcce0e864 --- /dev/null +++ b/testsuite/test/misc/parse_esc.lua @@ -0,0 +1,7 @@ +assert("\79\126" == "O~") +assert("\x4f\x7e" == "O~") +assert(loadstring[[return "\xxx"]] == nil) +assert(loadstring[[return "\xxx"]] == nil) +assert(assert(loadstring[[return "abc \z + + def"]])() == "abc def") diff --git a/testsuite/test/misc/parse_misc.lua b/testsuite/test/misc/parse_misc.lua new file mode 100644 index 0000000000..8031ec171f --- /dev/null +++ b/testsuite/test/misc/parse_misc.lua @@ -0,0 
+1,31 @@ + +-- Ambiguous syntax: function call vs. new statement. +if os.getenv("LUA52") then + assert(assert(loadstring([[ +local function f() return 99 end +return f +() +]]))() == 99) +else + assert(loadstring([[ +local function f() return 99 end +return f +() +]]) == nil) +end + +-- UTF-8 identifiers. +assert(loadstring([[ +local ä = 1 +local aäa = 2 +local äöü·€晶 = 3 + +assert(ä == 1) +assert(aäa == 2) +assert(äöü·€晶 == 3) + +assert(#"ä" == 2) +assert(#"aäa" == 4) +assert(#"äöü·€晶" == 14) +]]))() + diff --git a/testsuite/test/misc/phi_conv.lua b/testsuite/test/misc/phi_conv.lua new file mode 100644 index 0000000000..8d7bea5fdc --- /dev/null +++ b/testsuite/test/misc/phi_conv.lua @@ -0,0 +1,53 @@ + +local bit = require("bit") + +local Rm = {} +for i=0,16 do Rm[i] = 0 end + +for k=1,10 do + local seed = 1 + for i=16,0,-1 do + seed = bit.band(seed*9069, 0x7fffffff) + Rm[i] = seed + end + assert(seed == 1952688301) +end + +local retindex = 0 +local retdata = { 3, 1, 1, 1, 0, 3, 1, 0, 0, 2, 0, 2, 0, 0, 3, 1, 1, 1, 1 } + +local function get_bits() + retindex = retindex + 1 + return retdata[retindex] +end + +local hufcodes = { [0] = true, [4] = true, [11] = true, [36] = true, [68] = true } + +local maskhuf = { 0x0002, 0x0003, 0x0004, 0x0005, } + +local function decodeCode() + local lookup = get_bits() + local code = hufcodes[lookup] + local z = {1,1,1,1} + if not code then + for i = 1, 4 do + lookup = bit.bor(lookup, bit.lshift(get_bits(), i + 1)) + -- need PHI for CONV num.int of lookup, used in snapshot + code = hufcodes[lookup + maskhuf[i]] + if code then break end + end + end + assert(code) + return code +end + +local function test() + for i = 1, 6 do + decodeCode() + end +end + +if jit and jit.status and jit.status() then jit.opt.start("hotloop=1") end + +test() + diff --git a/testsuite/test/misc/recurse_deep.lua b/testsuite/test/misc/recurse_deep.lua new file mode 100644 index 0000000000..9b9af2952f --- /dev/null +++ b/testsuite/test/misc/recurse_deep.lua @@ 
-0,0 +1,29 @@ + +do + local function sum(n) + if n == 1 then return 1 end + return n + sum(n-1) + end + assert(sum(200) == 20100) +end + +do + local pcall = pcall + local tr1 + local x = 0 + function tr1(n) + if n <= 0 then return end + x = x + 1 + return pcall(tr1, n-1) + end + assert(tr1(200) == true and x == 200) +end + +do + local function fib(n) + if n < 2 then return 1 end + return fib(n-2) + fib(n-1) + end + assert(fib(15) == 987) +end + diff --git a/testsuite/test/misc/recurse_tail.lua b/testsuite/test/misc/recurse_tail.lua new file mode 100644 index 0000000000..ef76443211 --- /dev/null +++ b/testsuite/test/misc/recurse_tail.lua @@ -0,0 +1,22 @@ + +do + local tr1 + function tr1(n) + if n <= 0 then return 0 end + return tr1(n-1) + end + assert(tr1(200) == 0) +end + +do + local tr1, tr2 + function tr1(n) + if n <= 0 then return 0 end + return tr2(n-1) + end + function tr2(n) + return tr1(n) + end + assert(tr2(200) == 0) +end + diff --git a/testsuite/test/misc/stack_gc.lua b/testsuite/test/misc/stack_gc.lua new file mode 100644 index 0000000000..656a06a054 --- /dev/null +++ b/testsuite/test/misc/stack_gc.lua @@ -0,0 +1,15 @@ + +do + local t = setmetatable({}, { __index=function(t, k) + k = k - 1 + if k == 0 then + collectgarbage() -- Mark stack, including holes. + return 0 + else + return t[k] -- Leaves holes in each frame. + end + do local a,b,c,d,e,f,g,h,i,j,k,l,m,n end -- Ensure bigger frame size. + end}) + local x = t[50] +end + diff --git a/testsuite/test/misc/stack_purge.lua b/testsuite/test/misc/stack_purge.lua new file mode 100644 index 0000000000..bfaee0f3e9 --- /dev/null +++ b/testsuite/test/misc/stack_purge.lua @@ -0,0 +1,25 @@ + +-- Must preserve the modified function slot in the RET snapshot. 
+local function a() + local _,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_ + local _,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_ + local _,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_ + return 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +end + +local function b() + return a() +end + +local function c() + for j=1,10 do + for i=1,50 do b() b() b() end + collectgarbage() + local t = {} + for i=1,50 do t = {t} end + end +end + +jit.off(c) +c() + diff --git a/testsuite/test/misc/stackov.lua b/testsuite/test/misc/stackov.lua new file mode 100644 index 0000000000..ef105af6f9 --- /dev/null +++ b/testsuite/test/misc/stackov.lua @@ -0,0 +1,40 @@ + +local function f() + f() +end + +local err, s = xpcall(f, debug.traceback) +assert(err == false) + +local first = string.match(s, "[^\n]+") +local line = debug.getinfo(f, "S").linedefined+1 +assert(string.match(first, ":"..line..": stack overflow$")) + +local n = 1 +for _ in string.gmatch(s, "\n") do n = n + 1 end +assert(n == 1+1+11+1+10) + +local function g(i) + g(i) +end + +local err, s = xpcall(g, debug.traceback, 1) +assert(err == false) + +--[[ +-- too slow +local function vtail(...) + return vtail(1, ...) +end + +local err, s = xpcall(vtail, debug.traceback, 1) +assert(err == false) +--]] + +local function vcall(...) + vcall(1, ...) 
+end + +local err, s = xpcall(vcall, debug.traceback, 1) +assert(err == false) + diff --git a/testsuite/test/misc/stackovc.lua b/testsuite/test/misc/stackovc.lua new file mode 100644 index 0000000000..c00bcbd825 --- /dev/null +++ b/testsuite/test/misc/stackovc.lua @@ -0,0 +1,4 @@ +local j = 1e4 +local co = coroutine.create(function() t = {} for i = 1, j do t[i] = i end return unpack(t) end) +local ok, err = coroutine.resume(co) +assert(not ok and string.find(err, "unpack")) diff --git a/testsuite/test/misc/tcall_base.lua b/testsuite/test/misc/tcall_base.lua new file mode 100644 index 0000000000..c6c4ae1a31 --- /dev/null +++ b/testsuite/test/misc/tcall_base.lua @@ -0,0 +1,20 @@ + +local r = 0 +local function g() + r = r + 1 + for i=1,100 do end +end + +local function f() + for j=1,20 do + if j > 19 then + return g() -- Tailcall at base. + -- Let this link to the already compiled loop in g(). + end + end +end + +g() -- Compile this loop first. +for i=1,50 do f() end +assert(r == 51) + diff --git a/testsuite/test/misc/tcall_loop.lua b/testsuite/test/misc/tcall_loop.lua new file mode 100644 index 0000000000..d3c6f1a6de --- /dev/null +++ b/testsuite/test/misc/tcall_loop.lua @@ -0,0 +1,8 @@ +local function f(i) + if i > 0 then return f(i-1) end + return 1 +end + +local x = 0 +for i=1,100 do x = x + f(1000) end +assert(x == 100) diff --git a/testsuite/test/misc/tonumber_scan.lua b/testsuite/test/misc/tonumber_scan.lua new file mode 100644 index 0000000000..78e1ca3ee5 --- /dev/null +++ b/testsuite/test/misc/tonumber_scan.lua @@ -0,0 +1,180 @@ +local ffi = require("ffi") +local bit = require("bit") + +ffi.cdef[[ +double strtod(const char *, char **); +]] + +local t = { + -- errors + false, "", + false, " ", + false, "+", + false, "z", + false, ".", + false, ".z", + false, "0.z", + false, ".0z", + false, "0xz", + false, "0x.z", + false, "0x0.z", + false, "0x.0z", + false, ".e5", + false, ".p4", + false, "1.p4", + false, "1.p+4", + false, "0x1.e+4", + false, "infi", + false, 
"+ 1", + false, "- 9", + -- misc + 0x3ff0000000000000ULL, " \t\n\v\f\r 1", + -- inf/nan + 0x7ff0000000000000ULL, "iNF", + 0xfff0000000000000ULL, "-Inf", + 0x7ff0000000000000ULL, "+iNfInItY", + 0xfff0000000000000ULL, "-INFINITY", + 0xfff8000000000000ULL, "naN", + 0xfff8000000000000ULL, "+NaN", + 0xfff8000000000000ULL, "-nAn", + -- smallest/largest numbers + 0x0000000000000000ULL, "0e1000", + 0x0000000000000000ULL, "0e-1000", + 0x0000000000000000ULL, "0x0p2000", + 0x0000000000000000ULL, "0x0p-2000", + 0x7ff0000000000000ULL, "1e1000", + 0x0000000000000000ULL, "1e-1000", + 0xfff0000000000000ULL, "-1e1000", +-- wrong for DUALNUM: 0x8000000000000000ULL, "-1e-1000", + 0x7ff0000000000000ULL, "0x1p2000", + 0x0000000000000000ULL, "0x1p-2000", + 0xfff0000000000000ULL, "-0x1p2000", +-- wrong for DUALNUM: 0x8000000000000000ULL, "-0x1p-2000", + 0x0010000000000000ULL, "2.2250738585072014e-308", + 0x7fefffffffffffffULL, "1.7976931348623158e+308", + 0x8000b8157268fdafULL, "-1e-309", + 0x000ac941b426dd3bULL, "1.5e-308", + 0x000ac941b426dd3bULL, "0x0.ac941b426dd3b7p-1022", + 0x0000000000000001ULL, "4.9406564584124654e-324", + 0x000f9c7573d7fe52ULL, "2.171e-308", + 0x241d21ecf36d4a22ULL, "1.0020284025808569e-134", + 0x0000000000000001ULL, "0x1p-1074", + 0x0000000000000000ULL, "0x1p-1075", + 0x0000000000000000ULL, "0x1p-1076", + 0x0000000000000000ULL, "0x0.ffffffffffffffffffffffffffp-1075", + 0x0000000000000000ULL, "0x1.00000000000000000000000000p-1075", + 0x0000000000000001ULL, "0x1.00000000000000000000000001p-1075", + 0x7fe0000000000000ULL, "0x1p1023", + 0x7ff0000000000000ULL, "0x1p1024", + 0x7ff0000000000000ULL, "0x1p1025", + 0x7ff0000000000000ULL, "0x3p1023", + 0x7ff0000000000000ULL, "0x3.ffffffffffffecp1023", + 0xfff0000000000000ULL, "-0xf7dcba98765432p969", + 0x7fefffffffffffffULL, "0x1.fffffffffffff0000000000000p1023", + 0x7fefffffffffffffULL, "0x1.fffffffffffff0000000000001p1023", + 0x7fefffffffffffffULL, "0x1.fffffffffffff7ffffffffffffp1023", + 0x7ff0000000000000ULL, 
"0x1.fffffffffffff8000000000000p1023", + 0x7fefffffffffffffULL, "179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0", + 0x7fefffffffffffffULL, "179769313486231580793728971405303415079934132710037826936173778980444968292764750946649017977587207096330286416692887910946555547851940402630657488671505820681908902000708383676273854845817711531764475730270069855571366959622842914819860834936475292719074168444365510704342711559699508093042880177904174497791.999", + 0x7ff0000000000000ULL, "179769313486231580793728971405303415079934132710037826936173778980444968292764750946649017977587207096330286416692887910946555547851940402630657488671505820681908902000708383676273854845817711531764475730270069855571366959622842914819860834936475292719074168444365510704342711559699508093042880177904174497792.0", + 0x3ff0000000000000ULL, "0x100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000p-1028", + 0x1214e2995454ee0bULL, "0."..string.rep("0", 220).."1"..string.rep("4", 800), + -- http://www.exploringbinary.com/15-digit-quick-and-dirty-conversions-dont-round-trip/ + 0x04409cf3929ffbc3ULL, "3.409452297963e-288", + 0x7fe02b4782a6c378ULL, "9.08344e+307", + 0x6e05e258a3929ee5ULL, "9.88819e+221", + -- http://www.exploringbinary.com/incorrectly-rounded-conversions-in-gcc-and-glibc/ + 0x3fe0000000000002ULL, "0.500000000000000166533453693773481063544750213623046875", + 0x42c0000000000002ULL, "3.518437208883201171875e13", + 0x404f44abd5aa7ca4ULL, "62.5364939768271845828", + 0x3e0bd5cbaef0fd0cULL, "8.10109172351e-10", + 
0x3ff8000000000000ULL, "1.50000000000000011102230246251565404236316680908203125", + 0x433fffffffffffffULL, "9007199254740991.4999999999999999999999999999999995", + 0x7ecd2e77eb6e3fadULL, "6.253649397682718e+302", + 0x7ecd2e77eb6e3fadULL, "6.2536493976827180e+302", + -- http://www.exploringbinary.com/incorrectly-rounded-conversions-in-visual-c-plus-plus/ + 0x43405e6cec57761aULL, "9214843084008499", + 0x3fe0000000000002ULL, "0.500000000000000166533453693773481063544750213623046875", + 0x44997a3c7271b021ULL, "30078505129381147446200", + 0x4458180d5bad2e3eULL, "1777820000000000000001", + 0x3fe0000000000002ULL, "0.500000000000000166547006220929549868969843373633921146392822265625", + 0x3fe0000000000002ULL, "0.50000000000000016656055874808561867439493653364479541778564453125", + 0x3fd92bb352c4623aULL, "0.3932922657273", + -- http://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/ + 0x0010000000000000ULL, "2.2250738585072012e-308", + -- http://www.exploringbinary.com/incorrectly-rounded-subnormal-conversions-in-java/ + 0x0000000008000000ULL, "6.631236871469758276785396630275967243399099947355303144249971758736286630139265439618068200788048744105960420552601852889715006376325666595539603330361800519107591783233358492337208057849499360899425128640718856616503093444922854759159988160304439909868291973931426625698663157749836252274523485312442358651207051292453083278116143932569727918709786004497872322193856150225415211997283078496319412124640111777216148110752815101775295719811974338451936095907419622417538473679495148632480391435931767981122396703443803335529756003353209830071832230689201383015598792184172909927924176339315507402234836120730914783168400715462440053817592702766213559042115986763819482654128770595766806872783349146967171293949598850675682115696218943412532098591327667236328125e-316", + 0x0000000000010000ULL, 
"3.237883913302901289588352412501532174863037669423108059901297049552301970670676565786835742587799557860615776559838283435514391084153169252689190564396459577394618038928365305143463955100356696665629202017331344031730044369360205258345803431471660032699580731300954848363975548690010751530018881758184174569652173110473696022749934638425380623369774736560008997404060967498028389191878963968575439222206416981462690113342524002724385941651051293552601421155333430225237291523843322331326138431477823591142408800030775170625915670728657003151953664260769822494937951845801530895238439819708403389937873241463484205608000027270531106827387907791444918534771598750162812548862768493201518991668028251730299953143924168545708663913273994694463908672332763671875e-319", + 0x0000800000000100ULL, "6.953355807847677105972805215521891690222119817145950754416205607980030131549636688806115726399441880065386399864028691275539539414652831584795668560082999889551357784961446896042113198284213107935110217162654939802416034676213829409720583759540476786936413816541621287843248433202369209916612249676005573022703244799714622116542188837770376022371172079559125853382801396219552418839469770514904192657627060319372847562301074140442660237844114174497210955449896389180395827191602886654488182452409583981389442783377001505462015745017848754574668342161759496661766020028752888783387074850773192997102997936619876226688096314989645766000479009083731736585750335262099860150896718774401964796827166283225641992040747894382698751809812609536720628966577351093292236328125e-310", + 0x0000000000010800ULL, 
"3.339068557571188581835713701280943911923401916998521771655656997328440314559615318168849149074662609099998113009465566426808170378434065722991659642619467706034884424989741080790766778456332168200464651593995817371782125010668346652995912233993254584461125868481633343674905074271064409763090708017856584019776878812425312008812326260363035474811532236853359905334625575404216060622858633280744301892470300555678734689978476870369853549413277156622170245846166991655321535529623870646888786637528995592800436177901746286272273374471701452991433047257863864601424252024791567368195056077320885329384322332391564645264143400798619665040608077549162173963649264049738362290606875883456826586710961041737908872035803481241600376705491726170293986797332763671875e-319", + -- EGLIBC 2.16 tests + 0x4028b0a3d70a3d71ULL, "12.345", + 0x441ac4da03bc47e4ULL, "12.345e19", + 0xc197d78400000000ULL, "-.1e+9", + 0x3fc0000000000000ULL, ".125", + 0x4415af1d78b58c40ULL, "1e20", + 0x0000000000000000ULL, "0e-19", + 0x3051144f2d9a718bULL, "5.9e-76", + 0x4024000000000000ULL, "0x1.4p+3", + 0x4024000000000000ULL, "0xAp0", + 0x4024000000000000ULL, "0x0Ap0", + 0x4024000000000000ULL, "0x0A", + 0x4064000000000000ULL, "0xA0", + 0x4064000000000000ULL, "0x0.A0p8", + 0x4064000000000000ULL, "0x0.50p9", + 0x4064000000000000ULL, "0x0.28p10", + 0x4064000000000000ULL, "0x0.14p11", + 0x4064000000000000ULL, "0x0.0A0p12", + 0x4064000000000000ULL, "0x0.050p13", + 0x4064000000000000ULL, "0x0.028p14", + 0x4064000000000000ULL, "0x0.014p15", + 0x4064000000000000ULL, "0x00.00A0p16", + 0x4064000000000000ULL, "0x00.0050p17", + 0x4064000000000000ULL, "0x00.0028p18", + 0x4064000000000000ULL, "0x00.0014p19", + 0x0008000000000000ULL, "0x1p-1023", + 0x0008000000000000ULL, "0x0.8p-1022", + 0x3ff0000140000000ULL, "0x80000Ap-23", + 0x0000000000000000ULL, "1e-324", + 0x4370000000000000ULL, "0x100000000000008p0", + 0x4370000000000000ULL, "0x100000000000008.p0", + 0x4370000000000000ULL, "0x100000000000008.00p0", + 
0x43f0000000000000ULL, "0x10000000000000800p0", + 0x43f0000000000001ULL, "0x10000000000000801p0", + -- Fuzzing + 0x699783fbf2d24ea5ULL, "449999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999.9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999", + 0x43f158e460913d00ULL, "2e19", +} + +local function tohex64(x) + return "0x"..bit.tohex(tonumber(x/2LL^32))..bit.tohex(tonumber(x%2LL^32)).."ULL" +end + +local conv = tonumber + +if arg and arg[1] == "strtod" then + local e = ffi.new("char *[1]") + function conv(s) + local d = ffi.C.strtod(s, e) + return (e[0][0] == 0 and #s ~= 0) and d or nil + end +end + +local u = ffi.new("union { double d; uint64_t x; }") + +for i=1,#t,2 do + local y, s = t[i], t[i+1] + local d = conv(s) + local ok + if d == nil then + ok = (y == false) + else + u.d = d + ok = (y == u.x) + end + if not ok then + io.write('FAIL: "', s, '"\n GOT: ', d and tohex64(u.x) or "nil", " OK: ", y and tohex64(y) or "nil", "\n\n") +-- print(" "..tohex64(u.x)..", \""..s.."\",") + end +end + diff --git a/testsuite/test/misc/uclo.lua b/testsuite/test/misc/uclo.lua new file mode 100644 index 0000000000..bd9bd24299 --- /dev/null +++ b/testsuite/test/misc/uclo.lua @@ -0,0 +1,91 @@ + +local function test_for() + local z1, z2 + for i=1,10 do + local function f() return i end + if z1 then z2 = f else z1 = f end + end + assert(z1() == 1) + assert(z2() == 10) +end + +local function test_while() + local z1, z2 + local i = 1 + while i <= 10 do + 
local j = i + local function f() return j end + if z1 then z2 = f else z1 = f end + i = i + 1 + end + assert(z1() == 1) + assert(z2() == 10) +end + +local function test_repeat() + local z1, z2 + local i = 1 + repeat + local j = i + local function f() return j end + if z1 then z2 = f else z1 = f end + i = i + 1 + until i > 10 + assert(z1() == 1) + assert(z2() == 10) +end + +local function test_func() + local function ff(x) + return function() return x end + end + local z1, z2 + for i=1,10 do + local f = ff(i) + if z1 then z2 = f else z1 = f end + end + assert(z1() == 1) + assert(z2() == 10) +end + +test_for() +test_while() +test_repeat() +test_func() + +do + local function f1(a) + if a > 0 then + local b = f1(a - 1) + return function() + if type(b) == "function" then + return a + b() + end + return a + b + end + end + return a + end + + local function f2(a) + return f1(a)() + end + + for i = 1, 41 do + local r = f2(4) + f2(4) + end +end + +-- Don't mark upvalue as immutable if written to after prototype definition. 
+do + local x = 1 + local function f() + local y = 0 + for i=1,100 do y=y+x end + return y + end + assert(f() == 100) + x = 2 + assert(f() == 200) +end + diff --git a/testsuite/test/misc/unordered_jit.lua b/testsuite/test/misc/unordered_jit.lua new file mode 100644 index 0000000000..5ff1a1bafd --- /dev/null +++ b/testsuite/test/misc/unordered_jit.lua @@ -0,0 +1,96 @@ + +local nan = 0/0 +local t = {} +for i=1,100 do t[i] = i+0.5 end +for i=101,200 do t[i] = nan end + +do + local z = 0 + for i=1,200 do if t[i] > 1000 then z=i end end + assert(z == 0) +end + +do + local z = 0 + for i=1,200 do if not (t[i] < 1000) then z=i end end + assert(z == 200) +end + +do + local z = 0 + for i=1,200 do if t[i] <= 1000 then z=i end end + assert(z == 100) +end + +do + local z = 0 + for i=1,200 do if not (t[i] >= 1000) then z=i end end + assert(z == 200) +end + +do + local z = 0 + for i=1,200 do if t[i] > 0 then z=i end end + assert(z == 100) +end + +do + local z = 0 + for i=1,200 do if not (t[i] < 0) then z=i end end + assert(z == 200) +end + +do + local z = 0 + for i=1,200 do if t[i] <= 0 then z=i end end + assert(z == 0) +end + +do + local z = 0 + for i=1,200 do if not (t[i] >= 0) then z=i end end + assert(z == 200) +end + +do local z; for i=1,100 do z = 0/0 end; assert(z ~= z) end + +do local z; for i=1,100 do z = nan == nan end; assert(z == false) end +do local z; for i=1,100 do z = nan == 1 end; assert(z == false) end +do local z; for i=1,100 do z = 1 == nan end; assert(z == false) end + +do local z; for i=1,100 do z = nan ~= nan end; assert(z == true) end +do local z; for i=1,100 do z = nan ~= 1 end; assert(z == true) end +do local z; for i=1,100 do z = 1 ~= nan end; assert(z == true) end + +do local z; for i=1,100 do z = nan < nan end; assert(z == false) end +do local z; for i=1,100 do z = nan < 1 end; assert(z == false) end +do local z; for i=1,100 do z = 1 < nan end; assert(z == false) end + +do local z; for i=1,100 do z = not (nan < nan) end; assert(z == true) end +do 
local z; for i=1,100 do z = not (nan < 1) end; assert(z == true) end +do local z; for i=1,100 do z = not (1 < nan) end; assert(z == true) end + +do local z; for i=1,100 do z = nan > nan end; assert(z == false) end +do local z; for i=1,100 do z = nan > 1 end; assert(z == false) end +do local z; for i=1,100 do z = 1 > nan end; assert(z == false) end + +do local z; for i=1,100 do z = not (nan > nan) end; assert(z == true) end +do local z; for i=1,100 do z = not (nan > 1) end; assert(z == true) end +do local z; for i=1,100 do z = not (1 > nan) end; assert(z == true) end + +do local z; for i=1,100 do z = nan <= nan end; assert(z == false) end +do local z; for i=1,100 do z = nan <= 1 end; assert(z == false) end +do local z; for i=1,100 do z = 1 <= nan end; assert(z == false) end + +do local z; for i=1,100 do z = not (nan <= nan) end; assert(z == true) end +do local z; for i=1,100 do z = not (nan <= 1) end; assert(z == true) end +do local z; for i=1,100 do z = not (1 <= nan) end; assert(z == true) end + +do local z; for i=1,100 do z = nan >= nan end; assert(z == false) end +do local z; for i=1,100 do z = nan >= 1 end; assert(z == false) end +do local z; for i=1,100 do z = 1 >= nan end; assert(z == false) end + +do local z; for i=1,100 do z = not (nan >= nan) end; assert(z == true) end +do local z; for i=1,100 do z = not (nan >= 1) end; assert(z == true) end +do local z; for i=1,100 do z = not (1 >= nan) end; assert(z == true) end + diff --git a/testsuite/test/misc/wbarrier.lua b/testsuite/test/misc/wbarrier.lua new file mode 100644 index 0000000000..5536625a36 --- /dev/null +++ b/testsuite/test/misc/wbarrier.lua @@ -0,0 +1,7 @@ +local t={} +for i=1,20000 do + t[i] = tostring(i) +end +for i=1,#t do + assert(t[i] == tostring(i)) +end diff --git a/testsuite/test/misc/wbarrier_jit.lua b/testsuite/test/misc/wbarrier_jit.lua new file mode 100644 index 0000000000..2c8dd7fbcb --- /dev/null +++ b/testsuite/test/misc/wbarrier_jit.lua @@ -0,0 +1,18 @@ + +do + local t = {[0]={}} + 
for i=1,1e5 do t[i] = {t[i-1]} end + for i=1,1e5 do assert(t[i][1] == t[i-1]) end +end + +do + local f + do + local x = 0 + function f() + for i=1,1e5 do x = {i} end + end + end + f() +end + diff --git a/testsuite/test/misc/wbarrier_obar.lua b/testsuite/test/misc/wbarrier_obar.lua new file mode 100644 index 0000000000..258db2158e --- /dev/null +++ b/testsuite/test/misc/wbarrier_obar.lua @@ -0,0 +1,22 @@ +-- DSE of USTORE must eliminate OBAR, too. + +if jit and jit.opt then pcall(jit.opt.start, "-sink") end + +local f +do + local x + f = function() + local y = 0 + for i=1,10000 do + x = {1} + if y > 0 then end + x = 1 + end + end +end + +collectgarbage() +collectgarbage("setstepmul", 1) +collectgarbage("restart") +f() + diff --git a/testsuite/test/opt/dse/array.lua b/testsuite/test/opt/dse/array.lua new file mode 100644 index 0000000000..8c766248a3 --- /dev/null +++ b/testsuite/test/opt/dse/array.lua @@ -0,0 +1,197 @@ +local assert = assert + +-- Same value ---------------------------------------------------------------- + +do --- 1 +-- Store with same ref and same value. +-- 2nd store eliminated. All stores in loop eliminated. + local t = { 1, 2 } + for i=1,100 do + t[1] = 11 + assert(t[1] == 11) + t[1] = 11 + assert(t[1] == 11) + end + assert(t[1] == 11) +end + +do --- 2 +-- Store with different tab, same idx and same value. +-- All stores in loop eliminated. + local t1 = { 1, 2 } + local t2 = { 1, 2 } + for i=1,100 do + t1[1] = 11 + assert(t1[1] == 11) + t2[1] = 11 + assert(t2[1] == 11) + end + assert(t1[1] == 11) + assert(t2[1] == 11) +end + +do --- 3 +-- Store with same tab, different const idx and same value. +-- All stores in loop eliminated. Also disambiguated. + local t = { 1, 2 } + for i=1,100 do + t[1] = 11 + assert(t[1] == 11) + t[2] = 11 + assert(t[2] == 11) + end + assert(t[1] == 11) + assert(t[2] == 11) +end + +do --- 4 +-- Store with different tab, different const idx and same value. +-- All stores in loop eliminated. Also disambiguated. 
+ local t1 = { 1, 2 } + local t2 = { 1, 2 } + for i=1,100 do + t1[1] = 11 + assert(t1[1] == 11) + t2[2] = 11 + assert(t2[2] == 11) + end + assert(t1[1] == 11) + assert(t2[2] == 11) +end + +do --- 5 +-- Store with different tab, different non-const idx and same value. +-- All stores in loop eliminated. Not disambiguated (but not needed). + local t1 = { 1, 2 } + local t2 = { 1, 2 } + local k = 1 + for i=1,100 do + t1[k] = 11 + assert(t1[k] == 11) + t2[2] = 11 + assert(t2[2] == 11) + end + assert(t1[1] == 11) + assert(t2[2] == 11) +end + +do --- 6 +-- Store with same ref, same value and aliased loads. +-- 2nd store eliminated. Not disambiguated (but not needed). + local t1 = { 1, 2 } + local t2 = t1 + for i=1,100 do + t1[1] = 11 + assert(t2[1] == 11) + t1[1] = 11 + assert(t2[1] == 11) + end + assert(t1[1] == 11) +end + +-- Different value ----------------------------------------------------------- + +do --- 7 +-- Store with same ref and different value. +-- 1st store eliminated. All stores in loop eliminated. + local t = { 1, 2 } + for i=1,100 do + assert(true) + t[1] = 11 + assert(t[1] == 11) + t[1] = 22 + assert(t[1] == 22) + end + assert(t[1] == 22) +end + +do --- 8 +-- Store with different tab, same idx and different value. +-- Cannot eliminate any stores (would need dynamic disambiguation). + local t1 = { 1, 2 } + local t2 = { 1, 2 } + for i=1,100 do + assert(true) + t1[1] = 11 + assert(t1[1] == 11) + t2[1] = 22 + assert(t2[1] == 22) + end + assert(t1[1] == 11) + assert(t2[1] == 22) +end + +do --- 9 +-- Store with same tab, different const idx and different value. +-- Disambiguated. All stores in loop eliminated. + local t = { 1, 2 } + for i=1,100 do + assert(true) + t[1] = 11 + assert(t[1] == 11) + t[2] = 22 + assert(t[2] == 22) + end + assert(t[1] == 11) + assert(t[2] == 22) +end + +do --- 10 +-- Store with different tab, different const idx and different value. +-- Disambiguated. All stores in loop eliminated. 
+ local t1 = { 1, 2 } + local t2 = { 1, 2 } + for i=1,100 do + assert(true) + t1[1] = 11 + assert(t1[1] == 11) + t2[2] = 22 + assert(t2[2] == 22) + end + assert(t1[1] == 11) + assert(t2[2] == 22) +end + +do --- 11 +-- Store with different tab, different non-const idx and different value. +-- Cannot eliminate any stores (would need dynamic disambiguation). + local t1 = { 1, 2 } + local t2 = { 1, 2 } + local k = 1 + for i=1,100 do + assert(true) + t1[k] = 11 + assert(t1[k] == 11) + t2[2] = 22 + assert(t2[2] == 22) + end + assert(t1[1] == 11) + assert(t2[2] == 22) +end + +do --- 12 +-- Store with same ref, different value and aliased loads. +-- Cannot eliminate any stores (would need dynamic disambiguation). + local t1 = { 1, 2 } + local t2 = t1 + for i=1,100 do + assert(true) + t1[1] = 11 + assert(t2[1] == 11) + t1[1] = 22 + assert(t2[1] == 22) + end + assert(t1[1] == 22) +end + +do --- CALLL must inhibit DSE. + local a,b + local t = {1,2} + for i=1,100 do + t[2]=nil + a=#t + t[2]=2 + b=#t + end + assert(a == 1 and b == 2) +end diff --git a/testsuite/test/opt/dse/field.lua b/testsuite/test/opt/dse/field.lua new file mode 100644 index 0000000000..d8a5411c75 --- /dev/null +++ b/testsuite/test/opt/dse/field.lua @@ -0,0 +1,70 @@ +local getmetatable, setmetatable = getmetatable, setmetatable + +do --- 1. Store with same ref and same value. All stores in loop eliminated. + local mt = {} + local t = {} + for i=1,100 do + setmetatable(t, mt) + assert(getmetatable(t) == mt) + setmetatable(t, mt) + assert(getmetatable(t) == mt) + end + assert(getmetatable(t) == mt) +end + +do --- 2. Store with different ref and same value. All stores in loop eliminated. + local mt = {} + local t1 = {} + local t2 = {} + for i=1,100 do + setmetatable(t1, mt) + assert(getmetatable(t1) == mt) + setmetatable(t2, mt) + assert(getmetatable(t2) == mt) + end + assert(getmetatable(t1) == mt) + assert(getmetatable(t2) == mt) +end + +do --- 3. Store with different ref and different value. 
Cannot eliminate any stores. + local mt1 = {} + local mt2 = {} + local t1 = {} + local t2 = {} + for i=1,100 do + setmetatable(t1, mt1) + assert(getmetatable(t1) == mt1) + setmetatable(t2, mt2) + assert(getmetatable(t2) == mt2) + end + assert(getmetatable(t1) == mt1) + assert(getmetatable(t2) == mt2) +end + +do --- 4. Store with same ref and different value. 2nd store remains in loop. + local mt1 = {} + local mt2 = {} + local t = {} + for i=1,100 do + setmetatable(t, mt1) + assert(getmetatable(t) == mt1) + setmetatable(t, mt2) + assert(getmetatable(t) == mt2) + end + assert(getmetatable(t) == mt2) +end + +do --- 5. Store with same ref, different value and aliased loads. +-- Cannot eliminate any stores. + local mt1 = {} + local mt2 = {} + local t1 = {} + local t2 = t1 + for i=1,100 do + setmetatable(t1, mt1) + assert(getmetatable(t2) == mt1) + setmetatable(t1, mt2) + assert(getmetatable(t2) == mt2) + end + assert(getmetatable(t1) == mt2) +end diff --git a/testsuite/test/opt/dse/index b/testsuite/test/opt/dse/index new file mode 100644 index 0000000000..7b8ad1f4cd --- /dev/null +++ b/testsuite/test/opt/dse/index @@ -0,0 +1,2 @@ +array.lua +field.lua diff --git a/testsuite/test/opt/fold/index b/testsuite/test/opt/fold/index new file mode 100644 index 0000000000..8b4648c788 --- /dev/null +++ b/testsuite/test/opt/fold/index @@ -0,0 +1 @@ +kfold.lua diff --git a/testsuite/test/opt/fold/kfold.lua b/testsuite/test/opt/fold/kfold.lua new file mode 100644 index 0000000000..9cd39190de --- /dev/null +++ b/testsuite/test/opt/fold/kfold.lua @@ -0,0 +1,81 @@ +do --- operators + local y = 0 + for i=1,100 do local a, b = 23, 11; y = a+b end; assert(y == 23+11) + for i=1,100 do local a, b = 23, 11; y = a-b end; assert(y == 23-11) + for i=1,100 do local a, b = 23, 11; y = a*b end; assert(y == 23*11) + for i=1,100 do local a, b = 23, 11; y = a/b end; assert(y == 23/11) + for i=1,100 do local a, b = 23, 11; y = a%b end; assert(y == 23%11) + for i=1,100 do local a, b = 23, 11; y = a^b 
end; assert(y == 23^11) + + for i=1,100 do local a, b = 23.5, 11.5; y = a+b end; assert(y == 23.5+11.5) + for i=1,100 do local a, b = 23.5, 11.5; y = a-b end; assert(y == 23.5-11.5) + for i=1,100 do local a, b = 23.5, 11.5; y = a*b end; assert(y == 23.5*11.5) + for i=1,100 do local a, b = 23.5, 11.5; y = a/b end; assert(y == 23.5/11.5) + for i=1,100 do local a, b = 23.5, 11.5; y = a%b end; assert(y == 23.5%11.5) +end + +do --- abs + local y = 0 + for i=1,100 do local a=23; y = math.abs(a) end; assert(y == math.abs(23)) + for i=1,100 do local a=-23; y = math.abs(a) end; assert(y == math.abs(-23)) + for i=1,100 do local a=23.5; y = math.abs(a) end; assert(y == math.abs(23.5)) + for i=1,100 do local a=-23.5; y = math.abs(a) end; assert(y==math.abs(-23.5)) + for i=1,100 do local a=-2^31; y = math.abs(a) end; assert(y==math.abs(-2^31)) +end + +do --- atan2 ldexp + local y = 0 + for i=1,100 do local a, b = 23, 11; y = math.atan2(a, b) end + assert(y == math.atan2(23, 11)) + for i=1,100 do local a, b = 23, 11; y = math.ldexp(a, b) end + assert(y == math.ldexp(23, 11)) +end + +do --- minmax + local y = 0 + for i=1,100 do local a, b = 23, 11; y = math.min(a, b) end + assert(y == math.min(23, 11)) + for i=1,100 do local a, b = 23, 11; y = math.max(a, b) end + assert(y == math.max(23, 11)) + for i=1,100 do local a, b = 23.5, 11.5; y = math.min(a, b) end + assert(y == math.min(23.5, 11.5)) + for i=1,100 do local a, b = 23.5, 11.5; y = math.max(a, b) end + assert(y == math.max(23.5, 11.5)) + for i=1,100 do local a, b = 11, 23; y = math.min(a, b) end + assert(y == math.min(11, 23)) + for i=1,100 do local a, b = 11, 23; y = math.max(a, b) end + assert(y == math.max(11, 23)) + for i=1,100 do local a, b = 11.5, 23.5; y = math.min(a, b) end + assert(y == math.min(11.5, 23.5)) + for i=1,100 do local a, b = 11.5, 23.5; y = math.max(a, b) end + assert(y == math.max(11.5, 23.5)) +end + +do --- floorceil + local y = 0 + for i=1,100 do local a=23; y=math.floor(a) end 
assert(y==math.floor(23)) + for i=1,100 do local a=23.5; y=math.floor(a) end assert(y==math.floor(23.5)) + for i=1,100 do local a=-23; y=math.floor(a) end assert(y==math.floor(-23)) + for i=1,100 do local a=-23.5; y=math.floor(a) end assert(y==math.floor(-23.5)) + for i=1,100 do local a=-0; y=math.floor(a) end assert(y==math.floor(-0)) + for i=1,100 do local a=23; y=math.ceil(a) end assert(y==math.ceil(23)) + for i=1,100 do local a=23.5; y=math.ceil(a) end assert(y==math.ceil(23.5)) + for i=1,100 do local a=-23; y=math.ceil(a) end assert(y==math.ceil(-23)) + for i=1,100 do local a=-23.5; y=math.ceil(a) end assert(y==math.ceil(-23.5)) + for i=1,100 do local a=-0; y=math.ceil(a) end assert(y==math.ceil(-0)) +end + +do --- sqrt exp log trig + local y = 0 + for i=1,100 do local a=23; y=math.sqrt(a) end assert(y==math.sqrt(23)) + for i=1,100 do local a=23; y=math.exp(a) end assert(y==math.exp(23)) + for i=1,100 do local a=23; y=math.log(a) end assert(y==math.log(23)) + for i=1,100 do local a=23; y=math.log10(a) end assert(y==math.log10(23)) + for i=1,100 do local a=23; y=math.sin(a) end assert(y==math.sin(23)) + for i=1,100 do local a=23; y=math.cos(a) end assert(y==math.cos(23)) + for i=1,100 do local a=23; y=math.tan(a) end assert(y==math.tan(23)) +end + +do --- exp -luajit==2.0 + assert((10^-2 - 0.01) == 0) +end diff --git a/testsuite/test/opt/fuse.lua b/testsuite/test/opt/fuse.lua new file mode 100644 index 0000000000..a68381ef07 --- /dev/null +++ b/testsuite/test/opt/fuse.lua @@ -0,0 +1,5 @@ +do --- Don't fuse i+101 on x64. 
+-- (except if i is sign-extended to 64 bit or addressing is limited to 32 bit) + local t = {} + for i=-100,-1 do t[i+101] = 1 end +end diff --git a/testsuite/test/opt/fwd/hrefk_rollback.lua b/testsuite/test/opt/fwd/hrefk_rollback.lua new file mode 100644 index 0000000000..5a6ad87688 --- /dev/null +++ b/testsuite/test/opt/fwd/hrefk_rollback.lua @@ -0,0 +1,32 @@ +do --- https://github.com/LuaJIT/LuaJIT/issues/124 + local function foo(a, b, f) + return f and (a.f0 < b.f1 and + b.f0 < a.f1 and + a.f2 < b.f3 and + b.f2 < a.f3) + end + + local function bar(f0, f1, f2, f3, X, f) + for _, v in ipairs(X) do + local b = {} + b.f0 = 0 + b.f2 = v + b.f1 = b.f0 + 1 + b.f3 = b.f2 + 1 + + if foo({f0 = f0, f1 = f1, f2 = f2, f3 = f3}, b, f) then + return false + end + end + + return true + end + + local X = { 0, 1, 0, 0 } + + for i = 1, 20 do + assert(bar(0, 1, 2, 3, X, true)) + end + + assert(not bar(0, 1, 1, 2, X, true)) +end diff --git a/testsuite/test/opt/fwd/index b/testsuite/test/opt/fwd/index new file mode 100644 index 0000000000..5bb1537f0a --- /dev/null +++ b/testsuite/test/opt/fwd/index @@ -0,0 +1,3 @@ +hrefk_rollback.lua +tnew_tdup.lua +upval.lua diff --git a/testsuite/test/opt/fwd/tnew_tdup.lua b/testsuite/test/opt/fwd/tnew_tdup.lua new file mode 100644 index 0000000000..9e18fa3bd1 --- /dev/null +++ b/testsuite/test/opt/fwd/tnew_tdup.lua @@ -0,0 +1,69 @@ +do --- 1. + local x = 2 + for i=1,100 do + local t = {} -- TNEW: DCE + x = t.foo -- HREF -> niltv: folded + end + assert(x == nil) +end + +do --- 2. + local x = 2 + for i=1,100 do + local t = {1} -- TDUP: DCE + x = t.foo -- HREF -> niltv: folded + end + assert(x == nil) +end + +do --- 3. + local x = 2 + for i=1,100 do + local t = {} + t[1] = 11 -- NEWREF + HSTORE + x = t[1] -- AREF + ALOAD, no forwarding, no fold + end + assert(x == 11) +end + +do --- 4. HREFK not eliminated. Ditto for the EQ(FLOAD(t, #tab.hmask), k). 
+ local x = 2 + for i=1,100 do + local t = {} + t.foo = 11 -- NEWREF + HSTORE + x = t.foo -- HREFK + HLOAD: store forwarding + end + assert(x == 11) +end + +do --- 5. HREFK not eliminated. Ditto for the EQ(FLOAD(t, #tab.hmask), k). + local x = 2 + for i=1,100 do + local t = {foo=11} -- TDUP + x = t.foo -- HREFK + non-nil HLOAD: folded + end + assert(x == 11) +end + +do --- 6. + local x = 2 + local k = 1 + for i=1,100 do + local t = {[0]=11} -- TDUP + t[k] = 22 -- AREF + ASTORE aliasing + x = t[0] -- AREF + ALOAD, no fold + end + assert(x == 11) +end + +do --- 7. + local setmetatable = setmetatable + local mt = { __newindex = function(t, k, v) + assert(k == "foo") + assert(v == 11) + end } + for i=1,100 do + local t = setmetatable({}, mt) + t.foo = 11 + end +end diff --git a/testsuite/test/opt/fwd/upval.lua b/testsuite/test/opt/fwd/upval.lua new file mode 100644 index 0000000000..a3e83dff4e --- /dev/null +++ b/testsuite/test/opt/fwd/upval.lua @@ -0,0 +1,50 @@ +do --- 1. Open upvalue above base slot, aliasing an SSA value. + local x = 7 + local function a() x = x + 1 end + local function b() x = x + 2 end + for i=1,100 do a(); b(); x = x + 5 end + assert(x == 807) +end + +do --- 2. Open upvalue below base slot. UREFO CSE for a.x + b.x, but not x in loop. + -- ULOAD not disambiguated. 2x ULOAD + 2x USTORE (+ 1x DSE USTORE). + local x = 7 + (function() + local function a() x = x + 1 end + local function b() x = x + 2 end + for i=1,100 do a(); b(); x = x + 5 end + end)() + assert(x == 807) +end + +do --- 3. Closed upvalue. UREFC CSE for a.x + b.x, but not x in loop. + -- ULOAD not disambiguated. 2x ULOAD + 2x USTORE (+ 1x DSE for USTORE). + local xx = (function() + local x = 7 + local function a() x = x + 1 end + local function b() x = x + 2 end + return function() for i=1,100 do a(); b(); x = x + 5 end; return x end + end)()() + assert(xx == 807) +end + +do --- 4. Open upvalue below base slot. Forwarded. 1x USTORE (+ 1x DSE USTORE). 
+ local x = 7 + (function() + local function a() x = x + 1 end + for i=1,100 do a(); a() end + end)() + assert(x == 207) +end + +do --- 5. Closed upvalue. Forwarded. 1x USTORE (+ 1x DSE USTORE). + local xx = (function() + local x = 7 + return function() + local function a() x = x + 1 end + for i=1,100 do a(); a() end + return x + end + end)()() + assert(xx == 207) +end diff --git a/testsuite/test/opt/index b/testsuite/test/opt/index new file mode 100644 index 0000000000..94d50aecfd --- /dev/null +++ b/testsuite/test/opt/index @@ -0,0 +1,6 @@ +dse +dse +fold +fold +fwd +fwd +fuse.lua +fuse +loop +loop +sink +sink diff --git a/testsuite/test/opt/loop/index b/testsuite/test/opt/loop/index new file mode 100644 index 0000000000..e582023481 --- /dev/null +++ b/testsuite/test/opt/loop/index @@ -0,0 +1 @@ +unroll.lua diff --git a/testsuite/test/opt/loop/unroll.lua b/testsuite/test/opt/loop/unroll.lua new file mode 100644 index 0000000000..6fbd565afc --- /dev/null +++ b/testsuite/test/opt/loop/unroll.lua @@ -0,0 +1,32 @@ +do --- type instability on loop unroll -> record unroll + local flip = true + for i=1,100 do flip = not flip end + assert(flip == true) +end + +do --- untitled + local t = {} + local a, b, c = 1, "", t + for i=1,100 do a,b,c=b,c,a end + assert(c == 1 and a == "" and b == t) +end + +do --- FAILFOLD on loop unroll -> LJ_TRERR_GFAIL -> record unroll + local t = { 1, 2 } + local k = 2 + local x = 0 + for i=1,200 do + x = x + t[k] + k = k == 1 and 2 or 1 + end + assert(x == 300 and k == 2) +end + +do --- Unroll if inner loop aborts. + local j = 0 + for i = 1,100 do + repeat + j = j+1 + until true + end +end diff --git a/testsuite/test/opt/sink/alloc.lua b/testsuite/test/opt/sink/alloc.lua new file mode 100644 index 0000000000..bb2a0f7272 --- /dev/null +++ b/testsuite/test/opt/sink/alloc.lua @@ -0,0 +1,126 @@ +local assert = assert + +do --- DCE or sink trivial TNEW or TDUP. 
+ for i=1,100 do local t={} end + for i=1,100 do local t={1} end +end + +do --- Sink TNEW/TDUP + ASTORE/HSTORE. + for i=1,100 do local t={i}; assert(t[1] == i) end + for i=1,100 do local t={foo=i}; assert(t.foo == i) end + for i=1,100 do local t={1,i}; assert(t[2] == i) end + for i=1,100 do local t={bar=1,foo=i}; assert(t.foo == i) end +end + +do --- Sink outermost table of nested TNEW. + local x + for i=1,100 do + local t = {[0]={{1,i}}} + if i == 90 then x = t end + assert(t[0][1][2] == i) + end + assert(x[0][1][2] == 90) + for i=1,100 do + local t = {foo={bar={baz=i}}} + if i == 90 then x = t end + assert(t.foo.bar.baz == i) + end + assert(x.foo.bar.baz == 90) +end + +do --- Sink one TNEW + FSTORE. + for i=1,100 do local t = setmetatable({}, {}) end +end + +do --- Sink TDUP or TDUP + HSTORE. Guard of HREFK eliminated. + local x + for i=1,100 do local t = { foo = 1 }; x = t.foo; end + assert(x == 1) + for i=1,100 do local t = { foo = i }; x = t.foo; end + assert(x == 100) +end + +do --- Sink of simplified complex add, unused in next iteration, drop PHI. + local x={1,2} + for i=1,100 do x = {x[1]+3, x[2]+4} end + assert(x[1] == 301) + assert(x[2] == 402) +end + +do --- Sink of complex add, unused in next iteration, drop PHI. + local x,k={1.5,2.5},{3.5,4.5} + for i=1,100 do x = {x[1]+k[1], x[2]+k[2]} end + assert(x[1] == 351.5) + assert(x[2] == 452.5) +end + +do --- Sink of TDUP with stored values that are both PHI and non-PHI. + local x,k={1,2},{3,4} + for i=1,100 do x = {x[1]+k[1], k[2]} end + assert(x[1] == 301) + assert(x[2] == 4) +end + +do --- Sink of CONV. + local t = {1} + local x,y + for i=1,200 do + local v = {i} + local w = {i+1} + x = v[1] + y = w[1] + if i > 100 then end + end + assert(x == 200 and y == 201) +end + +do --- Sink of stores with numbers. + local x = {1.5, 0} + for i=1,200 do x = {x[1]+1, 99.5}; x[2]=4.5; if i > 100 then end end + assert(x[1] == 201.5) + assert(x[2] == 4.5) +end + +do --- Sink of stores with constants. 
+ for i=1,100 do local t = {false}; t[1] = true; if i > 100 then g=t end end +end + +do --- Sink with two references to the same table. + for i=1,200 do + local t = {i} + local q = t + if i > 100 then assert(t == q) end + end +end + +do --- point + local point + point = { + new = function(self, x, y) + return setmetatable({x=x, y=y}, self) + end, + __add = function(a, b) + return point:new(a.x + b.x, a.y + b.y) + end, + } + point.__index = point + local a, b = point:new(1, 1), point:new(2, 2) + for i=1,100 do a = (a + b) + b end + assert(a.x == 401) + assert(a.y == 401) + assert(getmetatable(a) == point) + for i=1,200 do a = (a + b) + b; if i > 100 then end end + assert(a.x == 1201) + assert(a.y == 1201) + assert(getmetatable(a) == point) +end + +do --- untitled + local t = {} + for i=1,20 do t[i] = 1 end + for i=1,20 do + for a,b in ipairs(t) do + local s = {i} + end + end +end diff --git a/testsuite/test/opt/sink/ffi.lua b/testsuite/test/opt/sink/ffi.lua new file mode 100644 index 0000000000..0bba0978d2 --- /dev/null +++ b/testsuite/test/opt/sink/ffi.lua @@ -0,0 +1,121 @@ +local ffi = require("ffi") + +do --- incrementing + local x = 10000000000000ll + for i=1,100 do x=x+1 end + assert(x == 10000000000100ll) +end + +do --- hoistable increment !private_G + local x = 10000000000000ll + local z + for i=1,100 do z=x+1 end + assert(z == 10000000000001ll) + for i=1,100 do local y=x; z=x+1; g=y end + assert(z == 10000000000001ll) + assert(g == 10000000000000ll) +end + +do --- escaping hoistable increment + local x = 10000000000000ll + for i=1,100 do local y=x+1; if i == 90 then x=y end end + assert(x == 10000000000001ll) +end + +do --- escaping addition + local x = 10000000000000ll + for i=1,100 do local y=x+i; if i == 90 then x=y end end + assert(x == 10000000000090ll) +end + +do --- conditional addition / incrementing + local x = 10000000000000ll + for i=1,200 do local y=x+i; if i > 100 then x=y end end + assert(x == 10000000015050ll) +end + +do --- incrementing 
pointer + local a = ffi.new("int[?]", 100) + local p = a + for i=0,99 do p[0]=i; p=p+1 end + assert(p == a+100) + for i=0,99 do assert(a[i] == i) end +end + +do --- mutating complex + local cx = ffi.typeof("complex") + local x = cx(1, 2) + local k = cx(3, 4) + for i=1,100 do x = cx(x.re+k.re, x.im+k.im) end + assert(x.re == 301) + assert(x.im == 402) +end + +do --- mutating struct + local st = ffi.typeof("struct { int a; int64_t b; double c; }") + local x = st(1, 20000000000LL, 3.5) + local k = st(3, 4, 5.0) + for i=1,100 do x = st(x.a+k.a, x.b+k.b, x.c+k.c) end + assert(x.a == 301) + assert(x.b == 20000000400LL) + assert(x.c == 503.5) + local y, z + for i=1,100 do + local x = st(i, i, i) + if i == 90 then y = st(x.a, x.b, x.c) end + x.b = x.b + 20000000000LL + if i == 95 then z = st(x.a, x.b, x.c) end + end + assert(y.a == 90) + assert(y.b == 90) + assert(y.c == 90) + assert(z.a == 95) + assert(z.b == 20000000095LL) + assert(z.c == 95) + for i=1,200 do + local x = st(i, i, i) + if i > 100 then y = st(x.a, x.b, x.c) end + x.b = x.b + 20000000000LL + if i > 150 then z = st(x.a, x.b, x.c) end + end + assert(y.a == 200) + assert(y.b == 200) + assert(y.c == 200) + assert(z.a == 200) + assert(z.b == 20000000200LL) + assert(z.c == 200) +end + +do --- mutating struct 2 + local st = ffi.typeof("struct { int64_t a; double b; float c; }") + local x = st(1, 2.5, 3.25) + local k = st(3, 4, 5) + for i=1,100 do x = st(x.a+k.a, x.b+k.b, x.c+k.c) end + assert(x.a == 301) + assert(x.b == 402.5) + assert(x.c == 503.25) +end + +do --- escaping loop counter to float + local st = ffi.typeof("struct { float a; }") + local x + for i=1,200 do + local y = st(i) + if i > 100 then x = y end + end + assert(x.a == 200) +end + +do --- 64 bit crash bug !private_G + local t = {} + for i=1,200 do t[i] = "abcd" end + local r + for i=1,200 do + local a,b,c,d + local g = t[201-i] -- Non-zero stack slot above. + local v = ffi.cast("const char *", t[i]) -- Uses 32 bit stack slot! 
+ a,b,c,d = {v[0]},{v[1]},{v[2]},{v[3]} -- Force above to spill. + r = {{i}} -- Spill due to call. + if i > 100 then z = v[0]+a[1]+b[1]+c[1]+d[1] end -- Crash for 64 bit ptr v. + end +end diff --git a/testsuite/test/opt/sink/ffi_nosink.lua b/testsuite/test/opt/sink/ffi_nosink.lua new file mode 100644 index 0000000000..8f7cced264 --- /dev/null +++ b/testsuite/test/opt/sink/ffi_nosink.lua @@ -0,0 +1,45 @@ +local ffi = require("ffi") + +do --- escaping global !private_G + local x = 0ll + for i=1,100 do x=x+1; g=x end + assert(x == 100ll) + assert(g == 100ll) +end + +do --- preincrement escaping global !private_G + local x = 0ll + for i=1,100 do local y=x; x=x+1; g=y end + assert(x == 100ll) + assert(g == 99ll) +end + +do --- escaping global and local !private_G + local x = 0ll + local z + for i=1,100 do z=x+1; g=z end + assert(z == 1ll) + assert(g == 1ll) +end + +do --- swapping + local x,y = 0ll, 0ll + for i=1,100 do y,x=x,x+1 end + assert(x == 100ll) + assert(y == 99ll) +end + +do --- pointer to self + local st = ffi.typeof("struct { void *p; }") + local x + for i=1,100 do x = st(); x.p = x end + assert(x.p == ffi.cast("void *", x)) +end + +do --- strchr + ffi.cdef[[char *strchr(char *, int);]] + for i=1,100 do + local p = ffi.new("char[2]"); + ffi.C.strchr(p, 32) + end +end diff --git a/testsuite/test/opt/sink/index b/testsuite/test/opt/sink/index new file mode 100644 index 0000000000..8bfa370e50 --- /dev/null +++ b/testsuite/test/opt/sink/index @@ -0,0 +1,4 @@ +alloc.lua +nosink.lua +ffi.lua +ffi +ffi_nosink.lua +ffi diff --git a/testsuite/test/opt/sink/nosink.lua b/testsuite/test/opt/sink/nosink.lua new file mode 100644 index 0000000000..762aaced57 --- /dev/null +++ b/testsuite/test/opt/sink/nosink.lua @@ -0,0 +1,109 @@ +local assert = assert + +do --- Cannot sink TNEW, aliased load. + local k = 1 + for i=1,100 do local t={i}; assert(t[k]==i) end + for i=1,100 do local t={}; t[k]=i; assert(t[1]==i) end +end + +do --- Cannot sink TNEW, escaping to upvalue. 
+ (function() + local uv + return function() + for i=1,100 do uv = {i} end + assert(uv[1] == 100) + end + end)()() +end + +do --- Cannot sink TNEW, escaping through a store. + local t = {} + for i=1,100 do t[1] = {i} end + for i=1,100 do t.foo = {i} end + for i=1,100 do setmetatable(t, {i}) end + assert(t[1][1] == 100) + assert(t.foo[1] == 100) + assert(getmetatable(t)[1] == 100) +end + +do --- Cannot sink TNEW, iteratively escaping through a store. + local t = {} + for i=1,100 do t[1] = {i}; t[1][1] = {i} end + assert(t[1][1][1] == 100) +end + +do --- Cannot sink TNEW, escaping to next iteration (unused in 1st variant). + local t; + for i=1,200 do t = {i} end + assert(t[1] == 200) + for i=1,200 do if i > 100 then assert(t[1] == i-1) end t = {i} end + assert(t[1] == 200) +end + +do --- Cannot sink TNEW, escaping to next iteration (snapshot ref). + local t,x + for i=1,100 do x=t; t={i} end + assert(t[1] == 100) + assert(x[1] == 99) +end + +do --- Cannot sink TNEW, escaping to next iteration (IR/snapshot ref). + local t + for i=1,100 do t={t} end + assert(type(t[1][1][1]) == "table") +end + +do --- Cannot sink inner TNEW, escaping to next iteration (IR ref). + -- (Could sink outer TNEW, but the logic for stores to PHI allocs is too simple). + local t = {42, 43} + for i=1,100 do t={t[2], {i}} end + assert(t[2][1] == 100) + assert(t[1][1] == 99) +end + +do --- Cannot sink TNEW, cross-PHI ref (and snapshot ref). + local x,y + for i=1,100 do x,y={i},x end + assert(x[1] == 100) + assert(y[1] == 99) +end + +do --- Cannot sink TNEW, cross-PHI ref (and snapshot ref). + local x,y + for i=1,100 do x,y=y,{i} end + assert(x[1] == 99) + assert(y[1] == 100) +end + +do --- Cannot sink TNEW, escaping to exit. + local function f(n, t) + if n == 0 then return t end + return (f(n-1, {t})) + end + local t = f(100, 42) + assert(type(t[1][1][1]) == "table") + t = f(3, 42) + assert(t[1][1][1] == 42) +end + +do --- Cannot sink TNEW, escaping to exit. 
+ local function f(n) + if n == 0 then return 42 end + local t = f(n-1) + return {t} + end + for i=1,20 do + local t = f(100) + assert(type(t[1][1][1]) == "table") + end + local t = f(3) + assert(t[1][1][1] == 42) +end + +do --- Cannot sink, since nested inner table is non-PHI. + local a, b = {{1}}, {{1}} + for i=1,10000 do -- Need to force GC exit sometimes + a = {{a[1][1]+b[1][1]}} + end + assert(a[1][1] == 10001) +end diff --git a/testsuite/test/raptorjit/index b/testsuite/test/raptorjit/index new file mode 100644 index 0000000000..1a3dcd3271 --- /dev/null +++ b/testsuite/test/raptorjit/index @@ -0,0 +1 @@ +vmprofile.lua diff --git a/testsuite/test/raptorjit/vmprofile.lua b/testsuite/test/raptorjit/vmprofile.lua new file mode 100644 index 0000000000..ae9bb30a41 --- /dev/null +++ b/testsuite/test/raptorjit/vmprofile.lua @@ -0,0 +1,34 @@ +local vmprofile = require("jit.vmprofile") + +do --- vmprofile start and stop + vmprofile.start() + vmprofile.stop() +end + + +do --- vmprofile multiple starts + for i = 1, 1000 do vmprofile.start() end + vmprofile.stop() +end + +do --- vmprofile multiple profiles + vmprofile.start() + local a = vmprofile.open("a.vmprofile") + local b = vmprofile.open("b.vmprofile") + vmprofile.select(a) + for i = 1, 1e8 do end + vmprofile.select(b) + for i = 1, 1e8 do end + vmprofile.select(a) + for i = 1, 1e8 do end + vmprofile.stop() + vmprofile.close(a) + vmprofile.close(b) + -- simple sanity check that the profiles have different contents. + -- e.g. to make sure there was at least one sample taken somewhere. 
+ assert(io.open("a.vmprofile", "r"):read("*a") ~= + io.open("b.vmprofile", "r"):read("*a"), + "check that profiles have different contents") + os.remove("a.vmprofile") + os.remove("b.vmprofile") +end diff --git a/testsuite/test/src/cpptest.cpp b/testsuite/test/src/cpptest.cpp new file mode 100644 index 0000000000..a5893ed600 --- /dev/null +++ b/testsuite/test/src/cpptest.cpp @@ -0,0 +1,129 @@ + +#include + +extern "C" { +#define LUA_LIB +#include "lua.h" +#include "lauxlib.h" +#include "luajit.h" +} + +static int testobj_alloc; + +class TestObj { +public: + TestObj(int x) { foo = x; testobj_alloc = 1; } + ~TestObj() { testobj_alloc = 0; } +private: + int foo; +}; + +static int ct_alloc(lua_State *L) +{ + TestObj foo(1); + lua_pushlightuserdata(L, (void *)&foo); + lua_call(L, lua_gettop(L)-1, LUA_MULTRET); + if (lua_iscfunction(L, -1)) { + lua_CFunction f = lua_tocfunction(L, -1); + lua_pop(L, 1); + f(L); + } + return lua_gettop(L); +} + +static int ct_isalloc(lua_State *L) +{ + lua_pushboolean(L, testobj_alloc); + return 1; +} + +static int ct_usereg(lua_State *L) +{ + int n = luaL_checkint(L, 1); + int m = luaL_checkint(L, 2); + int i; + int a = 0, b = 0, c = 0, d = 0, e = 0, f = 0; + for (i = 0; i < n; i++) { + a = (a + 1) ^ 0x12345678; + b = (b + 2) ^ 0x12345678; + c = (c + 3) ^ 0x12345678; + d = (d + 4) ^ 0x12345678; + e = (e + 5) ^ 0x12345678; + f = (f + 5) ^ 0x12345678; + if (i == m) { + if (i & 1) + lua_pcall(L, 1, 0, 0); + else + lua_call(L, 1, 0); + } + } + lua_pushinteger(L, a); + lua_pushinteger(L, b); + lua_pushinteger(L, c); + lua_pushinteger(L, d); + lua_pushinteger(L, e); + lua_pushinteger(L, f); + return 6; +} + +static int ct_catch(lua_State *L) +{ + try { + lua_call(L, lua_gettop(L)-1, LUA_MULTRET); + return lua_gettop(L); + } catch (const char *s) { + lua_pushstring(L, s); + } catch (...) 
{ + lua_pushliteral(L, "catch ..."); + } + return 1; +} + +static int ct_throw(lua_State *L) +{ + const char *s = lua_tostring(L, 1); + throw(s); + return 0; +} + +static int ct_wrap(lua_State *L, lua_CFunction f) +{ + try { + return f(L); + } catch (const char *s) { + lua_pushstring(L, s); + } + return lua_error(L); +} + +static int ct_wrapon(lua_State *L) +{ + lua_pushlightuserdata(L, (void *)ct_wrap); + luaJIT_setmode(L, -1, LUAJIT_MODE_WRAPCFUNC|LUAJIT_MODE_ON); + return 0; +} + +static int ct_wrapoff(lua_State *L) +{ + luaJIT_setmode(L, 0, LUAJIT_MODE_WRAPCFUNC|LUAJIT_MODE_OFF); + return 0; +} + +static luaL_Reg ct_funcs[] = { + {"isalloc", ct_isalloc }, + {"alloc", ct_alloc }, + {"usereg", ct_usereg }, + {"catch", ct_catch }, + {"throw", ct_throw }, + {"wrapon", ct_wrapon }, + {"wrapoff", ct_wrapoff }, + {NULL, NULL} +}; + +extern "C" { +LUA_API int luaopen_cpptest(lua_State *L) +{ + luaL_register(L, "cpptest", ct_funcs); + return 1; +} +} diff --git a/testsuite/test/src/ctest.c b/testsuite/test/src/ctest.c new file mode 100644 index 0000000000..d257567b98 --- /dev/null +++ b/testsuite/test/src/ctest.c @@ -0,0 +1,339 @@ + +#define LUA_LIB +#include "lua.h" +#include "lauxlib.h" + +/* ------------------------------------------------------------------------ */ + +#ifdef _MSC_VER +typedef __int8 int8_t; +typedef __int16 int16_t; +typedef __int32 int32_t; +typedef __int64 int64_t; +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; +#else +#include +#define complex _Complex +#endif + +#if defined(__i386) || defined(__i386__) || defined(_M_IX86) +#ifdef _MSC_VER +#define LJ_FASTCALL __fastcall +#define LJ_STDCALL __stdcall +#else +#define LJ_FASTCALL __attribute__((fastcall)) +#define LJ_STDCALL __attribute__((stdcall)) +#endif +#endif + +typedef struct s_ii { int x, y; } s_ii; +typedef struct s_jj { int64_t x, y; } s_jj; +typedef struct s_ff { float x, y; } s_ff; +typedef 
struct s_dd { double x, y; } s_dd; +typedef struct s_8i { int a,b,c,d,e,f,g,h; } s_8i; + +LUA_API int call_i(int a) { return a+1; } +LUA_API int call_ii(int a, int b) { return a+b; } +LUA_API int call_10i(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j) { return a+b+c+d+e+f+g+h+i+j; } + +LUA_API int64_t call_10j(int a, int b, int c, int d, int e, int f, int g, int h, int i, int64_t j) { return a+b+c+d+e+f+g+h+i+j; } + +LUA_API int64_t call_ji(int64_t a, int b) { return a+b; } +LUA_API int64_t call_ij(int a, int64_t b) { return a+b; } +LUA_API int64_t call_jj(int64_t a, int64_t b) { return a+b; } + +LUA_API double call_dd(double a, double b) { return a+b; } +LUA_API double call_10d(double a, double b, double c, double d, double e, double f, double g, double h, double i, double j) { return a+b+c+d+e+f+g+h+i+j; } + +LUA_API float call_ff(float a, float b) { return a+b; } +LUA_API float call_10f(float a, float b, float c, float d, float e, float f, float g, float h, float i, float j) { return a+b+c+d+e+f+g+h+i+j; } + +LUA_API double call_idifjd(int a, double b, int c, float d, int64_t e, double f) { return a+b+c+d+e+f; } + +LUA_API int call_p_i(int *a) { return *a+1; } +LUA_API int *call_p_p(int *a) { return a+1; } +LUA_API int call_pp_i(int *a, int *b) { return (int)(a-b); } + +#include + +LUA_API double call_ividi(int a, ...) 
+{ + double y; + va_list argp; + va_start(argp, a); + y = a; + y += va_arg(argp, int); + y += va_arg(argp, double); + y += va_arg(argp, int); + va_end(argp); + return y; +} + +#ifdef complex +LUA_API complex call_dd_cd(double a, double b) { return a+b*2i; } +LUA_API complex call_cd(complex a) { return a+1-2i; } +LUA_API complex call_cdcd(complex a, complex b) { return a+b; } + +LUA_API complex float call_ff_cf(float a, float b) { return a+b*2i; } +LUA_API complex float call_cf(complex float a) { return a+1-2i; } +LUA_API complex float call_cfcf(complex float a, complex float b) { return a+b; } +#endif + +LUA_API s_ii call_sii(s_ii a) { return a; } +LUA_API s_jj call_sjj(s_jj a) { return a; } +LUA_API s_ff call_sff(s_ff a) { return a; } +LUA_API s_dd call_sdd(s_dd a) { return a; } +LUA_API s_8i call_s8i(s_8i a) { return a; } +LUA_API s_ii call_siisii(s_ii a, s_ii b) +{ + s_ii c; + c.x = a.x + b.x; + c.y = a.y + b.y; + return c; +} +LUA_API s_ff call_sffsff(s_ff a, s_ff b) +{ + s_ff c; + c.x = a.x + b.x; + c.y = a.y + b.y; + return c; +} +LUA_API s_dd call_sddsdd(s_dd a, s_dd b) +{ + s_dd c; + c.x = a.x + b.x; + c.y = a.y + b.y; + return c; +} +LUA_API s_8i call_s8is8i(s_8i a, s_8i b) +{ + s_8i c; + c.a = a.a + b.a; + c.b = a.b + b.b; + c.c = a.c + b.c; + c.d = a.d + b.d; + c.e = a.e + b.e; + c.f = a.f + b.f; + c.g = a.g + b.g; + c.h = a.h + b.h; + return c; +} +LUA_API s_8i call_is8ii(int a, s_8i b, int c) +{ + b.a += a; + b.c += c; + return b; +} + +#ifdef LJ_FASTCALL +LUA_API int LJ_FASTCALL fastcall_void(void) { return 1; } +LUA_API int LJ_FASTCALL fastcall_i(int a) { return a+1; } +LUA_API int LJ_FASTCALL fastcall_ii(int a, int b) { return a+b; } +LUA_API int LJ_FASTCALL fastcall_iii(int a, int b, int c) { return a+b+c; } +LUA_API int64_t LJ_FASTCALL fastcall_ji(int64_t a, int b) { return a+b; } +LUA_API double LJ_FASTCALL fastcall_dd(double a, double b) { return a+b; } +LUA_API int LJ_FASTCALL fastcall_pp_i(int *a, int *b) { return (int)(a-b); } +LUA_API s_ii 
LJ_FASTCALL fastcall_siisii(s_ii a, s_ii b) +{ + s_ii c; + c.x = a.x + b.x; + c.y = a.y + b.y; + return c; +} +LUA_API s_dd LJ_FASTCALL fastcall_sddsdd(s_dd a, s_dd b) +{ + s_dd c; + c.x = a.x + b.x; + c.y = a.y + b.y; + return c; +} +#endif + +#if defined(LJ_STDCALL) && defined(_WIN32) +LUA_API int LJ_STDCALL stdcall_i(int a) { return a+1; } +LUA_API int LJ_STDCALL stdcall_ii(int a, int b) { return a+b; } +LUA_API double LJ_STDCALL stdcall_dd(double a, double b) { return a+b; } +LUA_API float LJ_STDCALL stdcall_ff(float a, float b) { return a+b; } +#endif + +/* ------------------------------------------------------------------------ */ + +static int ct_call(lua_State *L) +{ + int nresults = luaL_checkint(L, 1); + luaL_checkstack(L, nresults, "too many results"); + lua_call(L, lua_gettop(L)-2, nresults); + return lua_gettop(L)-1; +} + +static int ct_callon(lua_State *L) +{ + lua_State *co = lua_tothread(L, 1); + int nargs = lua_gettop(L)-1; + int nresults; + lua_xmove(L, co, nargs); + lua_call(co, nargs-1, LUA_MULTRET); + nresults = lua_gettop(co); + lua_xmove(co, L, nresults); + return nresults; +} + +static int ct_pcall_err(lua_State *L) +{ + int nresults = luaL_checkint(L, 1); + luaL_checkstack(L, nresults, "too many results"); + if (lua_pcall(L, lua_gettop(L)-2, nresults, 0)) + lua_error(L); + return lua_gettop(L)-1; +} + +static int ct_pcall(lua_State *L) +{ + int status; + luaL_checkany(L, 1); + status = lua_pcall(L, lua_gettop(L) - 1, LUA_MULTRET, 0); + lua_pushboolean(L, (status == 0)); + lua_insert(L, 1); + return lua_gettop(L); /* return status + all results */ +} + +static int ct_xpcall(lua_State *L) +{ + int status; + luaL_checkany(L, 2); + lua_settop(L, 2); + lua_insert(L, 1); /* put error function under function to be called */ + status = lua_pcall(L, 0, LUA_MULTRET, 1); + lua_pushboolean(L, (status == 0)); + lua_replace(L, 1); + return lua_gettop(L); /* return status + all results */ +} + +#define CO_RUN 0 /* running */ +#define CO_SUS 1 /* suspended 
*/ +#define CO_NOR 2 /* 'normal' (it resumed another coroutine) */ +#define CO_DEAD 3 + +static const char *const statnames[] = + {"running", "suspended", "normal", "dead"}; + +static int costatus(lua_State *L, lua_State *co) { + if (L == co) return CO_RUN; + switch (lua_status(co)) { + case LUA_YIELD: + return CO_SUS; + case 0: { + lua_Debug ar; + if (lua_getstack(co, 0, &ar) > 0) /* does it have frames? */ + return CO_NOR; /* it is running */ + else if (lua_gettop(co) == 0) + return CO_DEAD; + else + return CO_SUS; /* initial state */ + } + default: /* some error occured */ + return CO_DEAD; + } +} + +static int auxresume(lua_State *L, lua_State *co, int narg) { + int status = costatus(L, co); + if (!lua_checkstack(co, narg)) + luaL_error(L, "too many arguments to resume"); + if (status != CO_SUS) { + lua_pushfstring(L, "cannot resume %s coroutine", statnames[status]); + return -1; /* error flag */ + } + lua_xmove(L, co, narg); + status = lua_resume(co, narg); + if (status == 0 || status == LUA_YIELD) { + int nres = lua_gettop(co); + if (!lua_checkstack(L, nres + 1)) + luaL_error(L, "too many results to resume"); + lua_xmove(co, L, nres); /* move yielded values */ + return nres; + } + else { + lua_xmove(co, L, 1); /* move error message */ + return -1; /* error flag */ + } +} + +static int ct_resume(lua_State *L) { + lua_State *co = lua_tothread(L, 1); + int r; + luaL_argcheck(L, co, 1, "coroutine expected"); + r = auxresume(L, co, lua_gettop(L) - 1); + if (r < 0) { + lua_pushboolean(L, 0); + lua_insert(L, -2); + return 2; /* return false + error message */ + } + else { + lua_pushboolean(L, 1); + lua_insert(L, -(r + 1)); + return r + 1; /* return true + `resume' returns */ + } +} + +static int ct_auxwrap(lua_State *L) { + lua_State *co = lua_tothread(L, lua_upvalueindex(1)); + int r = auxresume(L, co, lua_gettop(L)); + if (r < 0) { + if (lua_isstring(L, -1)) { /* error object is a string? 
*/ + luaL_where(L, 1); /* add extra info */ + lua_insert(L, -2); + lua_concat(L, 2); + } + lua_error(L); /* propagate error */ + } + return r; +} + +static int ct_cocreate(lua_State *L) { + lua_State *NL = lua_newthread(L); + luaL_argcheck(L, lua_isfunction(L, 1) && !lua_iscfunction(L, 1), 1, + "Lua function expected"); + lua_pushvalue(L, 1); /* move function to top */ + lua_xmove(L, NL, 1); /* move function from L to NL */ + return 1; +} + + +static int ct_wrap(lua_State *L) { + ct_cocreate(L); + lua_pushcclosure(L, ct_auxwrap, 1); + return 1; +} + +static int ct_yield(lua_State *L) { + return lua_yield(L, lua_gettop(L)); +} + +static int ct_lightud(lua_State *L) +{ + lua_pushlightuserdata(L, (void *)(ptrdiff_t)lua_tonumber(L, 1)); + return 1; +} + +static luaL_Reg ct_funcs[] = { + {"call", ct_call }, + {"callon", ct_callon }, + {"pcall", ct_pcall }, + {"xpcall", ct_xpcall }, + {"pcall_err", ct_pcall_err }, + {"resume", ct_resume }, + {"wrap", ct_wrap }, + {"yield", ct_yield }, + {"lightud", ct_lightud }, + {NULL, NULL} +}; + +LUA_API int luaopen_ctest(lua_State *L) +{ + luaL_register(L, "ctest", ct_funcs); + return 1; +} diff --git a/testsuite/test/sysdep/catch_cpp.lua b/testsuite/test/sysdep/catch_cpp.lua new file mode 100644 index 0000000000..b2251009a5 --- /dev/null +++ b/testsuite/test/sysdep/catch_cpp.lua @@ -0,0 +1,71 @@ + +local cp = require("cpptest") + +do + local a, b = pcall(cp.catch, function() return "x" end) + assert(a == true and b == "x") +end + +do + local a, b = pcall(function() cp.throw("foo") end) + assert(a == false and b == "C++ exception") +end + +local unwind +do + local a, b = pcall(cp.catch, function() cp.throw("foo") end) + unwind = a + assert((a == false and b == "C++ exception") or (a == true and b == "foo")) +end + +do + local st = cp.alloc(function() return cp.isalloc() end) + assert(st == true) + assert(cp.isalloc() == false) +end + +do + local a, b = pcall(cp.alloc, function() + assert(cp.isalloc() == true) + return "foo", 
cp.throw + end) + assert(a == false and b == "C++ exception") + assert(cp.isalloc() == false) +end + +if unwind then + local a, b = pcall(cp.alloc, function() + assert(cp.isalloc() == true) + return "foo", error + end) + assert(a == false and b == "foo") + assert(cp.isalloc() == false) +end + +do + local a,b,c,d,e,f = cp.usereg(100, 50, function() end, false) + assert(a==164 and b==312 and c==428 and d==3696 and e==404 and f==404) +end + +do + local function test() + cp.usereg(100, 40, error, "foo") + end + local a,b,c,d,e,f = cp.usereg(100, 51, test, false) + assert(a==164 and b==312 and c==428 and d==3696 and e==404 and f==404) +end + +do + local t = {}; + t.t = t; + local function foo() + for i=1,100 do + local a,b,c,d,e,f = t, t.t, t.t.t, t.t.t.t, t.t.t.t.t, t.t.t.t.t.t + local g,h,j,k,l = f.t, f.t.t, f.t.t.t, f.t.t.t.t, f.t.t.t.t.t + local m = { a,b,c,d,e,f,g,h,j,k,l } + end + end + local a,b,c,d,e,f = cp.usereg(100, 50, foo, false) + assert(a==164 and b==312 and c==428 and d==3696 and e==404 and f==404) +end + diff --git a/testsuite/test/sysdep/ffi_include_gtk.lua b/testsuite/test/sysdep/ffi_include_gtk.lua new file mode 100644 index 0000000000..a4bfceacaf --- /dev/null +++ b/testsuite/test/sysdep/ffi_include_gtk.lua @@ -0,0 +1,9 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") + +if cdefs == "" then + cdefs = "-pthread -D_REENTRANT -I/usr/include/gtk-2.0 -I/usr/lib/gtk-2.0/include -I/usr/include/atk-1.0 -I/usr/include/cairo -I/usr/include/pango-1.0 -I/usr/include/gio-unix-2.0/ -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include -I/usr/include/pixman-1 -I/usr/include/freetype2 -I/usr/include/directfb -I/usr/include/libpng12 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include -I/usr/lib/x86_64-linux-gnu/gtk-2.0/include -I/usr/include/gdk-pixbuf-2.0" +end + +include"/usr/include/gtk-2.0/gtk/gtk.h" diff --git a/testsuite/test/sysdep/ffi_include_std.lua b/testsuite/test/sysdep/ffi_include_std.lua new file mode 100644 index 0000000000..b88c82bdae --- 
/dev/null +++ b/testsuite/test/sysdep/ffi_include_std.lua @@ -0,0 +1,36 @@ +local ffi = require("ffi") + +dofile("../common/ffi_util.inc") + +do + local fp = assert(io.open("/tmp/__tmp.c", "w")) + fp:write[[ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +]] + fp:close() + + local flags = ffi.abi("32bit") and "-m32" or "-m64" + fp = assert(io.popen("cc -E -P -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_GNU_SOURCE /tmp/__tmp.c "..flags)) + local s = fp:read("*a") + fp:close() + os.remove("/tmp/__tmp.c") + ffi.cdef(s) +end + diff --git a/testsuite/test/sysdep/ffi_lib_c.lua b/testsuite/test/sysdep/ffi_lib_c.lua new file mode 100644 index 0000000000..a368d750f7 --- /dev/null +++ b/testsuite/test/sysdep/ffi_lib_c.lua @@ -0,0 +1,87 @@ +local ffi = require("ffi") + +ffi.cdef[[ +// libc/libm +int sprintf(char *buf, const char *fmt, ...); +double pow(double x, double y); +int rmdir(const char *name); +int errno; + +// Windows +unsigned int GetSystemDirectoryA(char *buf, unsigned int sz); +char *CharUpperA(char *str); +int GdiFlush(void); +int _rmdir(const char *name); +static const int _O_TEXT = 0x4000; +static const int _O_BINARY = 0x8000; +int *_errno(void); +int _fmode; + +// Lua/C API +typedef struct lua_State lua_State; +typedef double lua_Number; +lua_State *luaL_newstate(void); +void luaL_openlibs(lua_State *L); +void lua_close(lua_State *L); +int luaL_loadstring(lua_State *L, const char *s); +int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc); +lua_Number lua_tonumber(lua_State *L, int idx); +]] + +local C = ffi.C + +do + local buf = ffi.new("char[?]", 100) + local n = C.sprintf(buf, "test %g %s", 12.5, "foo") + assert(ffi.string(buf, n) == "test 12.5 foo") +end + +assert(ffi.C.pow(2.5, 5) == 97.65625) + +if ffi.abi("win") then + do + local buf = ffi.new("char[?]", 4, "abc") + C.CharUpperA(buf) + assert(ffi.string(buf) 
== "ABC") + end + + do + local buf = ffi.new("char[?]", 256) + local len = C.GetSystemDirectoryA(buf, 255) + local s = ffi.string(buf, len) + assert(string.find(string.lower(s), "\\system32")) + end + + assert(C.GdiFlush() == 1) + + assert(ffi.C._rmdir("/tmp/does_not_exist") == -1) + assert(ffi.C._errno()[0] == 2) + + ffi.C._fmode = ffi.C._O_BINARY + assert(ffi.C._fmode == ffi.C._O_BINARY) + ffi.C._fmode = ffi.C._O_TEXT +else + assert(ffi.C.rmdir("/tmp/does_not_exist") == -1) + assert(ffi.C.errno == 2) + + ffi.C.errno = 17 + assert(ffi.C.errno == 17) + ffi.C.errno = 0 +end + +do + local L = C.luaL_newstate() + local s = "local x = 0; for i=1,100 do x=x+i end; return x" + C.luaL_openlibs(L) + assert(C.luaL_loadstring(L, s) == 0) + assert(C.lua_pcall(L, 0, 1, 0) == 0) + assert(C.lua_tonumber(L, -1) == 5050) + C.lua_close(L) +end + +do + if not (ffi.os == "Windows" or ffi.os == "Other") then + ffi.load("pthread") + end +end + diff --git a/testsuite/test/sysdep/ffi_lib_z.lua b/testsuite/test/sysdep/ffi_lib_z.lua new file mode 100644 index 0000000000..69a19aedcd --- /dev/null +++ b/testsuite/test/sysdep/ffi_lib_z.lua @@ -0,0 +1,107 @@ +local ffi = require("ffi") + +local compress, uncompress + +if ffi.abi("win") then + + ffi.cdef[[ + int RtlGetCompressionWorkSpaceSize(uint16_t fmt, + unsigned long *wsbufsz, unsigned long *wsfragsz); + int RtlCompressBuffer(uint16_t fmt, + const uint8_t *src, unsigned long srclen, + uint8_t *dst, unsigned long dstsz, + unsigned long chunk, unsigned long *dstlen, void *workspace); + int RtlDecompressBuffer(uint16_t fmt, + uint8_t *dst, unsigned long dstsz, + const uint8_t *src, unsigned long srclen, + unsigned long *dstlen); + ]] + + local ntdll = ffi.load("ntdll") + + local fmt = 0x0102 + local workspace + do + local res = ffi.new("unsigned long[2]") + ntdll.RtlGetCompressionWorkSpaceSize(fmt, res, res+1) + workspace = ffi.new("uint8_t[?]", res[0]) + end + + function compress(txt) + local buf = ffi.new("uint8_t[?]", 4096) + local buflen 
= ffi.new("unsigned long[1]") + local res = ntdll.RtlCompressBuffer(fmt, txt, #txt, buf, 4096, + 4096, buflen, workspace) + assert(res == 0) + return ffi.string(buf, buflen[0]) + end + + function uncompress(comp, n) + local buf = ffi.new("uint8_t[?]", n) + local buflen = ffi.new("unsigned long[1]") + local res = ntdll.RtlDecompressBuffer(fmt, buf, n, comp, #comp, buflen) + assert(res == 0) + return ffi.string(buf, buflen[0]) + end + +else + + ffi.cdef[[ + unsigned long compressBound(unsigned long sourceLen); + int compress2(uint8_t *dest, unsigned long *destLen, + const uint8_t *source, unsigned long sourceLen, int level); + int uncompress(uint8_t *dest, unsigned long *destLen, + const uint8_t *source, unsigned long sourceLen); + ]] + + local zlib = ffi.load("z") + + function compress(txt) + local n = tonumber(zlib.compressBound(#txt)) + local buf = ffi.new("uint8_t[?]", n) + local buflen = ffi.new("unsigned long[1]", n) + local res = zlib.compress2(buf, buflen, txt, #txt, 9) + assert(res == 0) + return ffi.string(buf, tonumber(buflen[0])) + end + + function uncompress(comp, n) + local buf = ffi.new("uint8_t[?]", n) + local buflen = ffi.new("unsigned long[1]", n) + local res = zlib.uncompress(buf, buflen, comp, #comp) + assert(res == 0) + return ffi.string(buf, tonumber(buflen[0])) + end + +end + + local txt = [[Rebellious subjects, enemies to peace, +Profaners of this neighbour-stained steel,-- +Will they not hear? What, ho! you men, you beasts, +That quench the fire of your pernicious rage +With purple fountains issuing from your veins, +On pain of torture, from those bloody hands +Throw your mistemper'd weapons to the ground, +And hear the sentence of your moved prince. 
+Three civil brawls, bred of an airy word, +By thee, old Capulet, and Montague, +Have thrice disturb'd the quiet of our streets, +And made Verona's ancient citizens +Cast by their grave beseeming ornaments, +To wield old partisans, in hands as old, +Canker'd with peace, to part your canker'd hate: +If ever you disturb our streets again, +Your lives shall pay the forfeit of the peace. +For this time, all the rest depart away: +You Capulet; shall go along with me: +And, Montague, come you this afternoon, +To know our further pleasure in this case, +To old Free-town, our common judgment-place. +Once more, on pain of death, all men depart.]] +txt = txt..txt..txt..txt + +local c = compress(txt) +assert(2*#c < #txt) +local txt2 = uncompress(c, #txt) +assert(txt2 == txt) + diff --git a/testsuite/test/test.lua b/testsuite/test/test.lua new file mode 100644 index 0000000000..b064eff7b4 --- /dev/null +++ b/testsuite/test/test.lua @@ -0,0 +1,416 @@ +local assert, io_open, io_lines, io_write, load, type, xpcall = + assert, io.open, io.lines, io.write, load, type, xpcall +local debug_traceback, math_random, tonumber, loadstring = + debug.traceback, math.random, tonumber, loadstring or load + +local dirsep = package.config:match"^(.-)\n" +local own_file = debug.getinfo(1, "S").source:match"^@(.*)" or arg[0] +local own_dir = own_file:match("^.*[/".. 
dirsep .."]") + +local function default_tags() + local tags = {} + + -- Lua version and features + tags.lua = tonumber(_VERSION:match"%d+%.%d+") + if table.pack then + tags["compat5.2"] = true + end + if loadstring"return 0xep+9" then + tags.hexfloat = true + end + if loadstring"goto x ::x::" then + tags["goto"] = true + end + + -- Libraries + for _, lib in ipairs{"bit", "ffi", "jit.profile", "table.new"} do + if pcall(require, lib) then + tags[lib] = true + end + end + + -- LuaJIT-specific + if jit then + tags.luajit = tonumber(jit.version:match"%d+%.%d+") + tags[jit.arch:lower()] = true + if jit.os ~= "Other" then + tags[jit.os:lower()] = true + end + if jit.status() then + tags.jit = true + end + for _, flag in ipairs{select(2, jit.status())} do + tags[flag:lower()] = true + end + end + + -- Environment + if dirsep == "\\" then + tags.windows = true + end + if tags.ffi then + local abi = require"ffi".abi + for _, param in ipairs{"le", "be", "fpu", "softfp", "hardfp", "eabi"} do + if abi(param) then + tags[param] = true + end + end + if abi"win" then tags.winabi = true end + if abi"32bit" then tags.abi32 = true end + if abi"64bit" then tags.abi64 = true end + else + local bytecode = string.dump(function()end) + if bytecode:find"^\27Lua[\80-\89]" then + tags[bytecode:byte(7, 7) == 0 and "be" or "le"] = true + tags["abi".. (bytecode:byte(9, 9) * 8)] = true + end + end + + return tags +end + +local function want_meta(opts, meta) + if not opts.want_meta_cache then + opts.want_meta_cache = setmetatable({}, {__index = function(t, meta) + local result = true + for polarity, tag, cond in meta:gmatch"([+-])([^ <>=]+)([<>=0-9.]*)" do + local tagval = opts.tags[tag] + local condresult + if cond == "" or not tagval then + condresult = tagval + else + condresult = assert(loadstring("return (...) ".. 
cond))(tagval) + end + if polarity == "-" then + condresult = not condresult + end + if not condresult then + result = false + break + end + end + t[meta] = result + return result + end}) + end + return opts.want_meta_cache[meta] +end + +local function parse_args(t) + local opts = { + tags = default_tags(), + want_meta = want_meta, + } + local result = opts + + local i, tlen = 1, #t + local joinedval = "" + local function flagval() + local val + if joinedval ~= "" then + val = joinedval:sub(2) + joinedval = "" + else + val = t[i] + if not val then error("Expected value after ".. t[i-1]) end + i = i + 1 + end + return val + end + + while i <= tlen do + local arg = t[i] + i = i + 1 + if arg:sub(1, 2) == "--" then + arg, joinedval = arg:match"^([^=]+)(=?.*)$" + if arg == "--quiet" then + opts.quiet = true + elseif arg == "--shuffle" then + local seed = tonumber(flagval()) + if not seed then error("Expected numeric seed after --shuffle") end + opts.shuffle = seed + elseif arg == "--shard" then + local i, s = flagval():match"^(%d+)/(%d+)$" + if not s then error("Expected integer/integer after --shard") end + opts.shard = {initial = tonumber(i), step = tonumber(s)} + elseif arg == "--version" then + io_write("LuaJIT test-suite runner v0.1\n") + result = nil + elseif arg == "--help" then + io_write("Usage: ", _G and _G.arg and _G.arg[-1] or "luajit", " ") + io_write(own_file, " [flags] [tags] [root] [numbers]\n") + io_write"\n" + io_write"Root specifies either a directory of tests, or the name of\n" + io_write"a particular .lua test file, defaulting to all tests if not given.\n" + io_write"Tags are specified in the form +tag_name or -tag_name, and\n" + io_write"are used to turn on or off groups of tests. 
For example,\n" + io_write"pass -ffi to skip tests relating to the ffi library, or\n" + io_write"pass +slow to enable running of slow tests.\n" + io_write"Numbers can be passed to only run particular tests.\n" + io_write"The available flags are:\n" + io_write" --quiet\n" + io_write" --shuffle=SEED\n" + io_write" --shard=INDEX/NUM_SHARDS\n" + io_write" --version\n" + io_write" --help\n" + result = nil + else + error("Unsupported flag: ".. arg) + end + if joinedval ~= "" then + error(arg .." does not expect an argument") + end + elseif arg:find"^[-+]" then + opts.tags[arg:sub(2)] = (arg:sub(1, 1) == "+") + elseif arg:find"^%d+$" then + if not opts.numbers_to_run then + opts.numbers_to_run = {} + end + opts.numbers_to_run[tonumber(arg)] = true + elseif not opts.root then + opts.root = arg + else + error("Unexpected argument ".. arg) + end + end + return result +end + +local function scan_tests(path, opts) + if path:sub(-4, -4) == "." then + local f = assert(io_open(path, "rb")) + local contents = f:read"*a" + f:close() + local prefix = "return {" + local code = contents:gsub("()(do +%-%-%- +)([^\r\n]+)", + function(pos, marker, info) + if pos ~= 1 then + pos = pos - 1 + if contents:sub(pos, pos) ~= "\n" then + return marker .. info + end + end + local result = ("%s%q,function()"):format(prefix, info) + prefix = "," + if info:find" !lex" and not opts:want_meta(info:sub((info:find" +[-+@!]"))) then + result = result .."end--[========[" + prefix = "]========]".. prefix + end + return result + end) + if prefix:sub(-1) ~= "," then + error("No tests found in ".. path) + end + prefix = prefix .."}" + return assert(load(function() + local result = code + code = code ~= prefix and prefix or nil + return result + end, "@".. path))() + else + if path ~= "" and path:sub(-1) ~= "/" and path:sub(-1) ~= dirsep then + path = path .. 
dirsep + end + local result = {} + local i = 1 + for line in io_lines(path .."index") do + if line ~= "" then + local metaidx = line:find" +[-+@]" + local name = line + local want_these = true + if metaidx then + name = line:sub(1, metaidx - 1) + want_these = opts:want_meta(line:sub(metaidx)) + end + if want_these then + result[i] = line + result[i+1] = scan_tests(path .. name, opts) + i = i + 2 + end + end + end + return result + end +end + +local function upvalue_iterator(f, i) + i = i + 1 + local name, val = debug.getupvalue(f, i) + return name and i, name, val +end + +local function upvalues_of(f) + return upvalue_iterator, f, 0 +end + +local function append_tree_to_plan(test_tree, opts, plan, prefix) + local prefi + for i = 1, #test_tree, 2 do + local info = test_tree[i] + local name = info + local want_these = true + local metaidx = info:find" +[-+@!]" + if metaidx then + name = info:sub(1, metaidx - 1) + want_these = opts:want_meta(info:sub(metaidx)) + end + local planlen = #plan + if want_these then + local test = test_tree[i+1] + if type(test) == "table" then + append_tree_to_plan(test, opts, plan, prefix .. name .. dirsep) + else + if not prefi then + prefi = prefix:sub(1, -2) + end + plan[#plan+1] = {prefi, name, test} + end + end + if metaidx and info:find"!" then + for modifier in info:gmatch"!([^ ]+)" do + if modifier == "private_G" then + local G = setmetatable({}, {__index = _G}) + G._G = G + local function Gfn() return G end + for i = planlen+1, #plan do + local test = plan[i][3] + if setfenv then + setfenv(test, G) + else + for i, name in upvalues_of(test) do + if name == "_ENV" then + debug.upvaluejoin(test, i, Gfn, 1) + break + end + end + end + end + elseif modifier == "lex" then + -- Handled during test scanning + else + error("Unsupported modifier \"".. modifier .."\" in ".. 
prefix) + end + end + end + end + return plan +end + +local function seal_globals() + local sealed_mt = {__newindex = function() + error("Tests should not mutate global state", 3) + end} + local function seal(t) + if getmetatable(t) then return end + setmetatable(t, sealed_mt) + for k, v in pairs(t) do + if type(v) == "table" then seal(v) end + end + end + seal(_G) + + if getmetatable(package.loaded) == sealed_mt then + setmetatable(package.loaded, nil) + end +end + +local function check_package_path() + local ok, res = pcall(require, "common.test_runner_canary") + if not ok then + if own_dir then + local _, psep, placeholder = package.config:match"^(.-)\n(.-)\n(.-)\n" + package.path = package.path .. psep .. own_dir .. placeholder ..".lua" + ok, res = pcall(require, "common.test_runner_canary") + end + if not ok then + error(res) + end + end + assert(res == "canary is alive") +end + +local function mutate_plan(plan, opts) + if opts.shuffle then + math.randomseed(opts.shuffle) + for i = #plan, 2, -1 do + local n = math_random(1, i) + plan[i], plan[n] = plan[n], plan[i] + end + end + if opts.shard then + local shard_plan = {} + for i = opts.shard.initial, #plan, opts.shard.step do + shard_plan[#shard_plan + 1] = plan[i] + end + plan = shard_plan + end + if opts.numbers_to_run then + for i = 1, #plan do + if not opts.numbers_to_run[i] then + plan[i][3] = false + end + end + for k in pairs(opts.numbers_to_run) do + if not plan[k] then + error("Test number ".. k .." 
is not part of the plan") + end + end + end + return plan +end + +local function execute_plan(plan, opts) + if #plan == 0 then + error("No tests selected") + end + local progress_format = ("[%%%dd/%d] "):format(#tostring(#plan), #plan) + local num_tests_run = 0 + local fail_numbers = {} + for i = 1, #plan do + local plan_i = plan[i] + local test = plan_i[3] + if test then + local file, name = plan_i[1], plan_i[2] + if not opts.quiet then + io_write(progress_format:format(i), file) + io_write(file == "" and "" or " --- ", name, "\n") + end + local ok, err = xpcall(test, debug_traceback) + if not ok then + if opts.quiet then + io_write(progress_format:format(i), file) + io_write(file == "" and "" or " --- ", name, "\n") + end + fail_numbers[#fail_numbers + 1] = i + io_write(err, "\n") + end + num_tests_run = num_tests_run + 1 + end + end + if #fail_numbers == 0 then + io_write(num_tests_run, " passed\n") + return true + else + io_write(num_tests_run - #fail_numbers, " passed, ") + io_write(#fail_numbers, " failed\n") + if not opts.quiet and num_tests_run ~= #fail_numbers then + io_write("to run just failing tests, pass command line arguments: ") + io_write(table.concat(fail_numbers, " "), "\n") + end + return false + end +end + +local opts = parse_args{...} +if not opts then + return +end +seal_globals() +check_package_path() +local test_tree = scan_tests(opts.root or own_dir or "", opts) +local plan = append_tree_to_plan(test_tree, opts, {}, "") +plan = mutate_plan(plan, opts) +local all_good = execute_plan(plan, opts) +if not all_good then + os.exit(1) +end diff --git a/testsuite/test/trace/exit_frame.lua b/testsuite/test/trace/exit_frame.lua new file mode 100644 index 0000000000..9537c56342 --- /dev/null +++ b/testsuite/test/trace/exit_frame.lua @@ -0,0 +1,79 @@ +do --- global assignments !private_G + g = 0 + gf = 1 + gz = 2 + + local function f(i) + if i == 90 then + gf = gf + 1 + return true + end + g = g + 1 + end + + local function z(i) + if f(i) then + gz = 
gz + 1 + end + end + + for j=1,5 do + for i=1,100 do z(i) end + end + + assert(g == 495) + assert(gf == 6) + assert(gz == 7) +end + +do --- mutual recursion + local f, g + function f(j) + if j >= 0 then return g(j-1) end + end + function g(j) + for i=1,200 do + if i > 100 then return f(j) end + end + end + for k=1,20 do g(20) end +end + +do --- multi-path mutual recursion + local f, g + function f(j, k) + if j >= 0 then return g(j-1, k) end + if k >= 0 then return g(20, k-1) end + end + function g(j, k) + for i=1,200 do + if i > 100 then return f(j, k) end + end + end + g(20, 20) +end + +do --- late mutual recursion + local k = 0 + local f, g + + function g(a) + -- 'a' is an SLOAD #1 from f's frame and still at slot #1 + -- Avoid losing a in exit if the SLOAD is ignored + if k > 10 then k = 0 end + k= k + 1 + return f(a) + end + + function f(a,b,c,d,e) + if not e then e =1 end + a=a+1 + if a > 1000 then return end + for i=1,100 do + e=e+1 + if i > 90 then return g(a) end + end + end + + f(1,2,3,4,5) +end diff --git a/testsuite/test/trace/exit_growstack.lua b/testsuite/test/trace/exit_growstack.lua new file mode 100644 index 0000000000..658a31a509 --- /dev/null +++ b/testsuite/test/trace/exit_growstack.lua @@ -0,0 +1,28 @@ +do --- Exit needs to grow stack before slot fill. + local function f(i) + local a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a; + local a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a; + local a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a; + if i==90 then return end + end + for j=1,5 do + collectgarbage() -- Shrink stack. + for i=1,100 do f(i) end + end +end + +do --- Exit needs to grow stack after slot fill. 
+ local function g(i) + if i==90 then return end + do return end + do + local a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a; + local a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a; + local a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a; + end + end + for j=1,5 do + collectgarbage() -- Shrink stack. + for i=1,100 do g(i) end + end +end diff --git a/testsuite/test/trace/exit_jfuncf.lua b/testsuite/test/trace/exit_jfuncf.lua new file mode 100644 index 0000000000..67ad7c369d --- /dev/null +++ b/testsuite/test/trace/exit_jfuncf.lua @@ -0,0 +1,30 @@ +do --- everything + local assert = assert + + local function rec(a, b, c, d, e, f) + assert(f == a+1) + if b == 0 then return 7 end + do local x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63, x64, x65, x66, x67, x68, x69, x70, x71, x72, x73, x74, x75, x76, x77, x78, x79, x80, x81, x82, x83, x84, x85, x86, x87, x88, x89, x90, x91, x92, x93, x94, x95, x96, x97, x98, x99, x100 end + return rec(a, b-1, c, d, e, f)+1 + end + + -- Compile recursive function. + assert(rec(42, 200, 1, 2, 3, 43) == 207) + + local function trec() + return rec(42, 0, 1, 2, 3, 43) + end + + -- Compile function jumping to JFUNCF. + for i=1,200 do + gcinfo() + assert(trec() == 7) + end + + -- Shrink stack. + for j=1,10 do collectgarbage() end + + -- Cause an exit due to stack growth with PC pointing to JFUNCF. + -- Needs to load RD with nres+1 and not with the bytecode RD. 
+ assert(trec() == 7) +end diff --git a/testsuite/test/trace/gc64_slot_revival.lua b/testsuite/test/trace/gc64_slot_revival.lua new file mode 100644 index 0000000000..40b9d87180 --- /dev/null +++ b/testsuite/test/trace/gc64_slot_revival.lua @@ -0,0 +1,18 @@ +do --- BC_KNIL + local function f(x, y) end + for i = 1,100 do + f(i, i) + f(nil, nil) + end +end + +do --- BC_VARG + local function f() end + local function g(...) + f() + f(...) + end + for i = 1,100 do + g() + end +end diff --git a/testsuite/test/trace/index b/testsuite/test/trace/index new file mode 100644 index 0000000000..076bf9be8d --- /dev/null +++ b/testsuite/test/trace/index @@ -0,0 +1,8 @@ +exit_frame.lua +exit_growstack.lua +exit_jfuncf.lua +gc64_slot_revival.lua +phi +snap.lua +stitch.lua +unsink.lua diff --git a/testsuite/test/trace/phi/copyspill.lua b/testsuite/test/trace/phi/copyspill.lua new file mode 100644 index 0000000000..17a8698f56 --- /dev/null +++ b/testsuite/test/trace/phi/copyspill.lua @@ -0,0 +1,53 @@ +do --- mat4mul !private_G +function mat4mul(a11, a21, a31, a41, + a12, a22, a32, a42, + a13, a23, a33, a43, + a14, a24, a34, a44, + b11, b21, b31, b41, + b12, b22, b32, b42, + b13, b23, b33, b43, + b14, b24, b34, b44) + return a11*b11+a21*b12+a31*b13+a41*b14, + a11*b21+a21*b22+a31*b23+a41*b24, + a11*b31+a21*b32+a31*b33+a41*b34, + a11*b41+a21*b42+a31*b43+a41*b44, + a12*b11+a22*b12+a32*b13+a42*b14, + a12*b21+a22*b22+a32*b23+a42*b24, + a12*b31+a22*b32+a32*b33+a42*b34, + a12*b41+a22*b42+a32*b43+a42*b44, + a13*b11+a23*b12+a33*b13+a43*b14, + a13*b21+a23*b22+a33*b23+a43*b24, + a13*b31+a23*b32+a33*b33+a43*b34, + a13*b41+a23*b42+a33*b43+a43*b44, + a14*b11+a24*b12+a34*b13+a44*b14, + a14*b21+a24*b22+a34*b23+a44*b24, + a14*b31+a24*b32+a34*b33+a44*b34, + a14*b41+a24*b42+a34*b43+a44*b44 +end + +local a11, a21, a31, a41 = 1, 0, 0, 0 +local a12, a22, a32, a42 = 0, 1, 0, 0 +local a13, a23, a33, a43 = 0, 0, 1, 0 +local a14, a24, a34, a44 = 0, 0, 0, 1 + +local b11, b21, b31, b41 = 0, 0, -1, 0 +local b12, 
b22, b32, b42 = 0, 1, 0, 0 +local b13, b23, b33, b43 = 1, 0, 0, 0 +local b14, b24, b34, b44 = 0, 0, 0, 1 + +for i = 1, 1000 do + a11, a21, a31, a41, + a12, a22, a32, a42, + a13, a23, a33, a43, + a14, a24, a34, a44 = mat4mul(a11, a21, a31, a41, + a12, a22, a32, a42, + a13, a23, a33, a43, + a14, a24, a34, a44, + b11, b21, b31, b41, + b12, b22, b32, b42, + b13, b23, b33, b43, + b14, b24, b34, b44) +end +assert(a11 == 1) +assert(a31 == 0) +end diff --git a/testsuite/test/trace/phi/index b/testsuite/test/trace/phi/index new file mode 100644 index 0000000000..74a07333c4 --- /dev/null +++ b/testsuite/test/trace/phi/index @@ -0,0 +1,3 @@ +copyspill.lua +ref.lua +rotate.lua diff --git a/testsuite/test/trace/phi/ref.lua b/testsuite/test/trace/phi/ref.lua new file mode 100644 index 0000000000..3662912d27 --- /dev/null +++ b/testsuite/test/trace/phi/ref.lua @@ -0,0 +1,131 @@ +do --- rref points into invariant part 1 + local x,y=1,2; for i=1,100 do x=x+y; y=i end + assert(y == 100) +end + +do --- rref points into invariant part 2 + local x,y=1,2; for i=1,100.5 do x=x+y; y=i end + assert(y == 100) +end + +do --- rref points into invariant part 3 + local x,y=1,2; for i=1,100 do x,y=y,x end + assert(x == 1) + assert(y == 2) +end + +do --- rref points into invariant part 4 + local x,y,z=1,2,3; for i=1,100 do x,y,z=y,z,x end + assert(x == 2) + assert(y == 3) + assert(z == 1) +end + +do --- rref points into invariant part 5 + local x,y,z=1,2,3; for i=1,100 do x,y,z=z,x,y end + assert(x == 3) + assert(y == 1) + assert(z == 2) +end + +do --- rref points into invariant part 6 + local a,x,y,z=0,1,2,3; for i=1,100 do a=a+x; x=y; y=z; z=i end + assert(a == 4759) + assert(x == 98) + assert(y == 99) + assert(z == 100) +end + +do --- variant slot, but no corresponding SLOAD i-1 + local x,y=1,2; for i=1,100 do x=i; y=i-1 end + assert(x == 100) + assert(y == 99) +end + +do --- variant slot, but no corresponding SLOAD i+1 + local x,y=1,2; for i=1,100 do x=i; y=i+1 end + assert(x == 100) + 
assert(y == 101) +end + +do --- variant slot, but no corresponding SLOAD side exit + local x=0; for i=1,100 do if i==90 then break end x=i end + assert(x == 89) +end + +do --- dup lref from variant slot (suppressed) + local x,y=1,2; for i=1,100 do x=i; y=i end + assert(x == 100) + assert(y == 100) +end + +do --- const rref + local x,y=1,2 local bxor,tobit=bit.bxor,bit.tobit; + for i=1,100 do x=bxor(i,y); y=tobit(i+1) end + assert(x == 0) + assert(y == 101) +end + +do --- dup rref (ok) + local x,y,z1,z2=1,2,3,4 local bxor,tobit=bit.bxor,bit.tobit; + for i=1,100 do x=bxor(i,y); z2=tobit(i+5); z1=bxor(x,i+5); y=tobit(i+1) end + assert(x == 0) + assert(y == 101) + assert(z1 == 105) + assert(z2 == 105) +end + +do --- variant slot, no corresponding SLOAD + for i=1,5 do + local a, b = 1, 2 + local bits = 0 + while a ~= b do + bits = bits + 1 + a = b + b = bit.lshift(b, 1) + end + assert(bits == 32) + end +end + +do --- don't eliminate PHI if referenced from snapshot + local t = { 0 } + local a = 0 + for i=1,100 do + local b = t[1] + t[1] = i + a + a = b + end + assert(a == 2500) + assert(t[1] == 2550) +end + +do --- don't eliminate PHI if referenced from snapshot + local x = 1 + local function f() + local t = {} + for i=1,200 do t[i] = i end + for i=1,200 do + local x1 = x + x = t[i] + if i > 100 then return x1 end + end + end + assert(f() == 100) +end + +do --- don't eliminate PHI if referenced from another non-redundant PHI + local t = {} + for i=1,256 do + local a, b, k = i, math.floor(i/2), -i + while a > 1 and t[b] > k do + t[a] = t[b] + a = b + b = math.floor(a/2) + end + t[a] = k + end + local x = 0 + for i=1,256 do x = x + bit.bxor(i, t[i]) end + assert(x == -41704) +end diff --git a/testsuite/test/trace/phi/rotate.lua b/testsuite/test/trace/phi/rotate.lua new file mode 100644 index 0000000000..cb751e0b9f --- /dev/null +++ b/testsuite/test/trace/phi/rotate.lua @@ -0,0 +1,149 @@ +do --- rot8 + local function rot8r(n) + local a,b,c,d,e,f,g,h=1,2,3,4,5,6,7,8 + for 
x=1,n do + a,b,c,d,e,f,g,h=h,a,b,c,d,e,f,g + end + return table.concat{a,b,c,d,e,f,g,h} + end + + local function rot8l(n) + local a,b,c,d,e,f,g,h=1,2,3,4,5,6,7,8 + for x=1,n do + a,b,c,d,e,f,g,h=b,c,d,e,f,g,h,a + end + return table.concat{a,b,c,d,e,f,g,h} + end + + assert(rot8r(0) == "12345678") + assert(rot8r(10) == "78123456") + assert(rot8r(105) == "81234567") + assert(rot8r(0) == "12345678") + assert(rot8r(1) == "81234567") + assert(rot8r(2) == "78123456") + assert(rot8r(0) == "12345678") + assert(rot8r(1) == "81234567") + assert(rot8r(2) == "78123456") + assert(rot8r(105) == "81234567") + + assert(rot8l(0) == "12345678") + assert(rot8l(10) == "34567812") + assert(rot8l(105) == "23456781") + assert(rot8l(0) == "12345678") + assert(rot8l(1) == "23456781") + assert(rot8l(2) == "34567812") + assert(rot8l(0) == "12345678") + assert(rot8l(1) == "23456781") + assert(rot8l(2) == "34567812") + + assert(rot8r(100) == "56781234") + assert(rot8l(100) == "56781234") +end + +do --- rot9 + local function rot9r(n) + local a,b,c,d,e,f,g,h,i=1,2,3,4,5,6,7,8,9 + for x=1,n do + a,b,c,d,e,f,g,h,i=i,a,b,c,d,e,f,g,h + end + return table.concat{a,b,c,d,e,f,g,h,i} + end + + local function rot9l(n) + local a,b,c,d,e,f,g,h,i=1,2,3,4,5,6,7,8,9 + for x=1,n do + a,b,c,d,e,f,g,h,i=b,c,d,e,f,g,h,i,a + end + return table.concat{a,b,c,d,e,f,g,h,i} + end + + assert(rot9r(0) == "123456789") + assert(rot9r(10) == "912345678") + assert(rot9r(105) == "456789123") + assert(rot9r(0) == "123456789") + assert(rot9r(1) == "912345678") + assert(rot9r(2) == "891234567") + assert(rot9r(0) == "123456789") + assert(rot9r(1) == "912345678") + assert(rot9r(2) == "891234567") + assert(rot9r(105) == "456789123") + + assert(rot9l(0) == "123456789") + assert(rot9l(10) == "234567891") + assert(rot9l(105) == "789123456") + assert(rot9l(0) == "123456789") + assert(rot9l(1) == "234567891") + assert(rot9l(2) == "345678912") + assert(rot9l(0) == "123456789") + assert(rot9l(1) == "234567891") + assert(rot9l(2) == 
"345678912") + + assert(rot9r(100) == "912345678") + assert(rot9l(100) == "234567891") +end + +do --- rot18 + local function rot18r(N) + local a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18 + for x=1,N do + a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r=r,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q + end + return table.concat{a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r} + end + + local function rot18l(N) + local a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18 + for x=1,N do + a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r=b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,a + end + return table.concat{a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r} + end + + assert(rot18r(0) == "123456789101112131415161718") + assert(rot18r(10) == "910111213141516171812345678") + assert(rot18r(105) == "456789101112131415161718123") + assert(rot18r(0) == "123456789101112131415161718") + assert(rot18r(1) == "181234567891011121314151617") + assert(rot18r(2) == "171812345678910111213141516") + assert(rot18r(0) == "123456789101112131415161718") + assert(rot18r(1) == "181234567891011121314151617") + assert(rot18r(2) == "171812345678910111213141516") + assert(rot18r(105) == "456789101112131415161718123") + + assert(rot18l(0) == "123456789101112131415161718") + assert(rot18l(10) == "111213141516171812345678910") + assert(rot18l(105) == "161718123456789101112131415") + assert(rot18l(0) == "123456789101112131415161718") + assert(rot18l(1) == "234567891011121314151617181") + assert(rot18l(2) == "345678910111213141516171812") + assert(rot18l(0) == "123456789101112131415161718") + assert(rot18l(1) == "234567891011121314151617181") + assert(rot18l(2) == "345678910111213141516171812") + + assert(rot18r(100) == "910111213141516171812345678") + assert(rot18l(100) == "111213141516171812345678910") +end + +do --- rotx + local function rot9r(n, m) + local a,b,c,d,e,f,g,h,i=1,2,3,4,5,6,7,8,9 + local s = "" + for x=1,n do + a,b,c,d,e,f,g,h,i=i,a,b,c,d,e,f,g,h + if x == m then s = 
table.concat{a,b,c,d,e,f,g,h,i} end + c,d = d,c + end + return table.concat{a,b,c,d,e,f,g,h,i, s} + end + + assert(rot9r(0,0) == "123456789") + assert(rot9r(10,0) == "893124567") + assert(rot9r(105,0) == "913245678") + assert(rot9r(105,90) == "913245678891324567") + assert(rot9r(0,0) == "123456789") + assert(rot9r(1,0) == "913245678") + assert(rot9r(2,0) == "893124567") + assert(rot9r(1,1) == "913245678912345678") + assert(rot9r(2,1) == "893124567912345678") + assert(rot9r(2,2) == "893124567891324567") +end diff --git a/testsuite/test/trace/snap.lua b/testsuite/test/trace/snap.lua new file mode 100644 index 0000000000..ba26326e0f --- /dev/null +++ b/testsuite/test/trace/snap.lua @@ -0,0 +1,47 @@ +do --- gcexit + local x = 0 + local t + for i=1,1000 do + if i >= 100 then + -- causes an exit for atomic phase + -- must not merge snapshot #0 with comparison since it has the wrong PC + if i < 150 then x=x+1 end + t = {i} + end + end + assert(x == 50) + assert(t[1] == 1000) +end + + +do --- top !private_G + function randomtable(entries, depth) + if depth == 0 then + return tostring(math.random(2)) -- snapshot between return and CALLMT + end + local t = {} + for k=1,entries do + t[k] = randomtable(entries, depth-1) + end + return t + end + + local t = randomtable(10, 2) +end + +do --- top2 + local function f() + gcinfo() + local _,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_ + local _,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_ + local _,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_ + local _,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_ + local _,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_ + local _,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_ + end + + for i=1,100 do + f() + if i % 3 == 0 then collectgarbage() end + end +end diff --git a/testsuite/test/trace/stitch.lua b/testsuite/test/trace/stitch.lua new file mode 100644 index 0000000000..3f7f97342f --- /dev/null +++ 
b/testsuite/test/trace/stitch.lua @@ -0,0 +1,19 @@ +do --- octal + local tonumber = tonumber + local function octal(s) return tonumber(s, 8) end + for i=1,100 do + octal("1") + octal("1") + octal("1") + end +end + +do --- coroutines + local t = { + [0] = function() end, + coroutine.wrap(function() while true do coroutine.yield() end end), + } + for i=1,100 do + t[i % 2]() + end +end diff --git a/testsuite/test/trace/unsink.lua b/testsuite/test/trace/unsink.lua new file mode 100644 index 0000000000..084347fcdd --- /dev/null +++ b/testsuite/test/trace/unsink.lua @@ -0,0 +1,39 @@ +local ffi = require("ffi") + +-- Unsinking is what happens when a "sunk" allocation needs to be +-- performed at trace exit time. The JIT has optimized away the +-- allocation within the trace machine code but when we exit back to +-- the interpreter the fully allocated value can be required. +-- +-- (Strictly speaking unsinking is what happens when a sunk allocation +-- is referenced by the snapshot of a taken trace exit and the Lua +-- stack needs to be reconstructed for the interpreter to use.) + +local array = ffi.new("struct { int x; } [1]") + +do --- unsink constant pointer + + -- This test forces the VM to unsink a pointer that was constructed + -- from a constant. The IR will include a 'cnewi' instruction to + -- allocate an FFI pointer object, the pointer value will be an IR + -- constant, the allocation will be sunk, and the allocation will + -- at some point be "unsunk" due to a reference in the snapshot for + -- a taken exit. + + -- Note: JIT will recognize 'array' as a "singleton" and allow its + -- address to be inlined ("constified") instead of looking up the + -- upvalue at runtime. + + local function fn (i) + local struct = array[0] -- Load pointer that the JIT will constify. + if i == 1000 then end -- Force trace exit when i==1000. + struct.x = 0 -- Ensure that 'struct' is live after exit.
+ end + + -- Loop over the function to make it compile and take a trace exit + -- during the final iteration. + for i = 1, 1000 do + fn(i) + end +end + diff --git a/testsuite/test/unportable/ffi_arith_int64.lua b/testsuite/test/unportable/ffi_arith_int64.lua new file mode 100644 index 0000000000..c05e02a974 --- /dev/null +++ b/testsuite/test/unportable/ffi_arith_int64.lua @@ -0,0 +1,68 @@ +local ffi = require("ffi") + +local int = ffi.typeof("int") + +local inp = { + 0, 0.5, -0.5, 1.5, -1.5, 1, -1, 2, -2, 37, -37, false, + int(0), int(1), int(-1), int(2), int(-2), int(37), int(-37), false, + 0ll, 1ll, -1ll, 2ll, -2ll, 37ll, -37ll, false, + 0ull, 1ull, -1ull, 2ull, -2ull, 37ull, -37ull, +} + +local function cksum(s, r) + local z = 0 + for i=1,#s do z = (z + string.byte(s, i)*i) % 2147483629 end + if z ~= r then + error("test failed (got "..z..", expected "..r..") for:\n"..s, 3) + end +end + +local function tostr(n) + if type(n) == "cdata" then return tostring(n) + elseif n ~= n then return "nan" + else return string.format("%+1.5g", n) end +end + +local function check(f, expected, y) + local inp = inp + local out = {} + for i=1,#inp do + if inp[i] then out[i] = tostr(f(inp[i], y)) else out[i] = "\n" end + end + local got = string.gsub(table.concat(out, " ").."\n", "\n ", "\n") + cksum(got, expected) +end + +jit.off(check) + +local function check2(f, exparray) + local k = 1 + for j=1,#inp do + local y = inp[j] + if y then + check(f, exparray[k], y) + k = k + 1 + end + end +end + +check(function(x) return -x end, 1174528) + +check2(function(x, y) return x+y end, +{1171039,1239261,1239303,1011706,1490711,949996,1415869,756412,1682910,768883,2201023,1265370,1015700,1556902,807607,1862947,814710,2423097,1265370,1015700,1556902,807607,1862947,814710,2423097,4833809,2909723,7784653,1736671,10743770,1126700,13324037,}) + +check2(function(x, y) return x-y end, 
+{1171039,1239303,1239261,1490711,1011706,1415869,949996,1682910,756412,2201023,768883,1265370,1556902,1015700,1862947,807607,2423097,814710,1265370,1556902,1015700,1862947,807607,2423097,814710,4833809,7784653,2909723,10743770,1736671,13324037,1126700,}) + +check2(function(x, y) return x*y end, +{470257,637182,637132,1308150,1311627,1171039,1174528,1083877,1087553,1561321,1564869,564568,1265370,1269122,1265037,1268973,1643392,1647266,564568,1265370,1269122,1265037,1268973,1643392,1647266,827768,4833809,4847593,4823713,4838210,5230281,5244035,}) + +check2(function(x, y) return x/y end, +{7946210,7360895,7360865,1580465,927251,1171039,622069,1252901,704706,1542087,960011,14749620,1265370,695208,1188639,661058,1049280,587329,14749620,1265370,695208,1188639,661058,1049280,587329,15042810,4833809,828129,4559889,828509,4208862,828929,}) + + +check2(function(x, y) return x%y end, +{7653740,7304160,7304160,527871,851988,527061,850910,556674,717022,610671,613599,14749620,564568,894526,618652,785052,641760,644574,14749620,564568,894526,618652,785052,641760,644574,15042810,827768,2913108,829285,1737261,951059,959905,}) + +check2(function(x, y) return x^y end, +{471871,702627,720692,1385612,1803393,1171039,1772007,763817,1583994,4486762,2380423,566647,1265370,2319256,770581,1990479,4566660,2319835,566647,1265370,2319256,770581,1990479,4566660,2319835,830322,4833809,4644705,1071753,2822313,7709069,4647021,}) diff --git a/testsuite/test/unportable/math_special.lua b/testsuite/test/unportable/math_special.lua new file mode 100644 index 0000000000..49161014a7 --- /dev/null +++ b/testsuite/test/unportable/math_special.lua @@ -0,0 +1,55 @@ + +local inp = { 0, -"0", 0.5, -0.5, 1, -1, 1/0, -1/0, 0/0 } + +local function tostr(n) + if n == 0 and 1/n < 0 then return "-0" + elseif 1/n == 0 then return n < 0 and "-inf" or "+inf" + elseif n ~= n then return "nan" + else return string.format("%+1.5g", n) end +end + +local function check(f, expected) + local inp = inp + local out = {} + for 
i=1,#inp do out[i] = tostr(f(inp[i])) end + local got = table.concat(out, " ") + if got ~= expected then + error("got: \""..got.."\"\nexpected: \""..expected.."\"", 2) + end +end + +check(function(x) return x end, "+0 -0 +0.5 -0.5 +1 -1 +inf -inf nan") + +local powcheck = { + "+1 +1 +1 +1 +1 +1 +1 +1 +1", + "+1 +1 +1 +1 +1 +1 +1 +1 +1", + "+0 +0 +0.70711 nan +1 nan +inf +inf nan", + "+inf +inf +1.4142 nan +1 nan +0 +0 nan", + "+0 -0 +0.5 -0.5 +1 -1 +inf -inf nan", + "+inf -inf +2 -2 +1 -1 +0 -0 nan", + "+0 +0 +0 +0 +1 +1 +inf +inf nan", + "+inf +inf +inf +inf +1 +1 +0 +0 nan", + "nan nan nan nan +1 nan nan nan nan", +} +for j=1,#inp do + local y = inp[j] + check(function(x) return x^y end, powcheck[j]) +end + +check(math.abs, "+0 +0 +0.5 +0.5 +1 +1 +inf +inf nan") +check(math.floor, "+0 -0 +0 -1 +1 -1 +inf -inf nan") +check(math.ceil, "+0 -0 +1 -0 +1 -1 +inf -inf nan") +check(math.sqrt, "+0 -0 +0.70711 nan +1 nan +inf nan nan") +check(math.sin, "+0 -0 +0.47943 -0.47943 +0.84147 -0.84147 nan nan nan") +check(math.cos, "+1 +1 +0.87758 +0.87758 +0.5403 +0.5403 nan nan nan") +check(math.tan, "+0 -0 +0.5463 -0.5463 +1.5574 -1.5574 nan nan nan") +check(math.asin, "+0 -0 +0.5236 -0.5236 +1.5708 -1.5708 nan nan nan") +check(math.acos, "+1.5708 +1.5708 +1.0472 +2.0944 +0 +3.1416 nan nan nan") +check(math.atan, "+0 -0 +0.46365 -0.46365 +0.7854 -0.7854 +1.5708 -1.5708 nan") +check(math.log, "-inf -inf -0.69315 nan +0 nan +inf nan nan") +check(math.log10, "-inf -inf -0.30103 nan +0 nan +inf nan nan") +check(math.exp, "+1 +1 +1.6487 +0.60653 +2.7183 +0.36788 +inf +0 nan") + +-- Pointless: deg, rad, min, max, pow +-- LATER: %, fmod, frexp, ldexp, modf, sinh, cosh, tanh +