diff --git a/doc/building.md b/doc/building.md index 459dbaa4c4101..9e6687f6b8169 100644 --- a/doc/building.md +++ b/doc/building.md @@ -135,6 +135,14 @@ space is required. If you do not have access to sufficiently powerful hardware, it is also possible to use [cross-compiling](#cross-compiling). +#### Branch Protection + +In order to use Branch Protection features in the VM, `--enable-branch-protection` +must be used. This option requires C++ compiler support (GCC 9.1.0+ or Clang +10+). The resulting build can be run on machines both with and without hardware +support for branch protection. Branch Protection is only supported for +Linux targets. + ### Building on 32-bit arm This is not recommended. Instead, see the section on [Cross-compiling]( diff --git a/make/autoconf/flags-cflags.m4 b/make/autoconf/flags-cflags.m4 index 76724235ec4d7..4872ce71c6c61 100644 --- a/make/autoconf/flags-cflags.m4 +++ b/make/autoconf/flags-cflags.m4 @@ -1,5 +1,5 @@ # -# Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it @@ -803,17 +803,19 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_CPU_DEP], fi AC_SUBST(FILE_MACRO_CFLAGS) + FLAGS_SETUP_BRANCH_PROTECTION + # EXPORT to API CFLAGS_JVM_COMMON="$ALWAYS_CFLAGS_JVM $ALWAYS_DEFINES_JVM \ $TOOLCHAIN_CFLAGS_JVM ${$1_TOOLCHAIN_CFLAGS_JVM} \ $OS_CFLAGS $OS_CFLAGS_JVM $CFLAGS_OS_DEF_JVM $DEBUG_CFLAGS_JVM \ $WARNING_CFLAGS $WARNING_CFLAGS_JVM $JVM_PICFLAG $FILE_MACRO_CFLAGS \ - $REPRODUCIBLE_CFLAGS" + $REPRODUCIBLE_CFLAGS $BRANCH_PROTECTION_CFLAGS" CFLAGS_JDK_COMMON="$ALWAYS_CFLAGS_JDK $ALWAYS_DEFINES_JDK $TOOLCHAIN_CFLAGS_JDK \ $OS_CFLAGS $CFLAGS_OS_DEF_JDK $DEBUG_CFLAGS_JDK $DEBUG_OPTIONS_FLAGS_JDK \ $WARNING_CFLAGS $WARNING_CFLAGS_JDK $DEBUG_SYMBOLS_CFLAGS_JDK \ - $FILE_MACRO_CFLAGS $REPRODUCIBLE_CFLAGS" + $FILE_MACRO_CFLAGS $REPRODUCIBLE_CFLAGS $BRANCH_PROTECTION_CFLAGS" # Use ${$2EXTRA_CFLAGS} to block EXTRA_CFLAGS to be added to build flags. # (Currently we don't have any OPENJDK_BUILD_EXTRA_CFLAGS, but that might @@ -879,3 +881,24 @@ AC_DEFUN([FLAGS_SETUP_GCC6_COMPILER_FLAGS], PREFIX: $2, IF_FALSE: [NO_LIFETIME_DSE_CFLAG=""]) $1_GCC6_CFLAGS="${NO_DELETE_NULL_POINTER_CHECKS_CFLAG} ${NO_LIFETIME_DSE_CFLAG}" ]) + +AC_DEFUN_ONCE([FLAGS_SETUP_BRANCH_PROTECTION], +[ + # Is branch protection available? 
+ BRANCH_PROTECTION_AVAILABLE=false + BRANCH_PROTECTION_FLAG="-mbranch-protection=standard" + + if test "x$OPENJDK_TARGET_CPU" = xaarch64; then + if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then + FLAGS_COMPILER_CHECK_ARGUMENTS(ARGUMENT: [${BRANCH_PROTECTION_FLAG}], + IF_TRUE: [BRANCH_PROTECTION_AVAILABLE=true]) + fi + fi + + BRANCH_PROTECTION_CFLAGS="" + UTIL_ARG_ENABLE(NAME: branch-protection, DEFAULT: false, + RESULT: USE_BRANCH_PROTECTION, AVAILABLE: $BRANCH_PROTECTION_AVAILABLE, + DESC: [enable branch protection when compiling C/C++], + IF_ENABLED: [ BRANCH_PROTECTION_CFLAGS=${BRANCH_PROTECTION_FLAG}]) + AC_SUBST(BRANCH_PROTECTION_CFLAGS) +]) diff --git a/make/autoconf/spec.gmk.in b/make/autoconf/spec.gmk.in index d286d5cc2cc5e..ab1c475f3898e 100644 --- a/make/autoconf/spec.gmk.in +++ b/make/autoconf/spec.gmk.in @@ -407,6 +407,7 @@ LIBFFI_CFLAGS:=@LIBFFI_CFLAGS@ ENABLE_LIBFFI_BUNDLING:=@ENABLE_LIBFFI_BUNDLING@ LIBFFI_LIB_FILE:=@LIBFFI_LIB_FILE@ FILE_MACRO_CFLAGS := @FILE_MACRO_CFLAGS@ +BRANCH_PROTECTION_CFLAGS := @BRANCH_PROTECTION_CFLAGS@ STATIC_LIBS_CFLAGS := @STATIC_LIBS_CFLAGS@ diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 41893bc4e0a1a..f21835f9de3ef 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -1853,6 +1853,10 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { if (C->output()->need_stack_bang(framesize)) st->print("# stack bang size=%d\n\t", framesize); + if (VM_Version::use_rop_protection()) { + st->print("ldr zr, [lr]\n\t"); + st->print("pacia lr, rfp\n\t"); + } if (framesize < ((1 << 9) + 2 * wordSize)) { st->print("sub sp, sp, #%d\n\t", framesize); st->print("stp rfp, lr, [sp, #%d]", framesize - 2 * wordSize); @@ -1961,6 +1965,10 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { st->print("add sp, sp, rscratch1\n\t"); st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize)); } + if 
(VM_Version::use_rop_protection()) { + st->print("autia lr, rfp\n\t"); + st->print("ldr zr, [lr]\n\t"); + } if (do_polling() && C->is_method_compilation()) { st->print("# test polling word\n\t"); diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp index 9482c3a65c2ec..10fcdaa243c00 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -987,33 +987,35 @@ class Assembler : public AbstractAssembler { rf(rt, 0); } - void hint(int imm) { - system(0b00, 0b011, 0b0010, 0b0000, imm); - } - - void nop() { - hint(0); - } - - void yield() { - hint(1); - } + // Hint instructions - void wfe() { - hint(2); +#define INSN(NAME, crm, op2) \ + void NAME() { \ + system(0b00, 0b011, 0b0010, crm, op2); \ } - void wfi() { - hint(3); - } + INSN(nop, 0b000, 0b0000); + INSN(yield, 0b000, 0b0001); + INSN(wfe, 0b000, 0b0010); + INSN(wfi, 0b000, 0b0011); + INSN(sev, 0b000, 0b0100); + INSN(sevl, 0b000, 0b0101); - void sev() { - hint(4); - } + INSN(autia1716, 0b0001, 0b100); + INSN(autiasp, 0b0011, 0b101); + INSN(autiaz, 0b0011, 0b100); + INSN(autib1716, 0b0001, 0b110); + INSN(autibsp, 0b0011, 0b111); + INSN(autibz, 0b0011, 0b110); + INSN(pacia1716, 0b0001, 0b000); + INSN(paciasp, 0b0011, 0b001); + INSN(paciaz, 0b0011, 0b000); + INSN(pacib1716, 0b0001, 0b010); + INSN(pacibsp, 0b0011, 0b011); + INSN(pacibz, 0b0011, 0b010); + INSN(xpaclri, 0b0000, 0b111); - void sevl() { - hint(5); - } +#undef INSN // we only provide mrs and msr for the special purpose system // registers where op1 (instr[20:19]) == 11 and, (currently) only @@ -1099,18 +1101,21 @@ class Assembler : public AbstractAssembler { } // Unconditional branch (register) - void branch_reg(Register R, int opc) { + + void branch_reg(int OP, int A, int M, Register RN, Register RM) { starti; f(0b1101011, 31, 25); - f(opc, 24, 21); - f(0b11111000000, 20, 10); - rf(R, 5); - f(0b00000, 4, 0); + f(OP, 24, 21); + f(0b111110000, 20, 
12); + f(A, 11, 11); + f(M, 10, 10); + rf(RN, 5); + rf(RM, 0); } -#define INSN(NAME, opc) \ - void NAME(Register R) { \ - branch_reg(R, opc); \ +#define INSN(NAME, opc) \ + void NAME(Register RN) { \ + branch_reg(opc, 0, 0, RN, r0); \ } INSN(br, 0b0000); @@ -1121,14 +1126,48 @@ class Assembler : public AbstractAssembler { #undef INSN -#define INSN(NAME, opc) \ - void NAME() { \ - branch_reg(dummy_reg, opc); \ +#define INSN(NAME, opc) \ + void NAME() { \ + branch_reg(opc, 0, 0, dummy_reg, r0); \ } INSN(eret, 0b0100); INSN(drps, 0b0101); +#undef INSN + +#define INSN(NAME, M) \ + void NAME() { \ + branch_reg(0b0010, 1, M, dummy_reg, dummy_reg); \ + } + + INSN(retaa, 0); + INSN(retab, 1); + +#undef INSN + +#define INSN(NAME, OP, M) \ + void NAME(Register rn) { \ + branch_reg(OP, 1, M, rn, dummy_reg); \ + } + + INSN(braaz, 0b0000, 0); + INSN(brabz, 0b0000, 1); + INSN(blraaz, 0b0001, 0); + INSN(blrabz, 0b0001, 1); + +#undef INSN + +#define INSN(NAME, OP, M) \ + void NAME(Register rn, Register rm) { \ + branch_reg(OP, 1, M, rn, rm); \ + } + + INSN(braa, 0b1000, 0); + INSN(brab, 0b1000, 1); + INSN(blraa, 0b1001, 0); + INSN(blrab, 0b1001, 1); + #undef INSN // Load/store exclusive @@ -1792,6 +1831,37 @@ void mvnw(Register Rd, Register Rm, INSN(clz, 0b110, 0b00000, 0b00100); INSN(cls, 0b110, 0b00000, 0b00101); + // PAC instructions + INSN(pacia, 0b110, 0b00001, 0b00000); + INSN(pacib, 0b110, 0b00001, 0b00001); + INSN(pacda, 0b110, 0b00001, 0b00010); + INSN(pacdb, 0b110, 0b00001, 0b00011); + INSN(autia, 0b110, 0b00001, 0b00100); + INSN(autib, 0b110, 0b00001, 0b00101); + INSN(autda, 0b110, 0b00001, 0b00110); + INSN(autdb, 0b110, 0b00001, 0b00111); + +#undef INSN + +#define INSN(NAME, op29, opcode2, opcode) \ + void NAME(Register Rd) { \ + starti; \ + f(opcode2, 20, 16); \ + data_processing(current_insn, op29, opcode, Rd, dummy_reg); \ + } + + // PAC instructions (with zero modifier) + INSN(paciza, 0b110, 0b00001, 0b01000); + INSN(pacizb, 0b110, 0b00001, 0b01001); + INSN(pacdza, 
0b110, 0b00001, 0b01010); + INSN(pacdzb, 0b110, 0b00001, 0b01011); + INSN(autiza, 0b110, 0b00001, 0b01100); + INSN(autizb, 0b110, 0b00001, 0b01101); + INSN(autdza, 0b110, 0b00001, 0b01110); + INSN(autdzb, 0b110, 0b00001, 0b01111); + INSN(xpaci, 0b110, 0b00001, 0b10000); + INSN(xpacd, 0b110, 0b00001, 0b10001); + #undef INSN // (2 sources) diff --git a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp index 005f739f0aa05..342aa87a6208d 100644 --- a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -385,6 +385,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { // load issuing PC (the return address for this stub) into r3 __ ldr(exception_pc, Address(rfp, 1*BytesPerWord)); + __ authenticate_return_address(exception_pc, rscratch1); // make sure that the vm_results are cleared (may be unnecessary) __ str(zr, Address(rthread, JavaThread::vm_result_offset())); @@ -433,6 +434,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { __ str(exception_pc, Address(rthread, JavaThread::exception_pc_offset())); // patch throwing pc into return address (has bci & oop map) + __ protect_return_address(exception_pc, rscratch1); __ str(exception_pc, Address(rfp, 1*BytesPerWord)); // compute the exception handler. 
@@ -448,6 +450,7 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { __ invalidate_registers(false, true, true, true, true, true); // patch the return address, this stub will directly return to the exception handler + __ protect_return_address(r0, rscratch1); __ str(r0, Address(rfp, 1*BytesPerWord)); switch (id) { @@ -496,10 +499,12 @@ void Runtime1::generate_unwind_exception(StubAssembler *sasm) { // Save our return address because // exception_handler_for_return_address will destroy it. We also // save exception_oop + __ mov(r3, lr); + __ protect_return_address(); __ stp(lr, exception_oop, Address(__ pre(sp, -2 * wordSize))); // search the exception handler address of the caller (using the return address) - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), rthread, lr); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), rthread, r3); // r0: exception handler address of the caller // Only R0 is valid at this time; all other registers have been @@ -512,6 +517,7 @@ void Runtime1::generate_unwind_exception(StubAssembler *sasm) { // get throwing pc (= return address). 
// lr has been destroyed by the call __ ldp(lr, exception_oop, Address(__ post(sp, 2 * wordSize))); + __ authenticate_return_address(); __ mov(r3, lr); __ verify_not_null_oop(exception_oop); diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.cpp b/src/hotspot/cpu/aarch64/frame_aarch64.cpp index cb59e8b12afc7..3363e53690e47 100644 --- a/src/hotspot/cpu/aarch64/frame_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/frame_aarch64.cpp @@ -128,13 +128,13 @@ bool frame::safe_for_sender(JavaThread *thread) { return false; } - sender_pc = (address) this->fp()[return_addr_offset]; // for interpreted frames, the value below is the sender "raw" sp, // which can be different from the sender unextended sp (the sp seen // by the sender) because of current frame local variables sender_sp = (intptr_t*) addr_at(sender_sp_offset); sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; saved_fp = (intptr_t*) this->fp()[link_offset]; + sender_pc = pauth_strip_verifiable((address) this->fp()[return_addr_offset], (address)saved_fp); } else { // must be some sort of compiled/runtime frame @@ -151,9 +151,9 @@ bool frame::safe_for_sender(JavaThread *thread) { return false; } sender_unextended_sp = sender_sp; - sender_pc = (address) *(sender_sp-1); // Note: frame::sender_sp_offset is only valid for compiled frame saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); + sender_pc = pauth_strip_verifiable((address) *(sender_sp-1), (address)saved_fp); } @@ -268,17 +268,22 @@ bool frame::safe_for_sender(JavaThread *thread) { void frame::patch_pc(Thread* thread, address pc) { assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); address* pc_addr = &(((address*) sp())[-1]); + address signing_sp = (((address*) sp())[-2]); + address signed_pc = pauth_sign_return_address(pc, (address)signing_sp); + address pc_old = pauth_strip_verifiable(*pc_addr, (address)signing_sp); if (TracePcPatching) { - tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> 
" INTPTR_FORMAT "]", - p2i(pc_addr), p2i(*pc_addr), p2i(pc)); + tty->print("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", + p2i(pc_addr), p2i(pc_old), p2i(pc)); + if (VM_Version::use_rop_protection()) { + tty->print(" [signed " INTPTR_FORMAT " -> " INTPTR_FORMAT "]", p2i(*pc_addr), p2i(signed_pc)); + } + tty->print_cr(""); } - // Only generated code frames should be patched, therefore the return address will not be signed. - assert(pauth_ptr_is_raw(*pc_addr), "cannot be signed"); // Either the return address is the original one or we are going to // patch in the same address that's already there. - assert(_pc == *pc_addr || pc == *pc_addr, "must be"); - *pc_addr = pc; + assert(_pc == pc_old || pc == pc_old, "must be"); + *pc_addr = signed_pc; address original_pc = CompiledMethod::get_deopt_original_pc(this); if (original_pc != NULL) { assert(original_pc == _pc, "expected original PC to be stored before patching"); @@ -455,12 +460,12 @@ frame frame::sender_for_interpreter_frame(RegisterMap* map) const { } #endif // COMPILER2_OR_JVMCI - // Use the raw version of pc - the interpreter should not have signed it. + // For ROP protection, Interpreter will have signed the sender_pc, but there is no requirement to authenticate it here. 
+ address sender_pc = pauth_strip_verifiable(sender_pc_maybe_signed(), (address)link()); - return frame(sender_sp, unextended_sp, link(), sender_pc_maybe_signed()); + return frame(sender_sp, unextended_sp, link(), sender_pc); } - //------------------------------------------------------------------------------ // frame::sender_for_compiled_frame frame frame::sender_for_compiled_frame(RegisterMap* map) const { @@ -482,7 +487,9 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { intptr_t* unextended_sp = l_sender_sp; // the return_address is always the word on the stack - address sender_pc = (address) *(l_sender_sp-1); + + // For ROP protection, C1/C2 will have signed the sender_pc, but there is no requirement to authenticate it here. + address sender_pc = pauth_strip_verifiable((address) *(l_sender_sp-1), (address) *(l_sender_sp-2)); intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp - frame::sender_sp_offset); @@ -530,6 +537,9 @@ frame frame::sender_raw(RegisterMap* map) const { // Must be native-compiled frame, i.e. the marshaling code for native // methods that exists in the core system. + // Native code may or may not have signed the return address, we have no way to be sure or what + // signing methods they used. Instead, just ensure the stripped value is used. + return frame(sender_sp(), link(), sender_pc()); } diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp index cd689b008e05e..01aff54c96d55 100644 --- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -271,7 +271,7 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); if (on_oop && on_reference) { // LR is live. It must be saved around calls. - __ enter(); // barrier may call runtime + __ enter(/*strip_ret_addr*/true); // barrier may call runtime // Generate the G1 pre-barrier code to log the value of // the referent field in an SATB buffer. g1_write_barrier_pre(masm /* masm */, diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp index 53de1d921fca3..bcabb40e63cbe 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. + * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -237,7 +237,7 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, bool is_narrow = UseCompressedOops && !is_native; Label heap_stable, not_cset; - __ enter(); + __ enter(/*strip_ret_addr*/true); Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); __ ldrb(rscratch2, gc_state); @@ -359,7 +359,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d // 3: apply keep-alive barrier if needed if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { - __ enter(); + __ enter(/*strip_ret_addr*/true); __ push_call_clobbered_registers(); satb_write_barrier_pre(masm /* masm */, noreg /* obj */, diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp index 10b1cf20ef910..6820be15950ec 100644 --- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -78,7 +78,7 @@ void ZBarrierSetAssembler::load_at(MacroAssembler* masm, __ tst(dst, rscratch1); __ br(Assembler::EQ, done); - __ enter(); + __ enter(/*strip_ret_addr*/true); __ push_call_clobbered_registers_except(RegSet::of(dst)); diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp index 82760cc3bcf06..443eb46b720ab 100644 --- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015, 2019, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -118,7 +118,9 @@ define_pd_global(intx, InlineSmallCode, 1000); product(uint, OnSpinWaitInstCount, 1, DIAGNOSTIC, \ "The number of OnSpinWaitInst instructions to generate." \ "It cannot be used with OnSpinWaitInst=none.") \ - range(1, 99) + range(1, 99) \ + product(ccstr, UseBranchProtection, "none", \ + "Branch Protection to use: none, standard, pac-ret") \ // end of ARCH_FLAGS diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 69124c299c151..80287fb6949cc 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -1137,6 +1137,8 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { } BLOCK_COMMENT("verify_oop {"); + strip_return_address(); // This might happen within a stack frame. + protect_return_address(); stp(r0, rscratch1, Address(pre(sp, -2 * wordSize))); stp(rscratch2, lr, Address(pre(sp, -2 * wordSize))); @@ -1150,6 +1152,7 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { ldp(rscratch2, lr, Address(post(sp, 2 * wordSize))); ldp(r0, rscratch1, Address(post(sp, 2 * wordSize))); + authenticate_return_address(); BLOCK_COMMENT("} verify_oop"); } @@ -1166,6 +1169,8 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { } BLOCK_COMMENT("verify_oop_addr {"); + strip_return_address(); // This might happen within a stack frame. 
+ protect_return_address(); stp(r0, rscratch1, Address(pre(sp, -2 * wordSize))); stp(rscratch2, lr, Address(pre(sp, -2 * wordSize))); @@ -1186,6 +1191,7 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ldp(rscratch2, lr, Address(post(sp, 2 * wordSize))); ldp(r0, rscratch1, Address(post(sp, 2 * wordSize))); + authenticate_return_address(); BLOCK_COMMENT("} verify_oop_addr"); } @@ -4296,6 +4302,7 @@ void MacroAssembler::load_byte_map_base(Register reg) { void MacroAssembler::build_frame(int framesize) { assert(framesize >= 2 * wordSize, "framesize must include space for FP/LR"); assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); + protect_return_address(); if (framesize < ((1 << 9) + 2 * wordSize)) { sub(sp, sp, framesize); stp(rfp, lr, Address(sp, framesize - 2 * wordSize)); @@ -4328,6 +4335,7 @@ void MacroAssembler::remove_frame(int framesize) { } ldp(rfp, lr, Address(post(sp, 2 * wordSize))); } + authenticate_return_address(); } @@ -5169,6 +5177,7 @@ void MacroAssembler::get_thread(Register dst) { LINUX_ONLY(RegSet::range(r0, r1) + lr - dst) NOT_LINUX (RegSet::range(r0, r17) + lr - dst); + protect_return_address(); push(saved_regs, sp); mov(lr, CAST_FROM_FN_PTR(address, JavaThread::aarch64_get_thread_helper)); @@ -5178,6 +5187,7 @@ void MacroAssembler::get_thread(Register dst) { } pop(saved_regs, sp); + authenticate_return_address(); } void MacroAssembler::cache_wb(Address line) { @@ -5269,3 +5279,102 @@ void MacroAssembler::spin_wait() { } } } + +// Stack frame creation/removal + +void MacroAssembler::enter(bool strip_ret_addr) { + if (strip_ret_addr) { + // Addresses can only be signed once. If there are multiple nested frames being created + // in the same function, then the return address needs stripping first. 
+ strip_return_address(); + } + protect_return_address(); + stp(rfp, lr, Address(pre(sp, -2 * wordSize))); + mov(rfp, sp); +} + +void MacroAssembler::leave() { + mov(sp, rfp); + ldp(rfp, lr, Address(post(sp, 2 * wordSize))); + authenticate_return_address(); +} + +// ROP Protection +// Use the AArch64 PAC feature to add ROP protection for generated code. Use whenever creating/ +// destroying stack frames or whenever directly loading/storing the LR to memory. +// If ROP protection is not set then these functions are no-ops. +// For more details on PAC see pauth_aarch64.hpp. + +// Sign the LR. Use during construction of a stack frame, before storing the LR to memory. +// Uses the FP as the modifier. +// +void MacroAssembler::protect_return_address() { + if (VM_Version::use_rop_protection()) { + check_return_address(); + // The standard convention for C code is to use paciasp, which uses SP as the modifier. This + // works because in C code, FP and SP match on function entry. In the JDK, SP and FP may not + // match, so instead explicitly use the FP. + pacia(lr, rfp); + } +} + +// Sign the return value in the given register. Use before updating the LR in the existing stack +// frame for the current function. +// Uses the FP from the start of the function as the modifier - which is stored at the address of +// the current FP. +// +void MacroAssembler::protect_return_address(Register return_reg, Register temp_reg) { + if (VM_Version::use_rop_protection()) { + assert(PreserveFramePointer, "PreserveFramePointer must be set for ROP protection"); + check_return_address(return_reg); + ldr(temp_reg, Address(rfp)); + pacia(return_reg, temp_reg); + } +} + +// Authenticate the LR. Use before function return, after restoring FP and loading LR from memory. 
+// +void MacroAssembler::authenticate_return_address(Register return_reg) { + if (VM_Version::use_rop_protection()) { + autia(return_reg, rfp); + check_return_address(return_reg); + } +} + +// Authenticate the return value in the given register. Use before updating the LR in the existing +// stack frame for the current function. +// Uses the FP from the start of the function as the modifier - which is stored at the address of +// the current FP. +// +void MacroAssembler::authenticate_return_address(Register return_reg, Register temp_reg) { + if (VM_Version::use_rop_protection()) { + assert(PreserveFramePointer, "PreserveFramePointer must be set for ROP protection"); + ldr(temp_reg, Address(rfp)); + autia(return_reg, temp_reg); + check_return_address(return_reg); + } +} + +// Strip any PAC data from LR without performing any authentication. Use with caution - only if +// there is no guaranteed way of authenticating the LR. +// +void MacroAssembler::strip_return_address() { + if (VM_Version::use_rop_protection()) { + xpaclri(); + } +} + +#ifndef PRODUCT +// PAC failures can be difficult to debug. After an authentication failure, a segfault will only +// occur when the pointer is used - i.e. when the program returns to the invalid LR. At this point +// it is difficult to debug back to the callee function. +// This function simply loads from the address in the given register. +// Use directly after authentication to catch authentication failures. +// Also use before signing to check that the pointer is valid and hasn't already been signed. 
+// +void MacroAssembler::check_return_address(Register return_reg) { + if (VM_Version::use_rop_protection()) { + ldr(zr, Address(return_reg)); + } +} +#endif diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index 16f9790bde42c..29d4b8ac119e6 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -688,16 +688,16 @@ class MacroAssembler: public Assembler { void align(int modulus); // Stack frame creation/removal - void enter() - { - stp(rfp, lr, Address(pre(sp, -2 * wordSize))); - mov(rfp, sp); - } - void leave() - { - mov(sp, rfp); - ldp(rfp, lr, Address(post(sp, 2 * wordSize))); - } + void enter(bool strip_ret_addr = false); + void leave(); + + // ROP Protection + void protect_return_address(); + void protect_return_address(Register return_reg, Register temp_reg); + void authenticate_return_address(Register return_reg = lr); + void authenticate_return_address(Register return_reg, Register temp_reg); + void strip_return_address(); + void check_return_address(Register return_reg=lr) PRODUCT_RETURN; // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) // The pointer will be loaded into the thread register. diff --git a/src/hotspot/cpu/aarch64/pauth_aarch64.hpp b/src/hotspot/cpu/aarch64/pauth_aarch64.hpp index e12a671daf1e2..fe5fbbce9f05f 100644 --- a/src/hotspot/cpu/aarch64/pauth_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/pauth_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, Arm Limited. All rights reserved. + * Copyright (c) 2021, 2022, Arm Limited. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,9 +27,58 @@ #include OS_CPU_HEADER_INLINE(pauth) +// Support for ROP Protection in VM code. +// This is provided via the AArch64 PAC feature. 
+// For more details on PAC see The Arm ARM, section "Pointer authentication in AArch64 state". +// +// PAC provides a method to sign and authenticate pointer values. Signing combines the register +// being signed, an additional modifier and a per-process secret key, writing the result to unused +// high bits of the signed register. Once signed a register must be authenticated or stripped +// before it can be used. +// Authentication reverses the signing operation, clearing the high bits. If the signed register +// or modifier has changed then authentication will fail and invalid data will be written to the +// high bits and the next time the pointer is used a segfault will be raised. +// +// Assume a malicious attacker is able to edit the stack via an exploit. Control flow can be +// changed by re-writing the return values stored on the stack. ROP protection prevents this by +// signing return addresses before saving them on the stack, then authenticating when they are +// loaded back. The scope of this protection is per function (a value is signed and authenticated +// by the same function), therefore it is possible for different functions within the same +// program to use different signing methods. +// +// The VM and native code is protected by compiling with the GCC AArch64 branch protection flag. +// +// All generated code is protected via the ROP functions provided in macroAssembler. +// +// In addition, the VM needs to be aware of PAC whenever viewing or editing the stack. Functions +// are provided here and in the OS specific files. We should assume all stack frames for generated +// code have signed return values. Rewriting the stack should ensure new values are correctly +// signed. However, we cannot make any assumptions about how (or if) native code uses PAC - here +// we should limit access to viewing via stripping. +// + + +// Confirm the given pointer has not been signed - ie none of the high bits are set. +// +// Note this can give false positives. 
The PAC signing can generate a signature with all signing +// bits as zeros, causing this function to return true. Therefore this should only be used for +// assert style checking. In addition, this function should never be used with a "not" to confirm +// a pointer is signed, as it will fail the above case. The only safe way to do this is to instead +// authenticate the pointer. +// inline bool pauth_ptr_is_raw(address ptr) { - // Confirm none of the high bits are set in the pointer. return ptr == pauth_strip_pointer(ptr); } +// Strip a return value (same as pauth_strip_pointer). When debug is enabled then authenticate +// instead. +// +inline address pauth_strip_verifiable(address ret_addr, address modifier) { + if (VM_Version::use_rop_protection()) { + DEBUG_ONLY(ret_addr = pauth_authenticate_return_address(ret_addr, modifier);) + NOT_DEBUG(ret_addr = pauth_strip_pointer(ret_addr)); + } + return ret_addr; +} + #endif // CPU_AARCH64_PAUTH_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp index 08cc2b20a61e2..18c6d22782307 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
@@ -410,6 +410,7 @@ static void patch_callers_callsite(MacroAssembler *masm) { __ mov(c_rarg0, rmethod); __ mov(c_rarg1, lr); + __ authenticate_return_address(c_rarg1, rscratch1); __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); __ blr(rscratch1); @@ -2178,8 +2179,8 @@ void SharedRuntime::generate_deopt_blob() { // load throwing pc from JavaThread and patch it as the return address // of the current frame. Then clear the field in JavaThread - __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset())); + __ protect_return_address(r3, rscratch1); __ str(r3, Address(rfp, wordSize)); __ str(zr, Address(rthread, JavaThread::exception_pc_offset())); @@ -2287,6 +2288,7 @@ void SharedRuntime::generate_deopt_blob() { __ sub(r2, r2, 2 * wordSize); __ add(sp, sp, r2); __ ldp(rfp, lr, __ post(sp, 2 * wordSize)); + __ authenticate_return_address(); // LR should now be the return address to the caller (3) #ifdef ASSERT @@ -2428,6 +2430,7 @@ void SharedRuntime::generate_uncommon_trap_blob() { // Push self-frame. 
We get here with a return address in LR // and sp should be 16 byte aligned // push rfp and retaddr by hand + __ protect_return_address(); __ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize))); // we don't expect an arg reg save area #ifndef PRODUCT @@ -2502,6 +2505,7 @@ void SharedRuntime::generate_uncommon_trap_blob() { __ sub(r2, r2, 2 * wordSize); __ add(sp, sp, r2); __ ldp(rfp, lr, __ post(sp, 2 * wordSize)); + __ authenticate_return_address(); // LR should now be the return address to the caller (3) frame #ifdef ASSERT @@ -2624,6 +2628,11 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t bool cause_return = (poll_type == POLL_AT_RETURN); RegisterSaver reg_save(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); + // When the signal occurred, the LR was either signed and stored on the stack (in which + // case it will be restored from the stack before being used) or unsigned and not stored + // on the stack. Stripping ensures we get the right value. + __ strip_return_address(); + // Save Integer and Float registers. map = reg_save.save_live_registers(masm, 0, &frame_size_in_words); @@ -2643,6 +2652,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t // it later to determine if someone changed the return address for // us! __ ldr(r20, Address(rthread, JavaThread::saved_exception_pc_offset())); + __ protect_return_address(r20, rscratch1); __ str(r20, Address(rfp, wordSize)); } @@ -2683,6 +2693,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t __ ldr(rscratch1, Address(rfp, wordSize)); __ cmp(r20, rscratch1); __ br(Assembler::NE, no_adjust); + __ authenticate_return_address(r20, rscratch1); #ifdef ASSERT // Verify the correct encoding of the poll we're about to skip. 
@@ -2697,6 +2708,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t #endif // Adjust return pc forward to step over the safepoint poll instruction __ add(r20, r20, NativeInstruction::instruction_size); + __ protect_return_address(r20, rscratch1); __ str(r20, Address(rfp, wordSize)); } @@ -2857,6 +2869,7 @@ void OptoRuntime::generate_exception_blob() { // push rfp and retaddr by hand // Exception pc is 'return address' for stack walker + __ protect_return_address(); __ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize))); // there are no callee save registers and we don't expect an // arg reg save area @@ -2910,6 +2923,7 @@ void OptoRuntime::generate_exception_blob() { // there are no callee save registers now that adapter frames are gone. // and we dont' expect an arg reg save area __ ldp(rfp, r3, Address(__ post(sp, 2 * wordSize))); + __ authenticate_return_address(r3); // r0: exception handler diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp index e20cffd57670b..bf0a4e4472927 100644 --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -832,6 +832,7 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { __ ldr(rcpool, Address(rcpool, ConstantPool::cache_offset_in_bytes())); __ stp(rlocals, rcpool, Address(sp, 2 * wordSize)); + __ protect_return_address(); __ stp(rfp, lr, Address(sp, 10 * wordSize)); __ lea(rfp, Address(sp, 10 * wordSize)); @@ -1748,6 +1749,8 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // adapter frames in C2. Label caller_not_deoptimized; __ ldr(c_rarg1, Address(rfp, frame::return_addr_offset * wordSize)); + // This is a return address, so requires authenticating for PAC. + __ authenticate_return_address(c_rarg1, rscratch1); __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), c_rarg1); __ cbnz(r0, caller_not_deoptimized); @@ -1937,6 +1940,7 @@ void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, address TemplateInterpreterGenerator::generate_trace_code(TosState state) { address entry = __ pc(); + __ protect_return_address(); __ push(lr); __ push(state); __ push(RegSet::range(r0, r15), sp); @@ -1947,6 +1951,7 @@ address TemplateInterpreterGenerator::generate_trace_code(TosState state) { __ pop(RegSet::range(r0, r15), sp); __ pop(state); __ pop(lr); + __ authenticate_return_address(); __ ret(lr); // return from result handler return entry; diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp index b0c0c64f6d93b..d2a573ac63bd6 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp @@ -45,6 +45,7 @@ int VM_Version::_zva_length; int VM_Version::_dcache_line_size; int VM_Version::_icache_line_size; int VM_Version::_initial_sve_vector_length; +bool VM_Version::_rop_protection; SpinWait VM_Version::_spin_wait; @@ -409,6 +410,39 @@ void VM_Version::initialize() { UsePopCountInstruction = true; } + if (UseBranchProtection == nullptr || strcmp(UseBranchProtection, "none") 
== 0) { + _rop_protection = false; + } else if (strcmp(UseBranchProtection, "standard") == 0) { + _rop_protection = false; + // Enable PAC if this code has been built with branch-protection and the CPU/OS supports it. +#ifdef __ARM_FEATURE_PAC_DEFAULT + if ((_features & CPU_PACA) != 0) { + _rop_protection = true; + } +#endif + } else if (strcmp(UseBranchProtection, "pac-ret") == 0) { + _rop_protection = true; +#ifdef __ARM_FEATURE_PAC_DEFAULT + if ((_features & CPU_PACA) == 0) { + warning("ROP-protection specified, but not supported on this CPU."); + // Disable PAC to prevent illegal instruction crashes. + _rop_protection = false; + } +#else + warning("ROP-protection specified, but this VM was built without ROP-protection support."); +#endif + } else { + vm_exit_during_initialization(err_msg("Unsupported UseBranchProtection: %s", UseBranchProtection)); + } + + // The frame pointer must be preserved for ROP protection. + if (_rop_protection == true) { + if (FLAG_IS_DEFAULT(PreserveFramePointer) == false && PreserveFramePointer == false ) { + vm_exit_during_initialization(err_msg("PreserveFramePointer cannot be disabled for ROP-protection")); + } + PreserveFramePointer = true; + } + #ifdef COMPILER2 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { UseMultiplyToLenIntrinsic = true; diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp index b6aec7ed01f98..e979f62b926c7 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -45,6 +45,7 @@ class VM_Version : public Abstract_VM_Version { static int _dcache_line_size; static int _icache_line_size; static int _initial_sve_vector_length; + static bool _rop_protection; static SpinWait _spin_wait; @@ -114,10 +115,11 @@ class VM_Version : public Abstract_VM_Version { decl(SHA3, "sha3", 17) \ decl(SHA512, "sha512", 21) \ decl(SVE, "sve", 22) \ + decl(PACA, "paca", 30) \ /* flags above must follow Linux HWCAP */ \ decl(SVE2, "sve2", 28) \ decl(STXR_PREFETCH, "stxr_prefetch", 29) \ - decl(A53MAC, "a53mac", 30) + decl(A53MAC, "a53mac", 31) #define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) @@ -156,6 +158,7 @@ class VM_Version : public Abstract_VM_Version { static void initialize_cpu_information(void); + static bool use_rop_protection() { return _rop_protection; } }; #endif // CPU_AARCH64_VM_VERSION_AARCH64_HPP diff --git a/src/hotspot/os_cpu/bsd_aarch64/pauth_bsd_aarch64.inline.hpp b/src/hotspot/os_cpu/bsd_aarch64/pauth_bsd_aarch64.inline.hpp index a4d416d384e29..4d07bbef30332 100644 --- a/src/hotspot/os_cpu/bsd_aarch64/pauth_bsd_aarch64.inline.hpp +++ b/src/hotspot/os_cpu/bsd_aarch64/pauth_bsd_aarch64.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, Arm Limited. All rights reserved. + * Copyright (c) 2021, 2022, Arm Limited. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,29 +25,23 @@ #ifndef OS_CPU_BSD_AARCH64_PAUTH_BSD_AARCH64_INLINE_HPP #define OS_CPU_BSD_AARCH64_PAUTH_BSD_AARCH64_INLINE_HPP -#ifdef __APPLE__ -#include -#endif - -// Only the PAC instructions in the NOP space can be used. This ensures the -// binaries work on systems without PAC. Write these instructions using their -// alternate "hint" instructions to ensure older compilers can still be used. -// For Apple, use the provided interface as this may provide additional -// optimization. 
- -#define XPACLRI "hint #0x7;" +// OS specific Support for ROP Protection in VM code. +// For more details on PAC see pauth_aarch64.hpp. inline address pauth_strip_pointer(address ptr) { -#ifdef __APPLE__ - return ptrauth_strip(ptr, ptrauth_key_asib); -#else - register address result __asm__("x30") = ptr; - asm (XPACLRI : "+r"(result)); - return result; -#endif + // No PAC support in BSD as of yet. + return ptr; } -#undef XPACLRI +inline address pauth_sign_return_address(address ret_addr, address sp) { + // No PAC support in BSD as of yet. + return ret_addr; +} + +inline address pauth_authenticate_return_address(address ret_addr, address sp) { + // No PAC support in BSD as of yet. + return ret_addr; +} #endif // OS_CPU_BSD_AARCH64_PAUTH_BSD_AARCH64_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_aarch64/pauth_linux_aarch64.inline.hpp b/src/hotspot/os_cpu/linux_aarch64/pauth_linux_aarch64.inline.hpp index 6f3fd41539c62..1eb1b92b9365c 100644 --- a/src/hotspot/os_cpu/linux_aarch64/pauth_linux_aarch64.inline.hpp +++ b/src/hotspot/os_cpu/linux_aarch64/pauth_linux_aarch64.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, Arm Limited. All rights reserved. + * Copyright (c) 2021, 2022, Arm Limited. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,18 +25,57 @@ #ifndef OS_CPU_LINUX_AARCH64_PAUTH_LINUX_AARCH64_INLINE_HPP #define OS_CPU_LINUX_AARCH64_PAUTH_LINUX_AARCH64_INLINE_HPP -// Only the PAC instructions in the NOP space can be used. This ensures the -// binaries work on systems without PAC. Write these instructions using their -// alternate "hint" instructions to ensure older compilers can still be used. +// OS specific Support for ROP Protection in VM code. +// For more details on PAC see pauth_aarch64.hpp. -#define XPACLRI "hint #0x7;" +inline bool pauth_ptr_is_raw(address ptr); +// Use only the PAC instructions in the NOP space. 
This ensures the binaries work on systems +// without PAC. Write these instructions using their alternate "hint" instructions to ensure older +// compilers can still be used. +#define XPACLRI "hint #0x7;" +#define PACIA1716 "hint #0x8;" +#define AUTIA1716 "hint #0xc;" + +// Strip an address. Use with caution - only if there is no guaranteed way of authenticating the +// value. +// inline address pauth_strip_pointer(address ptr) { register address result __asm__("x30") = ptr; asm (XPACLRI : "+r"(result)); return result; } +// Sign a return value, using the given modifier. +// +inline address pauth_sign_return_address(address ret_addr, address sp) { + if (VM_Version::use_rop_protection()) { + // A pointer cannot be double signed. + guarantee(pauth_ptr_is_raw(ret_addr), "Return address is already signed"); + register address r17 __asm("r17") = ret_addr; + register address r16 __asm("r16") = sp; + asm (PACIA1716 : "+r"(r17) : "r"(r16)); + ret_addr = r17; + } + return ret_addr; +} + +// Authenticate a return value, using the given modifier. +// +inline address pauth_authenticate_return_address(address ret_addr, address sp) { + if (VM_Version::use_rop_protection()) { + register address r17 __asm("r17") = ret_addr; + register address r16 __asm("r16") = sp; + asm (AUTIA1716 : "+r"(r17) : "r"(r16)); + ret_addr = r17; + // Ensure that the pointer authenticated. + guarantee(pauth_ptr_is_raw(ret_addr), "Return address did not authenticate"); + } + return ret_addr; +} + #undef XPACLRI +#undef PACIA1716 +#undef AUTIA1716 #endif // OS_CPU_LINUX_AARCH64_PAUTH_LINUX_AARCH64_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_aarch64/threadLS_linux_aarch64.S b/src/hotspot/os_cpu/linux_aarch64/threadLS_linux_aarch64.S index f541844b9d6df..ac60d6aa94168 100644 --- a/src/hotspot/os_cpu/linux_aarch64/threadLS_linux_aarch64.S +++ b/src/hotspot/os_cpu/linux_aarch64/threadLS_linux_aarch64.S @@ -1,4 +1,4 @@ -// Copyright (c) 2015, Red Hat Inc. All rights reserved. 
+// Copyright (c) 2015, 2022, Red Hat Inc. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -29,6 +29,7 @@ .type _ZN10JavaThread25aarch64_get_thread_helperEv, %function _ZN10JavaThread25aarch64_get_thread_helperEv: + hint #0x19 // paciasp stp x29, x30, [sp, -16]! adrp x0, :tlsdesc:_ZN6Thread12_thr_currentE ldr x1, [x0, #:tlsdesc_lo12:_ZN6Thread12_thr_currentE] @@ -39,6 +40,7 @@ _ZN10JavaThread25aarch64_get_thread_helperEv: add x0, x1, x0 ldr x0, [x0] ldp x29, x30, [sp], 16 + hint #0x1d // autiasp ret .size _ZN10JavaThread25aarch64_get_thread_helperEv, .-_ZN10JavaThread25aarch64_get_thread_helperEv diff --git a/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp b/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp index b5f5a0787e91a..b1080e77c908c 100644 --- a/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp +++ b/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp @@ -72,6 +72,10 @@ #define HWCAP_SVE (1 << 22) #endif +#ifndef HWCAP_PACA +#define HWCAP_PACA (1 << 30) +#endif + #ifndef HWCAP2_SVE2 #define HWCAP2_SVE2 (1 << 1) #endif @@ -111,6 +115,7 @@ void VM_Version::get_os_cpu_info() { static_assert(CPU_SHA3 == HWCAP_SHA3, "Flag CPU_SHA3 must follow Linux HWCAP"); static_assert(CPU_SHA512 == HWCAP_SHA512, "Flag CPU_SHA512 must follow Linux HWCAP"); static_assert(CPU_SVE == HWCAP_SVE, "Flag CPU_SVE must follow Linux HWCAP"); + static_assert(CPU_PACA == HWCAP_PACA, "Flag CPU_PACA must follow Linux HWCAP"); _features = auxv & ( HWCAP_FP | HWCAP_ASIMD | @@ -124,7 +129,8 @@ void VM_Version::get_os_cpu_info() { HWCAP_DCPOP | HWCAP_SHA3 | HWCAP_SHA512 | - HWCAP_SVE); + HWCAP_SVE | + HWCAP_PACA); if (auxv2 & HWCAP2_SVE2) _features |= CPU_SVE2; diff --git a/src/hotspot/os_cpu/windows_aarch64/pauth_windows_aarch64.inline.hpp b/src/hotspot/os_cpu/windows_aarch64/pauth_windows_aarch64.inline.hpp index 
844291ee1e412..6b5c9eecb72a4 100644 --- a/src/hotspot/os_cpu/windows_aarch64/pauth_windows_aarch64.inline.hpp +++ b/src/hotspot/os_cpu/windows_aarch64/pauth_windows_aarch64.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, Arm Limited. All rights reserved. + * Copyright (c) 2021, 2022, Arm Limited. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,10 +25,22 @@ #ifndef OS_CPU_WINDOWS_AARCH64_PAUTH_WINDOWS_AARCH64_INLINE_HPP #define OS_CPU_WINDOWS_AARCH64_PAUTH_WINDOWS_AARCH64_INLINE_HPP +// OS specific Support for ROP Protection in VM code. +// For more details on PAC see pauth_aarch64.hpp. + inline address pauth_strip_pointer(address ptr) { // No PAC support in windows as of yet. return ptr; } -#endif // OS_CPU_WINDOWS_AARCH64_PAUTH_WINDOWS_AARCH64_INLINE_HPP +inline address pauth_sign_return_address(address ret_addr, address sp) { + // No PAC support in windows as of yet. + return ret_addr; +} +inline address pauth_authenticate_return_address(address ret_addr, address sp) { + // No PAC support in windows as of yet. + return ret_addr; +} + +#endif // OS_CPU_WINDOWS_AARCH64_PAUTH_WINDOWS_AARCH64_INLINE_HPP diff --git a/src/hotspot/share/gc/shared/barrierSetNMethod.cpp b/src/hotspot/share/gc/shared/barrierSetNMethod.cpp index f98675d0b23d2..e7d08d9a1fffd 100644 --- a/src/hotspot/share/gc/shared/barrierSetNMethod.cpp +++ b/src/hotspot/share/gc/shared/barrierSetNMethod.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -28,6 +28,7 @@ #include "gc/shared/barrierSet.hpp" #include "gc/shared/barrierSetNMethod.hpp" #include "logging/log.hpp" +#include "runtime/frame.inline.hpp" #include "runtime/thread.hpp" #include "runtime/threadWXSetters.inline.hpp" #include "utilities/debug.hpp" @@ -54,6 +55,7 @@ int BarrierSetNMethod::nmethod_stub_entry_barrier(address* return_address_ptr) { MACOS_AARCH64_ONLY(ThreadWXEnable wx(WXWrite, Thread::current())); address return_address = *return_address_ptr; + AARCH64_ONLY(return_address = pauth_strip_pointer(return_address)); CodeBlob* cb = CodeCache::find_blob(return_address); assert(cb != NULL, "invariant"); diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp index c40807419fe6f..998f1d1ad4f21 100644 --- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -744,6 +744,7 @@ static_field(VM_Version, _zva_length, int) \ static_field(StubRoutines::aarch64, _has_negatives, address) \ static_field(StubRoutines::aarch64, _has_negatives_long, address) \ + static_field(VM_Version, _rop_protection, bool) \ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) #define DECLARE_INT_CPU_FEATURE_CONSTANT(id, name, bit) GENERATE_VM_INT_CONSTANT_ENTRY(VM_Version::CPU_##id) diff --git a/src/hotspot/share/opto/runtime.cpp b/src/hotspot/share/opto/runtime.cpp index c2df7b3b2a13d..eb31a4f16a525 100644 --- a/src/hotspot/share/opto/runtime.cpp +++ b/src/hotspot/share/opto/runtime.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2021, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1465,6 +1465,12 @@ address OptoRuntime::rethrow_C(oopDesc* exception, JavaThread* thread, address r // Enable WXWrite: the function called directly by compiled code. MACOS_AARCH64_ONLY(ThreadWXEnable wx(WXWrite, thread)); + // ret_pc will have been loaded from the stack, so for AArch64 will be signed. + // This needs authenticating, but to do that here requires the fp of the previous frame. + // A better way of doing it would be authenticate in the caller by adding a + // AuthPAuthNode and using it in GraphKit::gen_stub. For now, just strip it. + AARCH64_ONLY(ret_pc = pauth_strip_pointer(ret_pc)); + #ifndef PRODUCT SharedRuntime::_rethrow_ctr++; // count rethrows #endif diff --git a/src/hotspot/share/runtime/frame.cpp b/src/hotspot/share/runtime/frame.cpp index 7a6456ef658a6..10db699ac2012 100644 --- a/src/hotspot/share/runtime/frame.cpp +++ b/src/hotspot/share/runtime/frame.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -1110,6 +1110,11 @@ void frame::verify(const RegisterMap* map) const { #ifdef ASSERT bool frame::verify_return_pc(address x) { +#ifdef AARCH64 + if (!pauth_ptr_is_raw(x)) { + return false; + } +#endif if (StubRoutines::returns_to_call_stub(x)) { return true; } diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp index a5de65ea5ab94..99d294a1b8508 100644 --- a/src/hotspot/share/runtime/sharedRuntime.cpp +++ b/src/hotspot/share/runtime/sharedRuntime.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1947,6 +1947,8 @@ bool SharedRuntime::should_fixup_call_destination(address destination, address e JRT_LEAF(void, SharedRuntime::fixup_callers_callsite(Method* method, address caller_pc)) Method* moop(method); + AARCH64_ONLY(assert(pauth_ptr_is_raw(caller_pc), "should be raw")); + address entry_point = moop->from_compiled_entry_no_trampoline(); // It's possible that deoptimization can occur at a call site which hasn't diff --git a/src/java.base/share/man/java.1 b/src/java.base/share/man/java.1 index ea7edb8ef99d7..6b6999bb992e3 100644 --- a/src/java.base/share/man/java.1 +++ b/src/java.base/share/man/java.1 @@ -1981,6 +1981,32 @@ Allows user to specify VM options in a file, for example, \f[CB]java\ \-XX:VMOptionsFile=/var/my_vm_options\ HelloWorld\f[R]. .RS .RE +.TP +.B \f[CB]\-XX:UseBranchProtection=\f[R]\f[I]mode\f[R] +Specifies the branch protection mode. All options other than none require +the VM to have been built with branch protection enabled. 
In addition, for +full protection, any native libraries provided by applications should be +compiled with the same level of protection. (AArch64 Linux only). +.RS +.PP +Possible \f[I]mode\f[R] arguments for this option include the +following: +.RS +.TP +.B \f[CB]none\f[R] +Do not use branch protection. This is the default value. +.RS +.RE +.TP +.B \f[CB]standard\f[R] +Enables all branch protection modes available on the current platform. +.RS +.RE +.TP +.B \f[CB]pac-ret\f[R] +Enables protection against ROP based attacks. (AArch64 8.3+ only) +.RS +.RE .SH ADVANCED JIT COMPILER OPTIONS FOR JAVA .PP These \f[CB]java\f[R] options control the dynamic just\-in\-time (JIT) diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.aarch64/src/jdk/vm/ci/aarch64/AArch64.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.aarch64/src/jdk/vm/ci/aarch64/AArch64.java index c2a7fa2b5cd9f..a4b5ba3ffa5da 100644 --- a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.aarch64/src/jdk/vm/ci/aarch64/AArch64.java +++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.aarch64/src/jdk/vm/ci/aarch64/AArch64.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -177,6 +177,7 @@ public enum CPUFeature implements CPUFeatureName { SHA3, SHA512, SVE, + PACA, SVE2, STXR_PREFETCH, A53MAC, diff --git a/test/hotspot/gtest/aarch64/aarch64-asmtest.py b/test/hotspot/gtest/aarch64/aarch64-asmtest.py index 1bbcdbb103a01..40892ae0d94f9 100644 --- a/test/hotspot/gtest/aarch64/aarch64-asmtest.py +++ b/test/hotspot/gtest/aarch64/aarch64-asmtest.py @@ -209,15 +209,17 @@ def __init__(self, name): self._name = name self.isWord = name.endswith("w") | name.endswith("wi") self.asmRegPrefix = ["x", "w"][self.isWord] + self.isPostfixException = False def aname(self): - if (self._name.endswith("wi")): + if self.isPostfixException: + return self._name + elif (self._name.endswith("wi")): return self._name[:len(self._name)-2] + elif (self._name.endswith("i") | self._name.endswith("w")): + return self._name[:len(self._name)-1] else: - if (self._name.endswith("i") | self._name.endswith("w")): - return self._name[:len(self._name)-1] - else: - return self._name + return self._name def emit(self) : pass @@ -348,6 +350,12 @@ def astr(self): return (super(OneRegOp, self).astr() + '%s' % self.reg.astr(self.asmRegPrefix)) +class PostfixExceptionOneRegOp(OneRegOp): + + def __init__(self, op): + OneRegOp.__init__(self, op) + self.isPostfixException=True + class ArithOp(ThreeRegInstruction): def generate(self): @@ -597,6 +605,13 @@ def cstr(self): def astr(self): return self.aname(); + +class PostfixExceptionOp(Op): + + def __init__(self, op): + Op.__init__(self, op) + self.isPostfixException=True + class SystemOp(Instruction): def __init__(self, op): @@ -1335,14 +1350,26 @@ def generate(kind, names): generate (ImmOp, ["svc", "hvc", "smc", "brk", "hlt", # "dcps1", "dcps2", "dcps3" ]) -generate (Op, ["nop", "eret", "drps", "isb"]) +generate (Op, ["nop", "yield", "wfe", "sev", "sevl", + "autia1716", "autiasp", "autiaz", "autib1716", "autibsp", "autibz", + "pacia1716", "paciasp", "paciaz", 
"pacib1716", "pacibsp", "pacibz", + "eret", "drps", "isb",]) + +# Ensure the "i" is not stripped off the end of the instruction +generate (PostfixExceptionOp, ["wfi", "xpaclri"]) barriers = ["OSHLD", "OSHST", "OSH", "NSHLD", "NSHST", "NSH", "ISHLD", "ISHST", "ISH", "LD", "ST", "SY"] generate (SystemOp, [["dsb", barriers], ["dmb", barriers]]) -generate (OneRegOp, ["br", "blr"]) +generate (OneRegOp, ["br", "blr", + "paciza", "pacizb", "pacdza", "pacdzb", + "autiza", "autizb", "autdza", "autdzb", "xpacd", + "braaz", "brabz", "blraaz", "blrabz"]) + +# Ensure the "i" is not stripped off the end of the instruction +generate (PostfixExceptionOneRegOp, ["xpaci"]) for mode in 'xwhb': generate (LoadStoreExclusiveOp, [["stxr", mode, 3], ["stlxr", mode, 3], @@ -1387,7 +1414,10 @@ def generate(kind, names): generate(TwoRegOp, ["rbitw", "rev16w", "revw", "clzw", "clsw", "rbit", - "rev16", "rev32", "rev", "clz", "cls"]) + "rev16", "rev32", "rev", "clz", "cls", + "pacia", "pacib", "pacda", "pacdb", "autia", "autib", "autda", "autdb", + "braa", "brab", "blraa", "blrab"]) + generate(ThreeRegOp, ["udivw", "sdivw", "lslvw", "lsrvw", "asrvw", "rorvw", "udiv", "sdiv", "lslv", "lsrv", "asrv", "rorv", "umulh", "smulh"]) @@ -1839,8 +1869,8 @@ def generate(kind, names): outfile.close() -# compile for sve with 8.2 and sha3 because of SHA3 crypto extension. -subprocess.check_call([AARCH64_AS, "-march=armv8.2-a+sha3+sve", "aarch64ops.s", "-o", "aarch64ops.o"]) +# compile for sve with 8.3 and sha3 because of SHA3 crypto extension. 
+subprocess.check_call([AARCH64_AS, "-march=armv8.3-a+sha3+sve", "aarch64ops.s", "-o", "aarch64ops.o"]) print print "/*" diff --git a/test/hotspot/gtest/aarch64/asmtest.out.h b/test/hotspot/gtest/aarch64/asmtest.out.h index 2dbb48d3d01b7..03147cba0496c 100644 --- a/test/hotspot/gtest/aarch64/asmtest.out.h +++ b/test/hotspot/gtest/aarch64/asmtest.out.h @@ -168,10 +168,30 @@ // Op __ nop(); // nop + __ yield(); // yield + __ wfe(); // wfe + __ sev(); // sev + __ sevl(); // sevl + __ autia1716(); // autia1716 + __ autiasp(); // autiasp + __ autiaz(); // autiaz + __ autib1716(); // autib1716 + __ autibsp(); // autibsp + __ autibz(); // autibz + __ pacia1716(); // pacia1716 + __ paciasp(); // paciasp + __ paciaz(); // paciaz + __ pacib1716(); // pacib1716 + __ pacibsp(); // pacibsp + __ pacibz(); // pacibz __ eret(); // eret __ drps(); // drps __ isb(); // isb +// PostfixExceptionOp + __ wfi(); // wfi + __ xpaclri(); // xpaclri + // SystemOp __ dsb(Assembler::ST); // dsb ST __ dmb(Assembler::OSHST); // dmb OSHST @@ -179,527 +199,555 @@ // OneRegOp __ br(r16); // br x16 __ blr(r20); // blr x20 + __ paciza(r10); // paciza x10 + __ pacizb(r27); // pacizb x27 + __ pacdza(r8); // pacdza x8 + __ pacdzb(r0); // pacdzb x0 + __ autiza(r1); // autiza x1 + __ autizb(r21); // autizb x21 + __ autdza(r17); // autdza x17 + __ autdzb(r29); // autdzb x29 + __ xpacd(r29); // xpacd x29 + __ braaz(r28); // braaz x28 + __ brabz(r1); // brabz x1 + __ blraaz(r23); // blraaz x23 + __ blrabz(r21); // blrabz x21 + +// PostfixExceptionOneRegOp + __ xpaci(r20); // xpaci x20 // LoadStoreExclusiveOp - __ stxr(r10, r27, r8); // stxr w10, x27, [x8] - __ stlxr(r0, r1, r21); // stlxr w0, x1, [x21] - __ ldxr(r17, r29); // ldxr x17, [x29] - __ ldaxr(r29, r28); // ldaxr x29, [x28] - __ stlr(r1, r23); // stlr x1, [x23] - __ ldar(r21, r20); // ldar x21, [x20] + __ stxr(r22, r27, r19); // stxr w22, x27, [x19] + __ stlxr(r11, r16, r6); // stlxr w11, x16, [x6] + __ ldxr(r17, r0); // ldxr x17, [x0] + __ 
ldaxr(r4, r10); // ldaxr x4, [x10] + __ stlr(r24, r22); // stlr x24, [x22] + __ ldar(r10, r19); // ldar x10, [x19] // LoadStoreExclusiveOp - __ stxrw(r22, r27, r19); // stxr w22, w27, [x19] - __ stlxrw(r11, r16, r6); // stlxr w11, w16, [x6] - __ ldxrw(r17, r0); // ldxr w17, [x0] - __ ldaxrw(r4, r10); // ldaxr w4, [x10] - __ stlrw(r24, r22); // stlr w24, [x22] - __ ldarw(r10, r19); // ldar w10, [x19] + __ stxrw(r1, r5, r30); // stxr w1, w5, [x30] + __ stlxrw(r8, r12, r17); // stlxr w8, w12, [x17] + __ ldxrw(r9, r14); // ldxr w9, [x14] + __ ldaxrw(r7, r1); // ldaxr w7, [x1] + __ stlrw(r5, r16); // stlr w5, [x16] + __ ldarw(r2, r12); // ldar w2, [x12] // LoadStoreExclusiveOp - __ stxrh(r1, r5, r30); // stxrh w1, w5, [x30] - __ stlxrh(r8, r12, r17); // stlxrh w8, w12, [x17] - __ ldxrh(r9, r14); // ldxrh w9, [x14] - __ ldaxrh(r7, r1); // ldaxrh w7, [x1] - __ stlrh(r5, r16); // stlrh w5, [x16] - __ ldarh(r2, r12); // ldarh w2, [x12] + __ stxrh(r10, r12, r3); // stxrh w10, w12, [x3] + __ stlxrh(r28, r14, r26); // stlxrh w28, w14, [x26] + __ ldxrh(r30, r10); // ldxrh w30, [x10] + __ ldaxrh(r14, r21); // ldaxrh w14, [x21] + __ stlrh(r13, r9); // stlrh w13, [x9] + __ ldarh(r22, r27); // ldarh w22, [x27] // LoadStoreExclusiveOp - __ stxrb(r10, r12, r3); // stxrb w10, w12, [x3] - __ stlxrb(r28, r14, r26); // stlxrb w28, w14, [x26] - __ ldxrb(r30, r10); // ldxrb w30, [x10] - __ ldaxrb(r14, r21); // ldaxrb w14, [x21] - __ stlrb(r13, r9); // stlrb w13, [x9] - __ ldarb(r22, r27); // ldarb w22, [x27] + __ stxrb(r28, r19, r11); // stxrb w28, w19, [x11] + __ stlxrb(r30, r19, r2); // stlxrb w30, w19, [x2] + __ ldxrb(r2, r23); // ldxrb w2, [x23] + __ ldaxrb(r1, r0); // ldaxrb w1, [x0] + __ stlrb(r12, r16); // stlrb w12, [x16] + __ ldarb(r13, r15); // ldarb w13, [x15] // LoadStoreExclusiveOp - __ ldxp(r28, r19, r11); // ldxp x28, x19, [x11] - __ ldaxp(r30, r19, r2); // ldaxp x30, x19, [x2] - __ stxp(r2, r23, r1, r0); // stxp w2, x23, x1, [x0] - __ stlxp(r12, r16, r13, r15); // stlxp 
w12, x16, x13, [x15] + __ ldxp(r17, r21, r13); // ldxp x17, x21, [x13] + __ ldaxp(r11, r30, r8); // ldaxp x11, x30, [x8] + __ stxp(r24, r13, r11, r1); // stxp w24, x13, x11, [x1] + __ stlxp(r26, r21, r27, r13); // stlxp w26, x21, x27, [x13] // LoadStoreExclusiveOp - __ ldxpw(r17, r21, r13); // ldxp w17, w21, [x13] - __ ldaxpw(r11, r30, r8); // ldaxp w11, w30, [x8] - __ stxpw(r24, r13, r11, r1); // stxp w24, w13, w11, [x1] - __ stlxpw(r26, r21, r27, r13); // stlxp w26, w21, w27, [x13] + __ ldxpw(r20, r3, r12); // ldxp w20, w3, [x12] + __ ldaxpw(r6, r1, r29); // ldaxp w6, w1, [x29] + __ stxpw(r6, r4, r11, r16); // stxp w6, w4, w11, [x16] + __ stlxpw(r4, r30, r12, r21); // stlxp w4, w30, w12, [x21] // base_plus_unscaled_offset // LoadStoreOp - __ str(r11, Address(r20, -103)); // str x11, [x20, -103] - __ strw(r28, Address(r16, 62)); // str w28, [x16, 62] - __ strb(r27, Address(r9, -9)); // strb w27, [x9, -9] - __ strh(r2, Address(r25, -50)); // strh w2, [x25, -50] - __ ldr(r4, Address(r2, -241)); // ldr x4, [x2, -241] - __ ldrw(r30, Address(r20, -31)); // ldr w30, [x20, -31] - __ ldrb(r17, Address(r23, -23)); // ldrb w17, [x23, -23] - __ ldrh(r29, Address(r26, -1)); // ldrh w29, [x26, -1] - __ ldrsb(r1, Address(r9, 6)); // ldrsb x1, [x9, 6] - __ ldrsh(r11, Address(r12, 19)); // ldrsh x11, [x12, 19] - __ ldrshw(r11, Address(r1, -50)); // ldrsh w11, [x1, -50] - __ ldrsw(r19, Address(r24, 41)); // ldrsw x19, [x24, 41] - __ ldrd(v24, Address(r24, 95)); // ldr d24, [x24, 95] - __ ldrs(v15, Address(r5, -43)); // ldr s15, [x5, -43] - __ strd(v21, Address(r27, 1)); // str d21, [x27, 1] - __ strs(v23, Address(r13, -107)); // str s23, [x13, -107] + __ str(r6, Address(r27, 97)); // str x6, [x27, 97] + __ strw(r17, Address(r10, 45)); // str w17, [x10, 45] + __ strb(r26, Address(r22, -29)); // strb w26, [x22, -29] + __ strh(r21, Address(r10, -50)); // strh w21, [x10, -50] + __ ldr(r14, Address(r24, 125)); // ldr x14, [x24, 125] + __ ldrw(r7, Address(r24, -16)); // ldr w7, [x24, 
-16] + __ ldrb(r8, Address(r2, 13)); // ldrb w8, [x2, 13] + __ ldrh(r30, Address(r25, -61)); // ldrh w30, [x25, -61] + __ ldrsb(r3, Address(r12, -14)); // ldrsb x3, [x12, -14] + __ ldrsh(r10, Address(r17, -28)); // ldrsh x10, [x17, -28] + __ ldrshw(r21, Address(r3, -5)); // ldrsh w21, [x3, -5] + __ ldrsw(r2, Address(r25, 23)); // ldrsw x2, [x25, 23] + __ ldrd(v25, Address(r1, -69)); // ldr d25, [x1, -69] + __ ldrs(v29, Address(r27, 6)); // ldr s29, [x27, 6] + __ strd(v29, Address(r12, 41)); // str d29, [x12, 41] + __ strs(v2, Address(r22, -115)); // str s2, [x22, -115] // pre // LoadStoreOp - __ str(r10, Address(__ pre(r0, 8))); // str x10, [x0, 8]! - __ strw(r3, Address(__ pre(r0, 29))); // str w3, [x0, 29]! - __ strb(r10, Address(__ pre(r14, 9))); // strb w10, [x14, 9]! - __ strh(r29, Address(__ pre(r25, -3))); // strh w29, [x25, -3]! - __ ldr(r12, Address(__ pre(r16, -144))); // ldr x12, [x16, -144]! - __ ldrw(r12, Address(__ pre(r22, -6))); // ldr w12, [x22, -6]! - __ ldrb(r13, Address(__ pre(r11, -10))); // ldrb w13, [x11, -10]! - __ ldrh(r0, Address(__ pre(r21, -21))); // ldrh w0, [x21, -21]! - __ ldrsb(r23, Address(__ pre(r6, 4))); // ldrsb x23, [x6, 4]! - __ ldrsh(r3, Address(__ pre(r7, -53))); // ldrsh x3, [x7, -53]! - __ ldrshw(r28, Address(__ pre(r4, -7))); // ldrsh w28, [x4, -7]! - __ ldrsw(r24, Address(__ pre(r8, -18))); // ldrsw x24, [x8, -18]! - __ ldrd(v14, Address(__ pre(r11, 12))); // ldr d14, [x11, 12]! - __ ldrs(v19, Address(__ pre(r12, -67))); // ldr s19, [x12, -67]! - __ strd(v20, Address(__ pre(r0, -253))); // str d20, [x0, -253]! - __ strs(v8, Address(__ pre(r0, 64))); // str s8, [x0, 64]! + __ str(r26, Address(__ pre(r5, 3))); // str x26, [x5, 3]! + __ strw(r20, Address(__ pre(r5, -103))); // str w20, [x5, -103]! + __ strb(r8, Address(__ pre(r12, -25))); // strb w8, [x12, -25]! + __ strh(r20, Address(__ pre(r2, -57))); // strh w20, [x2, -57]! + __ ldr(r14, Address(__ pre(r29, -234))); // ldr x14, [x29, -234]! 
+ __ ldrw(r13, Address(__ pre(r29, 4))); // ldr w13, [x29, 4]! + __ ldrb(r24, Address(__ pre(r19, -9))); // ldrb w24, [x19, -9]! + __ ldrh(r3, Address(__ pre(r27, -19))); // ldrh w3, [x27, -19]! + __ ldrsb(r17, Address(__ pre(r1, -5))); // ldrsb x17, [x1, -5]! + __ ldrsh(r17, Address(__ pre(r19, -13))); // ldrsh x17, [x19, -13]! + __ ldrshw(r21, Address(__ pre(r11, -26))); // ldrsh w21, [x11, -26]! + __ ldrsw(r1, Address(__ pre(r9, -60))); // ldrsw x1, [x9, -60]! + __ ldrd(v26, Address(__ pre(r23, -247))); // ldr d26, [x23, -247]! + __ ldrs(v22, Address(__ pre(r21, -127))); // ldr s22, [x21, -127]! + __ strd(v13, Address(__ pre(r7, -216))); // str d13, [x7, -216]! + __ strs(v12, Address(__ pre(r13, -104))); // str s12, [x13, -104]! // post // LoadStoreOp - __ str(r3, Address(__ post(r28, -94))); // str x3, [x28], -94 - __ strw(r11, Address(__ post(r7, -54))); // str w11, [x7], -54 - __ strb(r27, Address(__ post(r10, -24))); // strb w27, [x10], -24 - __ strh(r6, Address(__ post(r7, 27))); // strh w6, [x7], 27 - __ ldr(r13, Address(__ post(r10, -202))); // ldr x13, [x10], -202 - __ ldrw(r15, Address(__ post(r5, -41))); // ldr w15, [x5], -41 - __ ldrb(r2, Address(__ post(r13, 9))); // ldrb w2, [x13], 9 - __ ldrh(r28, Address(__ post(r13, -20))); // ldrh w28, [x13], -20 - __ ldrsb(r9, Address(__ post(r13, -31))); // ldrsb x9, [x13], -31 - __ ldrsh(r3, Address(__ post(r24, -36))); // ldrsh x3, [x24], -36 - __ ldrshw(r20, Address(__ post(r3, 6))); // ldrsh w20, [x3], 6 - __ ldrsw(r7, Address(__ post(r19, -1))); // ldrsw x7, [x19], -1 - __ ldrd(v30, Address(__ post(r8, -130))); // ldr d30, [x8], -130 - __ ldrs(v25, Address(__ post(r15, 21))); // ldr s25, [x15], 21 - __ strd(v14, Address(__ post(r23, 90))); // str d14, [x23], 90 - __ strs(v8, Address(__ post(r0, -33))); // str s8, [x0], -33 + __ str(r20, Address(__ post(r5, -237))); // str x20, [x5], -237 + __ strw(r29, Address(__ post(r28, -74))); // str w29, [x28], -74 + __ strb(r4, Address(__ post(r24, -22))); // strb 
w4, [x24], -22 + __ strh(r13, Address(__ post(r9, -21))); // strh w13, [x9], -21 + __ ldr(r26, Address(__ post(r7, -55))); // ldr x26, [x7], -55 + __ ldrw(r13, Address(__ post(r3, -115))); // ldr w13, [x3], -115 + __ ldrb(r1, Address(__ post(r5, 12))); // ldrb w1, [x5], 12 + __ ldrh(r8, Address(__ post(r13, -34))); // ldrh w8, [x13], -34 + __ ldrsb(r23, Address(__ post(r20, -27))); // ldrsb x23, [x20], -27 + __ ldrsh(r20, Address(__ post(r6, -2))); // ldrsh x20, [x6], -2 + __ ldrshw(r9, Address(__ post(r17, -42))); // ldrsh w9, [x17], -42 + __ ldrsw(r21, Address(__ post(r6, -30))); // ldrsw x21, [x6], -30 + __ ldrd(v16, Address(__ post(r22, -29))); // ldr d16, [x22], -29 + __ ldrs(v9, Address(__ post(r11, -3))); // ldr s9, [x11], -3 + __ strd(v22, Address(__ post(r26, 60))); // str d22, [x26], 60 + __ strs(v16, Address(__ post(r29, -2))); // str s16, [x29], -2 // base_plus_reg // LoadStoreOp - __ str(r10, Address(r17, r21, Address::sxtw(3))); // str x10, [x17, w21, sxtw #3] - __ strw(r4, Address(r13, r22, Address::sxtw(2))); // str w4, [x13, w22, sxtw #2] - __ strb(r13, Address(r0, r19, Address::uxtw(0))); // strb w13, [x0, w19, uxtw #0] - __ strh(r12, Address(r27, r6, Address::sxtw(0))); // strh w12, [x27, w6, sxtw #0] - __ ldr(r0, Address(r8, r16, Address::lsl(0))); // ldr x0, [x8, x16, lsl #0] - __ ldrw(r0, Address(r4, r26, Address::sxtx(0))); // ldr w0, [x4, x26, sxtx #0] - __ ldrb(r14, Address(r25, r5, Address::sxtw(0))); // ldrb w14, [x25, w5, sxtw #0] - __ ldrh(r9, Address(r4, r17, Address::uxtw(0))); // ldrh w9, [x4, w17, uxtw #0] - __ ldrsb(r27, Address(r4, r7, Address::lsl(0))); // ldrsb x27, [x4, x7, lsl #0] - __ ldrsh(r15, Address(r17, r30, Address::sxtw(0))); // ldrsh x15, [x17, w30, sxtw #0] - __ ldrshw(r16, Address(r0, r22, Address::sxtw(0))); // ldrsh w16, [x0, w22, sxtw #0] - __ ldrsw(r22, Address(r10, r30, Address::sxtx(2))); // ldrsw x22, [x10, x30, sxtx #2] - __ ldrd(v29, Address(r21, r10, Address::sxtx(3))); // ldr d29, [x21, x10, sxtx #3] - __ 
ldrs(v3, Address(r11, r19, Address::uxtw(0))); // ldr s3, [x11, w19, uxtw #0] - __ strd(v13, Address(r28, r29, Address::uxtw(3))); // str d13, [x28, w29, uxtw #3] - __ strs(v23, Address(r29, r5, Address::sxtx(2))); // str s23, [x29, x5, sxtx #2] + __ str(r1, Address(r22, r4, Address::sxtw(0))); // str x1, [x22, w4, sxtw #0] + __ strw(r23, Address(r30, r13, Address::lsl(2))); // str w23, [x30, x13, lsl #2] + __ strb(r12, Address(r11, r12, Address::uxtw(0))); // strb w12, [x11, w12, uxtw #0] + __ strh(r25, Address(r12, r0, Address::lsl(1))); // strh w25, [x12, x0, lsl #1] + __ ldr(r17, Address(r7, r0, Address::uxtw(3))); // ldr x17, [x7, w0, uxtw #3] + __ ldrw(r1, Address(r19, r14, Address::uxtw(2))); // ldr w1, [x19, w14, uxtw #2] + __ ldrb(r12, Address(r2, r9, Address::lsl(0))); // ldrb w12, [x2, x9, lsl #0] + __ ldrh(r22, Address(r9, r27, Address::sxtw(0))); // ldrh w22, [x9, w27, sxtw #0] + __ ldrsb(r21, Address(r12, r15, Address::sxtx(0))); // ldrsb x21, [x12, x15, sxtx #0] + __ ldrsh(r28, Address(r6, r16, Address::lsl(1))); // ldrsh x28, [x6, x16, lsl #1] + __ ldrshw(r25, Address(r17, r22, Address::sxtw(0))); // ldrsh w25, [x17, w22, sxtw #0] + __ ldrsw(r4, Address(r17, r29, Address::sxtx(0))); // ldrsw x4, [x17, x29, sxtx #0] + __ ldrd(v5, Address(r1, r3, Address::sxtx(3))); // ldr d5, [x1, x3, sxtx #3] + __ ldrs(v24, Address(r17, r13, Address::uxtw(2))); // ldr s24, [x17, w13, uxtw #2] + __ strd(v17, Address(r17, r23, Address::sxtx(3))); // str d17, [x17, x23, sxtx #3] + __ strs(v17, Address(r30, r5, Address::sxtw(2))); // str s17, [x30, w5, sxtw #2] // base_plus_scaled_offset // LoadStoreOp - __ str(r5, Address(r8, 12600)); // str x5, [x8, 12600] - __ strw(r29, Address(r24, 7880)); // str w29, [x24, 7880] - __ strb(r19, Address(r17, 1566)); // strb w19, [x17, 1566] - __ strh(r13, Address(r19, 3984)); // strh w13, [x19, 3984] - __ ldr(r19, Address(r23, 13632)); // ldr x19, [x23, 13632] - __ ldrw(r23, Address(r29, 6264)); // ldr w23, [x29, 6264] - __ ldrb(r22, 
Address(r11, 2012)); // ldrb w22, [x11, 2012] - __ ldrh(r3, Address(r10, 3784)); // ldrh w3, [x10, 3784] - __ ldrsb(r8, Address(r16, 1951)); // ldrsb x8, [x16, 1951] - __ ldrsh(r23, Address(r20, 3346)); // ldrsh x23, [x20, 3346] - __ ldrshw(r2, Address(r1, 3994)); // ldrsh w2, [x1, 3994] - __ ldrsw(r4, Address(r17, 7204)); // ldrsw x4, [x17, 7204] - __ ldrd(v20, Address(r27, 14400)); // ldr d20, [x27, 14400] - __ ldrs(v25, Address(r14, 8096)); // ldr s25, [x14, 8096] - __ strd(v26, Address(r10, 15024)); // str d26, [x10, 15024] - __ strs(v9, Address(r3, 6936)); // str s9, [x3, 6936] + __ str(r29, Address(r11, 14160)); // str x29, [x11, 14160] + __ strw(r28, Address(r21, 7752)); // str w28, [x21, 7752] + __ strb(r28, Address(r2, 1746)); // strb w28, [x2, 1746] + __ strh(r0, Address(r28, 3296)); // strh w0, [x28, 3296] + __ ldr(r25, Address(r7, 15408)); // ldr x25, [x7, 15408] + __ ldrw(r0, Address(r3, 6312)); // ldr w0, [x3, 6312] + __ ldrb(r30, Address(r5, 1992)); // ldrb w30, [x5, 1992] + __ ldrh(r14, Address(r23, 3194)); // ldrh w14, [x23, 3194] + __ ldrsb(r10, Address(r19, 1786)); // ldrsb x10, [x19, 1786] + __ ldrsh(r29, Address(r17, 3482)); // ldrsh x29, [x17, 3482] + __ ldrshw(r25, Address(r30, 3362)); // ldrsh w25, [x30, 3362] + __ ldrsw(r17, Address(r2, 7512)); // ldrsw x17, [x2, 7512] + __ ldrd(v15, Address(r16, 15176)); // ldr d15, [x16, 15176] + __ ldrs(v12, Address(r30, 6220)); // ldr s12, [x30, 6220] + __ strd(v1, Address(r1, 15216)); // str d1, [x1, 15216] + __ strs(v5, Address(r11, 7832)); // str s5, [x11, 7832] // pcrel // LoadStoreOp - __ ldr(r27, forth); // ldr x27, forth - __ ldrw(r11, __ pc()); // ldr w11, . + __ ldr(r17, back); // ldr x17, back + __ ldrw(r2, back); // ldr w2, back // LoadStoreOp - __ prfm(Address(r3, -187)); // prfm PLDL1KEEP, [x3, -187] + __ prfm(Address(r25, 111)); // prfm PLDL1KEEP, [x25, 111] // LoadStoreOp - __ prfm(__ pc()); // prfm PLDL1KEEP, . 
+ __ prfm(back); // prfm PLDL1KEEP, back // LoadStoreOp - __ prfm(Address(r29, r14, Address::lsl(0))); // prfm PLDL1KEEP, [x29, x14, lsl #0] + __ prfm(Address(r14, r27, Address::uxtw(0))); // prfm PLDL1KEEP, [x14, w27, uxtw #0] // LoadStoreOp - __ prfm(Address(r4, 13312)); // prfm PLDL1KEEP, [x4, 13312] + __ prfm(Address(r14, 12328)); // prfm PLDL1KEEP, [x14, 12328] // AddSubCarryOp - __ adcw(r21, r1, r7); // adc w21, w1, w7 - __ adcsw(r8, r5, r7); // adcs w8, w5, w7 - __ sbcw(r7, r27, r14); // sbc w7, w27, w14 - __ sbcsw(r27, r4, r17); // sbcs w27, w4, w17 - __ adc(r0, r28, r0); // adc x0, x28, x0 - __ adcs(r12, r24, r30); // adcs x12, x24, x30 - __ sbc(r0, r25, r15); // sbc x0, x25, x15 - __ sbcs(r1, r24, r3); // sbcs x1, x24, x3 + __ adcw(r0, r25, r15); // adc w0, w25, w15 + __ adcsw(r1, r24, r3); // adcs w1, w24, w3 + __ sbcw(r17, r24, r20); // sbc w17, w24, w20 + __ sbcsw(r11, r0, r13); // sbcs w11, w0, w13 + __ adc(r28, r10, r7); // adc x28, x10, x7 + __ adcs(r4, r15, r16); // adcs x4, x15, x16 + __ sbc(r2, r12, r20); // sbc x2, x12, x20 + __ sbcs(r29, r13, r13); // sbcs x29, x13, x13 // AddSubExtendedOp - __ addw(r17, r24, r20, ext::uxtb, 2); // add w17, w24, w20, uxtb #2 - __ addsw(r13, r28, r10, ext::uxth, 1); // adds w13, w28, w10, uxth #1 - __ sub(r15, r16, r2, ext::sxth, 2); // sub x15, x16, x2, sxth #2 - __ subsw(r29, r13, r13, ext::uxth, 2); // subs w29, w13, w13, uxth #2 - __ add(r12, r20, r12, ext::sxtw, 3); // add x12, x20, x12, sxtw #3 - __ adds(r30, r27, r11, ext::sxtb, 1); // adds x30, x27, x11, sxtb #1 - __ sub(r14, r7, r1, ext::sxtw, 2); // sub x14, x7, x1, sxtw #2 - __ subs(r29, r3, r27, ext::sxth, 1); // subs x29, x3, x27, sxth #1 + __ addw(r14, r6, r12, ext::uxtx, 3); // add w14, w6, w12, uxtx #3 + __ addsw(r17, r25, r30, ext::uxtw, 4); // adds w17, w25, w30, uxtw #4 + __ sub(r0, r17, r14, ext::uxtb, 1); // sub x0, x17, x14, uxtb #1 + __ subsw(r9, r24, r29, ext::sxtx, 1); // subs w9, w24, w29, sxtx #1 + __ add(r1, r22, r0, ext::sxtw, 2); // 
add x1, x22, x0, sxtw #2 + __ adds(r12, r28, r22, ext::uxth, 3); // adds x12, x28, x22, uxth #3 + __ sub(r10, r12, r17, ext::uxtw, 4); // sub x10, x12, x17, uxtw #4 + __ subs(r15, r28, r10, ext::sxtw, 3); // subs x15, x28, x10, sxtw #3 // ConditionalCompareOp - __ ccmnw(r0, r13, 14u, Assembler::MI); // ccmn w0, w13, #14, MI - __ ccmpw(r22, r17, 6u, Assembler::CC); // ccmp w22, w17, #6, CC - __ ccmn(r17, r30, 14u, Assembler::VS); // ccmn x17, x30, #14, VS - __ ccmp(r10, r19, 12u, Assembler::HI); // ccmp x10, x19, #12, HI + __ ccmnw(r19, r23, 2u, Assembler::LE); // ccmn w19, w23, #2, LE + __ ccmpw(r17, r9, 6u, Assembler::LO); // ccmp w17, w9, #6, LO + __ ccmn(r21, r8, 2u, Assembler::CC); // ccmn x21, x8, #2, CC + __ ccmp(r19, r5, 1u, Assembler::MI); // ccmp x19, x5, #1, MI // ConditionalCompareImmedOp - __ ccmnw(r6, 18, 2, Assembler::LE); // ccmn w6, #18, #2, LE - __ ccmpw(r9, 13, 4, Assembler::HI); // ccmp w9, #13, #4, HI - __ ccmn(r21, 11, 11, Assembler::LO); // ccmn x21, #11, #11, LO - __ ccmp(r4, 13, 2, Assembler::VC); // ccmp x4, #13, #2, VC + __ ccmnw(r22, 17, 12, Assembler::HI); // ccmn w22, #17, #12, HI + __ ccmpw(r17, 7, 3, Assembler::HS); // ccmp w17, #7, #3, HS + __ ccmn(r16, 28, 5, Assembler::LT); // ccmn x16, #28, #5, LT + __ ccmp(r22, 3, 5, Assembler::LS); // ccmp x22, #3, #5, LS // ConditionalSelectOp - __ cselw(r12, r2, r22, Assembler::HI); // csel w12, w2, w22, HI - __ csincw(r24, r16, r17, Assembler::HS); // csinc w24, w16, w17, HS - __ csinvw(r6, r7, r16, Assembler::LT); // csinv w6, w7, w16, LT - __ csnegw(r11, r27, r22, Assembler::LS); // csneg w11, w27, w22, LS - __ csel(r10, r3, r29, Assembler::LT); // csel x10, x3, x29, LT - __ csinc(r12, r26, r27, Assembler::CC); // csinc x12, x26, x27, CC - __ csinv(r15, r10, r21, Assembler::GT); // csinv x15, x10, x21, GT - __ csneg(r30, r23, r9, Assembler::GT); // csneg x30, x23, x9, GT + __ cselw(r29, r26, r12, Assembler::LT); // csel w29, w26, w12, LT + __ csincw(r27, r10, r15, Assembler::CC); // csinc 
w27, w10, w15, CC + __ csinvw(r21, r28, r30, Assembler::LS); // csinv w21, w28, w30, LS + __ csnegw(r9, r27, r30, Assembler::CC); // csneg w9, w27, w30, CC + __ csel(r29, r15, r29, Assembler::LE); // csel x29, x15, x29, LE + __ csinc(r25, r21, r4, Assembler::EQ); // csinc x25, x21, x4, EQ + __ csinv(r17, r21, r29, Assembler::VS); // csinv x17, x21, x29, VS + __ csneg(r21, r20, r6, Assembler::HI); // csneg x21, x20, x6, HI // TwoRegOp - __ rbitw(r30, r10); // rbit w30, w10 - __ rev16w(r29, r15); // rev16 w29, w15 - __ revw(r29, r30); // rev w29, w30 - __ clzw(r25, r21); // clz w25, w21 - __ clsw(r4, r0); // cls w4, w0 - __ rbit(r17, r21); // rbit x17, x21 - __ rev16(r29, r16); // rev16 x29, x16 - __ rev32(r21, r20); // rev32 x21, x20 - __ rev(r6, r19); // rev x6, x19 - __ clz(r30, r3); // clz x30, x3 - __ cls(r21, r19); // cls x21, x19 + __ rbitw(r30, r3); // rbit w30, w3 + __ rev16w(r21, r19); // rev16 w21, w19 + __ revw(r11, r24); // rev w11, w24 + __ clzw(r0, r27); // clz w0, w27 + __ clsw(r25, r14); // cls w25, w14 + __ rbit(r3, r14); // rbit x3, x14 + __ rev16(r17, r7); // rev16 x17, x7 + __ rev32(r15, r24); // rev32 x15, x24 + __ rev(r28, r17); // rev x28, x17 + __ clz(r25, r2); // clz x25, x2 + __ cls(r26, r28); // cls x26, x28 + __ pacia(r5, r25); // pacia x5, x25 + __ pacib(r26, r27); // pacib x26, x27 + __ pacda(r16, r17); // pacda x16, x17 + __ pacdb(r6, r21); // pacdb x6, x21 + __ autia(r12, r0); // autia x12, x0 + __ autib(r4, r12); // autib x4, x12 + __ autda(r27, r17); // autda x27, x17 + __ autdb(r28, r28); // autdb x28, x28 + __ braa(r2, r17); // braa x2, x17 + __ brab(r10, r15); // brab x10, x15 + __ blraa(r14, r14); // blraa x14, x14 + __ blrab(r3, r25); // blrab x3, x25 // ThreeRegOp - __ udivw(r11, r24, r0); // udiv w11, w24, w0 - __ sdivw(r27, r25, r14); // sdiv w27, w25, w14 - __ lslvw(r3, r14, r17); // lslv w3, w14, w17 - __ lsrvw(r7, r15, r24); // lsrv w7, w15, w24 - __ asrvw(r28, r17, r25); // asrv w28, w17, w25 - __ rorvw(r2, r26, r28); // 
rorv w2, w26, w28 - __ udiv(r5, r25, r26); // udiv x5, x25, x26 - __ sdiv(r27, r16, r17); // sdiv x27, x16, x17 - __ lslv(r6, r21, r12); // lslv x6, x21, x12 - __ lsrv(r0, r4, r12); // lsrv x0, x4, x12 - __ asrv(r27, r17, r28); // asrv x27, x17, x28 - __ rorv(r28, r2, r17); // rorv x28, x2, x17 - __ umulh(r10, r15, r14); // umulh x10, x15, x14 - __ smulh(r14, r3, r25); // smulh x14, x3, x25 + __ udivw(r15, r19, r14); // udiv w15, w19, w14 + __ sdivw(r5, r16, r4); // sdiv w5, w16, w4 + __ lslvw(r26, r25, r4); // lslv w26, w25, w4 + __ lsrvw(r2, r2, r12); // lsrv w2, w2, w12 + __ asrvw(r29, r17, r8); // asrv w29, w17, w8 + __ rorvw(r7, r3, r4); // rorv w7, w3, w4 + __ udiv(r25, r4, r26); // udiv x25, x4, x26 + __ sdiv(r25, r4, r17); // sdiv x25, x4, x17 + __ lslv(r0, r26, r17); // lslv x0, x26, x17 + __ lsrv(r23, r15, r21); // lsrv x23, x15, x21 + __ asrv(r28, r17, r27); // asrv x28, x17, x27 + __ rorv(r10, r3, r0); // rorv x10, x3, x0 + __ umulh(r7, r25, r9); // umulh x7, x25, x9 + __ smulh(r6, r15, r29); // smulh x6, x15, x29 // FourRegMulOp - __ maddw(r15, r19, r14, r5); // madd w15, w19, w14, w5 - __ msubw(r16, r4, r26, r25); // msub w16, w4, w26, w25 - __ madd(r4, r2, r2, r12); // madd x4, x2, x2, x12 - __ msub(r29, r17, r8, r7); // msub x29, x17, x8, x7 - __ smaddl(r3, r4, r25, r4); // smaddl x3, w4, w25, x4 - __ smsubl(r26, r25, r4, r17); // smsubl x26, w25, w4, x17 - __ umaddl(r0, r26, r17, r23); // umaddl x0, w26, w17, x23 - __ umsubl(r15, r21, r28, r17); // umsubl x15, w21, w28, x17 + __ maddw(r15, r10, r2, r17); // madd w15, w10, w2, w17 + __ msubw(r7, r11, r11, r23); // msub w7, w11, w11, w23 + __ madd(r7, r29, r23, r14); // madd x7, x29, x23, x14 + __ msub(r27, r11, r11, r4); // msub x27, x11, x11, x4 + __ smaddl(r24, r12, r15, r14); // smaddl x24, w12, w15, x14 + __ smsubl(r20, r11, r28, r13); // smsubl x20, w11, w28, x13 + __ umaddl(r11, r12, r23, r30); // umaddl x11, w12, w23, x30 + __ umsubl(r26, r14, r9, r13); // umsubl x26, w14, w9, x13 // 
ThreeRegFloatOp - __ fabds(v27, v10, v3); // fabd s27, s10, s3 - __ fmuls(v0, v7, v25); // fmul s0, s7, s25 - __ fdivs(v9, v6, v15); // fdiv s9, s6, s15 - __ fadds(v29, v15, v10); // fadd s29, s15, s10 - __ fsubs(v2, v17, v7); // fsub s2, s17, s7 - __ fabdd(v11, v11, v23); // fabd d11, d11, d23 - __ fmuld(v7, v29, v23); // fmul d7, d29, d23 - __ fdivd(v14, v27, v11); // fdiv d14, d27, d11 - __ faddd(v11, v4, v24); // fadd d11, d4, d24 - __ fsubd(v12, v15, v14); // fsub d12, d15, d14 + __ fabds(v10, v7, v5); // fabd s10, s7, s5 + __ fmuls(v29, v15, v3); // fmul s29, s15, s3 + __ fdivs(v11, v12, v15); // fdiv s11, s12, s15 + __ fadds(v30, v30, v17); // fadd s30, s30, s17 + __ fsubs(v19, v20, v15); // fsub s19, s20, s15 + __ fabdd(v15, v9, v21); // fabd d15, d9, d21 + __ fmuld(v2, v9, v27); // fmul d2, d9, d27 + __ fdivd(v7, v29, v30); // fdiv d7, d29, d30 + __ faddd(v17, v1, v2); // fadd d17, d1, d2 + __ fsubd(v6, v10, v3); // fsub d6, d10, d3 // FourRegFloatOp - __ fmadds(v20, v11, v28, v13); // fmadd s20, s11, s28, s13 - __ fmsubs(v11, v12, v23, v30); // fmsub s11, s12, s23, s30 - __ fnmadds(v26, v14, v9, v13); // fnmadd s26, s14, s9, s13 - __ fnmadds(v10, v7, v5, v29); // fnmadd s10, s7, s5, s29 - __ fmaddd(v15, v3, v11, v12); // fmadd d15, d3, d11, d12 - __ fmsubd(v15, v30, v30, v17); // fmsub d15, d30, d30, d17 - __ fnmaddd(v19, v20, v15, v15); // fnmadd d19, d20, d15, d15 - __ fnmaddd(v9, v21, v2, v9); // fnmadd d9, d21, d2, d9 + __ fmadds(v24, v11, v7, v1); // fmadd s24, s11, s7, s1 + __ fmsubs(v11, v0, v3, v17); // fmsub s11, s0, s3, s17 + __ fnmadds(v28, v6, v22, v6); // fnmadd s28, s6, s22, s6 + __ fnmadds(v0, v27, v26, v2); // fnmadd s0, s27, s26, s2 + __ fmaddd(v5, v7, v28, v11); // fmadd d5, d7, d28, d11 + __ fmsubd(v25, v13, v11, v23); // fmsub d25, d13, d11, d23 + __ fnmaddd(v19, v8, v17, v21); // fnmadd d19, d8, d17, d21 + __ fnmaddd(v25, v20, v19, v17); // fnmadd d25, d20, d19, d17 // TwoRegFloatOp - __ fmovs(v27, v7); // fmov s27, s7 - __ fabss(v29, 
v30); // fabs s29, s30 - __ fnegs(v17, v1); // fneg s17, s1 - __ fsqrts(v2, v6); // fsqrt s2, s6 - __ fcvts(v10, v3); // fcvt d10, s3 - __ fmovd(v24, v11); // fmov d24, d11 - __ fabsd(v7, v1); // fabs d7, d1 - __ fnegd(v11, v0); // fneg d11, d0 - __ fsqrtd(v3, v17); // fsqrt d3, d17 - __ fcvtd(v28, v6); // fcvt s28, d6 + __ fmovs(v2, v29); // fmov s2, s29 + __ fabss(v22, v8); // fabs s22, s8 + __ fnegs(v21, v19); // fneg s21, s19 + __ fsqrts(v20, v11); // fsqrt s20, s11 + __ fcvts(v17, v20); // fcvt d17, s20 + __ fmovd(v6, v15); // fmov d6, d15 + __ fabsd(v3, v3); // fabs d3, d3 + __ fnegd(v28, v3); // fneg d28, d3 + __ fsqrtd(v27, v14); // fsqrt d27, d14 + __ fcvtd(v14, v10); // fcvt s14, d10 // FloatConvertOp - __ fcvtzsw(r22, v6); // fcvtzs w22, s6 - __ fcvtzs(r0, v27); // fcvtzs x0, s27 - __ fcvtzdw(r26, v2); // fcvtzs w26, d2 - __ fcvtzd(r5, v7); // fcvtzs x5, d7 - __ scvtfws(v28, r11); // scvtf s28, w11 - __ scvtfs(v25, r13); // scvtf s25, x13 - __ scvtfwd(v11, r23); // scvtf d11, w23 - __ scvtfd(v19, r8); // scvtf d19, x8 - __ fmovs(r17, v21); // fmov w17, s21 - __ fmovd(r25, v20); // fmov x25, d20 - __ fmovs(v19, r17); // fmov s19, w17 - __ fmovd(v2, r29); // fmov d2, x29 + __ fcvtzsw(r12, v11); // fcvtzs w12, s11 + __ fcvtzs(r17, v10); // fcvtzs x17, s10 + __ fcvtzdw(r25, v7); // fcvtzs w25, d7 + __ fcvtzd(r7, v14); // fcvtzs x7, d14 + __ scvtfws(v28, r0); // scvtf s28, w0 + __ scvtfs(v22, r0); // scvtf s22, x0 + __ scvtfwd(v12, r23); // scvtf d12, w23 + __ scvtfd(v13, r13); // scvtf d13, x13 + __ fmovs(r7, v14); // fmov w7, s14 + __ fmovd(r7, v8); // fmov x7, d8 + __ fmovs(v20, r17); // fmov s20, w17 + __ fmovd(v28, r30); // fmov d28, x30 // TwoRegFloatOp - __ fcmps(v22, v8); // fcmp s22, s8 - __ fcmpd(v21, v19); // fcmp d21, d19 + __ fcmps(v16, v2); // fcmp s16, s2 + __ fcmpd(v9, v16); // fcmp d9, d16 __ fcmps(v20, 0.0); // fcmp s20, #0.0 - __ fcmpd(v11, 0.0); // fcmp d11, #0.0 + __ fcmpd(v29, 0.0); // fcmp d29, #0.0 // LoadStorePairOp - __ stpw(r20, r6, 
Address(r15, -32)); // stp w20, w6, [x15, #-32] - __ ldpw(r27, r14, Address(r3, -208)); // ldp w27, w14, [x3, #-208] - __ ldpsw(r16, r10, Address(r11, -80)); // ldpsw x16, x10, [x11, #-80] - __ stp(r7, r7, Address(r14, 64)); // stp x7, x7, [x14, #64] - __ ldp(r12, r23, Address(r0, 112)); // ldp x12, x23, [x0, #112] + __ stpw(r1, r26, Address(r24, -208)); // stp w1, w26, [x24, #-208] + __ ldpw(r5, r11, Address(r12, 48)); // ldp w5, w11, [x12, #48] + __ ldpsw(r21, r15, Address(r27, 48)); // ldpsw x21, x15, [x27, #48] + __ stp(r5, r28, Address(r22, 32)); // stp x5, x28, [x22, #32] + __ ldp(r27, r17, Address(r19, -32)); // ldp x27, x17, [x19, #-32] // LoadStorePairOp - __ stpw(r13, r7, Address(__ pre(r6, -80))); // stp w13, w7, [x6, #-80]! - __ ldpw(r30, r15, Address(__ pre(r2, -144))); // ldp w30, w15, [x2, #-144]! - __ ldpsw(r4, r1, Address(__ pre(r27, -144))); // ldpsw x4, x1, [x27, #-144]! - __ stp(r23, r14, Address(__ pre(r11, 64))); // stp x23, x14, [x11, #64]! - __ ldp(r29, r27, Address(__ pre(r21, -192))); // ldp x29, x27, [x21, #-192]! + __ stpw(r13, r7, Address(__ pre(r26, -176))); // stp w13, w7, [x26, #-176]! + __ ldpw(r13, r21, Address(__ pre(r6, -48))); // ldp w13, w21, [x6, #-48]! + __ ldpsw(r20, r30, Address(__ pre(r27, 16))); // ldpsw x20, x30, [x27, #16]! + __ stp(r21, r5, Address(__ pre(r10, -128))); // stp x21, x5, [x10, #-128]! + __ ldp(r14, r4, Address(__ pre(r23, -96))); // ldp x14, x4, [x23, #-96]! 
// LoadStorePairOp - __ stpw(r22, r5, Address(__ post(r21, -48))); // stp w22, w5, [x21], #-48 - __ ldpw(r27, r17, Address(__ post(r6, -32))); // ldp w27, w17, [x6], #-32 - __ ldpsw(r16, r5, Address(__ post(r1, -80))); // ldpsw x16, x5, [x1], #-80 - __ stp(r13, r20, Address(__ post(r22, -208))); // stp x13, x20, [x22], #-208 - __ ldp(r30, r27, Address(__ post(r10, 80))); // ldp x30, x27, [x10], #80 + __ stpw(r29, r12, Address(__ post(r16, 32))); // stp w29, w12, [x16], #32 + __ ldpw(r26, r17, Address(__ post(r27, 96))); // ldp w26, w17, [x27], #96 + __ ldpsw(r4, r20, Address(__ post(r14, -96))); // ldpsw x4, x20, [x14], #-96 + __ stp(r16, r2, Address(__ post(r14, -112))); // stp x16, x2, [x14], #-112 + __ ldp(r23, r24, Address(__ post(r7, -256))); // ldp x23, x24, [x7], #-256 // LoadStorePairOp - __ stnpw(r5, r17, Address(r11, 16)); // stnp w5, w17, [x11, #16] - __ ldnpw(r14, r4, Address(r26, -96)); // ldnp w14, w4, [x26, #-96] - __ stnp(r23, r29, Address(r12, 32)); // stnp x23, x29, [x12, #32] - __ ldnp(r0, r6, Address(r21, -80)); // ldnp x0, x6, [x21, #-80] + __ stnpw(r0, r26, Address(r15, 128)); // stnp w0, w26, [x15, #128] + __ ldnpw(r26, r6, Address(r8, -208)); // ldnp w26, w6, [x8, #-208] + __ stnp(r15, r10, Address(r25, -112)); // stnp x15, x10, [x25, #-112] + __ ldnp(r16, r1, Address(r19, -160)); // ldnp x16, x1, [x19, #-160] // LdStNEONOp - __ ld1(v15, __ T8B, Address(r26)); // ld1 {v15.8B}, [x26] - __ ld1(v23, v24, __ T16B, Address(__ post(r11, 32))); // ld1 {v23.16B, v24.16B}, [x11], 32 - __ ld1(v8, v9, v10, __ T1D, Address(__ post(r23, r7))); // ld1 {v8.1D, v9.1D, v10.1D}, [x23], x7 - __ ld1(v19, v20, v21, v22, __ T8H, Address(__ post(r25, 64))); // ld1 {v19.8H, v20.8H, v21.8H, v22.8H}, [x25], 64 - __ ld1r(v29, __ T8B, Address(r17)); // ld1r {v29.8B}, [x17] - __ ld1r(v24, __ T4S, Address(__ post(r23, 4))); // ld1r {v24.4S}, [x23], 4 - __ ld1r(v10, __ T1D, Address(__ post(r5, r25))); // ld1r {v10.1D}, [x5], x25 - __ ld2(v17, v18, __ T2D, Address(r10)); 
// ld2 {v17.2D, v18.2D}, [x10] - __ ld2(v12, v13, __ T4H, Address(__ post(r15, 16))); // ld2 {v12.4H, v13.4H}, [x15], 16 - __ ld2r(v25, v26, __ T16B, Address(r17)); // ld2r {v25.16B, v26.16B}, [x17] - __ ld2r(v1, v2, __ T2S, Address(__ post(r30, 8))); // ld2r {v1.2S, v2.2S}, [x30], 8 - __ ld2r(v16, v17, __ T2D, Address(__ post(r17, r9))); // ld2r {v16.2D, v17.2D}, [x17], x9 - __ ld3(v25, v26, v27, __ T4S, Address(__ post(r12, r2))); // ld3 {v25.4S, v26.4S, v27.4S}, [x12], x2 - __ ld3(v26, v27, v28, __ T2S, Address(r19)); // ld3 {v26.2S, v27.2S, v28.2S}, [x19] - __ ld3r(v15, v16, v17, __ T8H, Address(r21)); // ld3r {v15.8H, v16.8H, v17.8H}, [x21] - __ ld3r(v25, v26, v27, __ T4S, Address(__ post(r13, 12))); // ld3r {v25.4S, v26.4S, v27.4S}, [x13], 12 - __ ld3r(v14, v15, v16, __ T1D, Address(__ post(r28, r29))); // ld3r {v14.1D, v15.1D, v16.1D}, [x28], x29 - __ ld4(v17, v18, v19, v20, __ T8H, Address(__ post(r29, 64))); // ld4 {v17.8H, v18.8H, v19.8H, v20.8H}, [x29], 64 - __ ld4(v27, v28, v29, v30, __ T8B, Address(__ post(r7, r0))); // ld4 {v27.8B, v28.8B, v29.8B, v30.8B}, [x7], x0 - __ ld4r(v24, v25, v26, v27, __ T8B, Address(r17)); // ld4r {v24.8B, v25.8B, v26.8B, v27.8B}, [x17] - __ ld4r(v0, v1, v2, v3, __ T4H, Address(__ post(r26, 8))); // ld4r {v0.4H, v1.4H, v2.4H, v3.4H}, [x26], 8 - __ ld4r(v12, v13, v14, v15, __ T2S, Address(__ post(r25, r2))); // ld4r {v12.2S, v13.2S, v14.2S, v15.2S}, [x25], x2 + __ ld1(v27, __ T8B, Address(r30)); // ld1 {v27.8B}, [x30] + __ ld1(v25, v26, __ T16B, Address(__ post(r3, 32))); // ld1 {v25.16B, v26.16B}, [x3], 32 + __ ld1(v30, v31, v0, __ T1D, Address(__ post(r16, r10))); // ld1 {v30.1D, v31.1D, v0.1D}, [x16], x10 + __ ld1(v16, v17, v18, v19, __ T8H, Address(__ post(r19, 64))); // ld1 {v16.8H, v17.8H, v18.8H, v19.8H}, [x19], 64 + __ ld1r(v23, __ T8B, Address(r24)); // ld1r {v23.8B}, [x24] + __ ld1r(v8, __ T4S, Address(__ post(r10, 4))); // ld1r {v8.4S}, [x10], 4 + __ ld1r(v9, __ T1D, Address(__ post(r20, r23))); // ld1r {v9.1D}, 
[x20], x23 + __ ld2(v2, v3, __ T2D, Address(r3)); // ld2 {v2.2D, v3.2D}, [x3] + __ ld2(v8, v9, __ T4H, Address(__ post(r30, 16))); // ld2 {v8.4H, v9.4H}, [x30], 16 + __ ld2r(v4, v5, __ T16B, Address(r26)); // ld2r {v4.16B, v5.16B}, [x26] + __ ld2r(v3, v4, __ T2S, Address(__ post(r17, 8))); // ld2r {v3.2S, v4.2S}, [x17], 8 + __ ld2r(v29, v30, __ T2D, Address(__ post(r11, r16))); // ld2r {v29.2D, v30.2D}, [x11], x16 + __ ld3(v1, v2, v3, __ T4S, Address(__ post(r0, r23))); // ld3 {v1.4S, v2.4S, v3.4S}, [x0], x23 + __ ld3(v0, v1, v2, __ T2S, Address(r21)); // ld3 {v0.2S, v1.2S, v2.2S}, [x21] + __ ld3r(v5, v6, v7, __ T8H, Address(r7)); // ld3r {v5.8H, v6.8H, v7.8H}, [x7] + __ ld3r(v1, v2, v3, __ T4S, Address(__ post(r7, 12))); // ld3r {v1.4S, v2.4S, v3.4S}, [x7], 12 + __ ld3r(v2, v3, v4, __ T1D, Address(__ post(r5, r15))); // ld3r {v2.1D, v3.1D, v4.1D}, [x5], x15 + __ ld4(v27, v28, v29, v30, __ T8H, Address(__ post(r29, 64))); // ld4 {v27.8H, v28.8H, v29.8H, v30.8H}, [x29], 64 + __ ld4(v24, v25, v26, v27, __ T8B, Address(__ post(r4, r7))); // ld4 {v24.8B, v25.8B, v26.8B, v27.8B}, [x4], x7 + __ ld4r(v15, v16, v17, v18, __ T8B, Address(r23)); // ld4r {v15.8B, v16.8B, v17.8B, v18.8B}, [x23] + __ ld4r(v14, v15, v16, v17, __ T4H, Address(__ post(r21, 8))); // ld4r {v14.4H, v15.4H, v16.4H, v17.4H}, [x21], 8 + __ ld4r(v20, v21, v22, v23, __ T2S, Address(__ post(r9, r25))); // ld4r {v20.2S, v21.2S, v22.2S, v23.2S}, [x9], x25 // NEONReduceInstruction - __ addv(v22, __ T8B, v23); // addv b22, v23.8B - __ addv(v27, __ T16B, v28); // addv b27, v28.16B - __ addv(v4, __ T4H, v5); // addv h4, v5.4H - __ addv(v7, __ T8H, v8); // addv h7, v8.8H - __ addv(v6, __ T4S, v7); // addv s6, v7.4S - __ smaxv(v1, __ T8B, v2); // smaxv b1, v2.8B - __ smaxv(v26, __ T16B, v27); // smaxv b26, v27.16B - __ smaxv(v15, __ T4H, v16); // smaxv h15, v16.4H - __ smaxv(v2, __ T8H, v3); // smaxv h2, v3.8H - __ smaxv(v13, __ T4S, v14); // smaxv s13, v14.4S - __ fmaxv(v13, __ T4S, v14); // fmaxv s13, v14.4S - 
__ sminv(v24, __ T8B, v25); // sminv b24, v25.8B - __ uminv(v23, __ T8B, v24); // uminv b23, v24.8B - __ sminv(v4, __ T16B, v5); // sminv b4, v5.16B - __ uminv(v19, __ T16B, v20); // uminv b19, v20.16B - __ sminv(v15, __ T4H, v16); // sminv h15, v16.4H - __ uminv(v0, __ T4H, v1); // uminv h0, v1.4H - __ sminv(v4, __ T8H, v5); // sminv h4, v5.8H - __ uminv(v20, __ T8H, v21); // uminv h20, v21.8H - __ sminv(v11, __ T4S, v12); // sminv s11, v12.4S - __ uminv(v29, __ T4S, v30); // uminv s29, v30.4S - __ fminv(v15, __ T4S, v16); // fminv s15, v16.4S - __ fmaxp(v21, v22, __ S); // fmaxp s21, v22.2S - __ fmaxp(v4, v5, __ D); // fmaxp d4, v5.2D - __ fminp(v14, v15, __ S); // fminp s14, v15.2S - __ fminp(v22, v23, __ D); // fminp d22, v23.2D + __ addv(v23, __ T8B, v24); // addv b23, v24.8B + __ addv(v26, __ T16B, v27); // addv b26, v27.16B + __ addv(v5, __ T4H, v6); // addv h5, v6.4H + __ addv(v6, __ T8H, v7); // addv h6, v7.8H + __ addv(v15, __ T4S, v16); // addv s15, v16.4S + __ smaxv(v15, __ T8B, v16); // smaxv b15, v16.8B + __ smaxv(v25, __ T16B, v26); // smaxv b25, v26.16B + __ smaxv(v16, __ T4H, v17); // smaxv h16, v17.4H + __ smaxv(v27, __ T8H, v28); // smaxv h27, v28.8H + __ smaxv(v24, __ T4S, v25); // smaxv s24, v25.4S + __ fmaxv(v15, __ T4S, v16); // fmaxv s15, v16.4S + __ sminv(v25, __ T8B, v26); // sminv b25, v26.8B + __ uminv(v14, __ T8B, v15); // uminv b14, v15.8B + __ sminv(v10, __ T16B, v11); // sminv b10, v11.16B + __ uminv(v13, __ T16B, v14); // uminv b13, v14.16B + __ sminv(v14, __ T4H, v15); // sminv h14, v15.4H + __ uminv(v20, __ T4H, v21); // uminv h20, v21.4H + __ sminv(v1, __ T8H, v2); // sminv h1, v2.8H + __ uminv(v22, __ T8H, v23); // uminv h22, v23.8H + __ sminv(v30, __ T4S, v31); // sminv s30, v31.4S + __ uminv(v14, __ T4S, v15); // uminv s14, v15.4S + __ fminv(v2, __ T4S, v3); // fminv s2, v3.4S + __ fmaxp(v6, v7, __ S); // fmaxp s6, v7.2S + __ fmaxp(v3, v4, __ D); // fmaxp d3, v4.2D + __ fminp(v7, v8, __ S); // fminp s7, v8.2S + __ fminp(v24, 
v25, __ D); // fminp d24, v25.2D // TwoRegNEONOp - __ absr(v25, __ T8B, v26); // abs v25.8B, v26.8B - __ absr(v6, __ T16B, v7); // abs v6.16B, v7.16B - __ absr(v12, __ T4H, v13); // abs v12.4H, v13.4H - __ absr(v14, __ T8H, v15); // abs v14.8H, v15.8H - __ absr(v13, __ T2S, v14); // abs v13.2S, v14.2S - __ absr(v14, __ T4S, v15); // abs v14.4S, v15.4S - __ absr(v9, __ T2D, v10); // abs v9.2D, v10.2D + __ absr(v0, __ T8B, v1); // abs v0.8B, v1.8B + __ absr(v27, __ T16B, v28); // abs v27.16B, v28.16B + __ absr(v29, __ T4H, v30); // abs v29.4H, v30.4H + __ absr(v5, __ T8H, v6); // abs v5.8H, v6.8H + __ absr(v5, __ T2S, v6); // abs v5.2S, v6.2S + __ absr(v29, __ T4S, v30); // abs v29.4S, v30.4S + __ absr(v11, __ T2D, v12); // abs v11.2D, v12.2D __ fabs(v25, __ T2S, v26); // fabs v25.2S, v26.2S - __ fabs(v28, __ T4S, v29); // fabs v28.4S, v29.4S - __ fabs(v10, __ T2D, v11); // fabs v10.2D, v11.2D - __ fneg(v19, __ T2S, v20); // fneg v19.2S, v20.2S - __ fneg(v11, __ T4S, v12); // fneg v11.4S, v12.4S - __ fneg(v17, __ T2D, v18); // fneg v17.2D, v18.2D - __ fsqrt(v21, __ T2S, v22); // fsqrt v21.2S, v22.2S - __ fsqrt(v15, __ T4S, v16); // fsqrt v15.4S, v16.4S - __ fsqrt(v20, __ T2D, v21); // fsqrt v20.2D, v21.2D - __ notr(v23, __ T8B, v24); // not v23.8B, v24.8B - __ notr(v26, __ T16B, v27); // not v26.16B, v27.16B + __ fabs(v0, __ T4S, v1); // fabs v0.4S, v1.4S + __ fabs(v30, __ T2D, v31); // fabs v30.2D, v31.2D + __ fneg(v0, __ T2S, v1); // fneg v0.2S, v1.2S + __ fneg(v17, __ T4S, v18); // fneg v17.4S, v18.4S + __ fneg(v28, __ T2D, v29); // fneg v28.2D, v29.2D + __ fsqrt(v25, __ T2S, v26); // fsqrt v25.2S, v26.2S + __ fsqrt(v9, __ T4S, v10); // fsqrt v9.4S, v10.4S + __ fsqrt(v25, __ T2D, v26); // fsqrt v25.2D, v26.2D + __ notr(v12, __ T8B, v13); // not v12.8B, v13.8B + __ notr(v15, __ T16B, v16); // not v15.16B, v16.16B // ThreeRegNEONOp - __ andr(v5, __ T8B, v6, v7); // and v5.8B, v6.8B, v7.8B - __ andr(v6, __ T16B, v7, v8); // and v6.16B, v7.16B, v8.16B - __ orr(v15, __ 
T8B, v16, v17); // orr v15.8B, v16.8B, v17.8B - __ orr(v15, __ T16B, v16, v17); // orr v15.16B, v16.16B, v17.16B - __ eor(v25, __ T8B, v26, v27); // eor v25.8B, v26.8B, v27.8B - __ eor(v16, __ T16B, v17, v18); // eor v16.16B, v17.16B, v18.16B - __ addv(v27, __ T8B, v28, v29); // add v27.8B, v28.8B, v29.8B - __ addv(v24, __ T16B, v25, v26); // add v24.16B, v25.16B, v26.16B - __ addv(v15, __ T4H, v16, v17); // add v15.4H, v16.4H, v17.4H - __ addv(v25, __ T8H, v26, v27); // add v25.8H, v26.8H, v27.8H - __ addv(v14, __ T2S, v15, v16); // add v14.2S, v15.2S, v16.2S - __ addv(v10, __ T4S, v11, v12); // add v10.4S, v11.4S, v12.4S - __ addv(v13, __ T2D, v14, v15); // add v13.2D, v14.2D, v15.2D - __ fadd(v14, __ T2S, v15, v16); // fadd v14.2S, v15.2S, v16.2S + __ andr(v11, __ T8B, v12, v13); // and v11.8B, v12.8B, v13.8B + __ andr(v10, __ T16B, v11, v12); // and v10.16B, v11.16B, v12.16B + __ orr(v17, __ T8B, v18, v19); // orr v17.8B, v18.8B, v19.8B + __ orr(v24, __ T16B, v25, v26); // orr v24.16B, v25.16B, v26.16B + __ eor(v21, __ T8B, v22, v23); // eor v21.8B, v22.8B, v23.8B + __ eor(v23, __ T16B, v24, v25); // eor v23.16B, v24.16B, v25.16B + __ addv(v0, __ T8B, v1, v2); // add v0.8B, v1.8B, v2.8B + __ addv(v16, __ T16B, v17, v18); // add v16.16B, v17.16B, v18.16B + __ addv(v10, __ T4H, v11, v12); // add v10.4H, v11.4H, v12.4H + __ addv(v6, __ T8H, v7, v8); // add v6.8H, v7.8H, v8.8H + __ addv(v28, __ T2S, v29, v30); // add v28.2S, v29.2S, v30.2S + __ addv(v6, __ T4S, v7, v8); // add v6.4S, v7.4S, v8.4S + __ addv(v5, __ T2D, v6, v7); // add v5.2D, v6.2D, v7.2D + __ fadd(v5, __ T2S, v6, v7); // fadd v5.2S, v6.2S, v7.2S __ fadd(v20, __ T4S, v21, v22); // fadd v20.4S, v21.4S, v22.4S - __ fadd(v1, __ T2D, v2, v3); // fadd v1.2D, v2.2D, v3.2D - __ subv(v22, __ T8B, v23, v24); // sub v22.8B, v23.8B, v24.8B - __ subv(v30, __ T16B, v31, v0); // sub v30.16B, v31.16B, v0.16B - __ subv(v14, __ T4H, v15, v16); // sub v14.4H, v15.4H, v16.4H - __ subv(v2, __ T8H, v3, v4); // sub v2.8H, 
v3.8H, v4.8H - __ subv(v6, __ T2S, v7, v8); // sub v6.2S, v7.2S, v8.2S - __ subv(v3, __ T4S, v4, v5); // sub v3.4S, v4.4S, v5.4S - __ subv(v7, __ T2D, v8, v9); // sub v7.2D, v8.2D, v9.2D - __ fsub(v24, __ T2S, v25, v26); // fsub v24.2S, v25.2S, v26.2S - __ fsub(v0, __ T4S, v1, v2); // fsub v0.4S, v1.4S, v2.4S - __ fsub(v27, __ T2D, v28, v29); // fsub v27.2D, v28.2D, v29.2D - __ mulv(v29, __ T8B, v30, v31); // mul v29.8B, v30.8B, v31.8B - __ mulv(v5, __ T16B, v6, v7); // mul v5.16B, v6.16B, v7.16B - __ mulv(v5, __ T4H, v6, v7); // mul v5.4H, v6.4H, v7.4H - __ mulv(v29, __ T8H, v30, v31); // mul v29.8H, v30.8H, v31.8H - __ mulv(v11, __ T2S, v12, v13); // mul v11.2S, v12.2S, v13.2S - __ mulv(v25, __ T4S, v26, v27); // mul v25.4S, v26.4S, v27.4S - __ fabd(v0, __ T2S, v1, v2); // fabd v0.2S, v1.2S, v2.2S - __ fabd(v30, __ T4S, v31, v0); // fabd v30.4S, v31.4S, v0.4S - __ fabd(v0, __ T2D, v1, v2); // fabd v0.2D, v1.2D, v2.2D - __ fmul(v17, __ T2S, v18, v19); // fmul v17.2S, v18.2S, v19.2S - __ fmul(v28, __ T4S, v29, v30); // fmul v28.4S, v29.4S, v30.4S - __ fmul(v25, __ T2D, v26, v27); // fmul v25.2D, v26.2D, v27.2D - __ mlav(v9, __ T4H, v10, v11); // mla v9.4H, v10.4H, v11.4H - __ mlav(v25, __ T8H, v26, v27); // mla v25.8H, v26.8H, v27.8H - __ mlav(v12, __ T2S, v13, v14); // mla v12.2S, v13.2S, v14.2S - __ mlav(v15, __ T4S, v16, v17); // mla v15.4S, v16.4S, v17.4S - __ fmla(v11, __ T2S, v12, v13); // fmla v11.2S, v12.2S, v13.2S - __ fmla(v10, __ T4S, v11, v12); // fmla v10.4S, v11.4S, v12.4S - __ fmla(v17, __ T2D, v18, v19); // fmla v17.2D, v18.2D, v19.2D - __ mlsv(v24, __ T4H, v25, v26); // mls v24.4H, v25.4H, v26.4H - __ mlsv(v21, __ T8H, v22, v23); // mls v21.8H, v22.8H, v23.8H - __ mlsv(v23, __ T2S, v24, v25); // mls v23.2S, v24.2S, v25.2S - __ mlsv(v0, __ T4S, v1, v2); // mls v0.4S, v1.4S, v2.4S - __ fmls(v16, __ T2S, v17, v18); // fmls v16.2S, v17.2S, v18.2S - __ fmls(v10, __ T4S, v11, v12); // fmls v10.4S, v11.4S, v12.4S - __ fmls(v6, __ T2D, v7, v8); // fmls 
v6.2D, v7.2D, v8.2D - __ fdiv(v28, __ T2S, v29, v30); // fdiv v28.2S, v29.2S, v30.2S - __ fdiv(v6, __ T4S, v7, v8); // fdiv v6.4S, v7.4S, v8.4S - __ fdiv(v5, __ T2D, v6, v7); // fdiv v5.2D, v6.2D, v7.2D - __ maxv(v5, __ T8B, v6, v7); // smax v5.8B, v6.8B, v7.8B - __ maxv(v20, __ T16B, v21, v22); // smax v20.16B, v21.16B, v22.16B - __ maxv(v17, __ T4H, v18, v19); // smax v17.4H, v18.4H, v19.4H - __ maxv(v15, __ T8H, v16, v17); // smax v15.8H, v16.8H, v17.8H - __ maxv(v17, __ T2S, v18, v19); // smax v17.2S, v18.2S, v19.2S - __ maxv(v29, __ T4S, v30, v31); // smax v29.4S, v30.4S, v31.4S - __ smaxp(v26, __ T8B, v27, v28); // smaxp v26.8B, v27.8B, v28.8B - __ smaxp(v28, __ T16B, v29, v30); // smaxp v28.16B, v29.16B, v30.16B - __ smaxp(v1, __ T4H, v2, v3); // smaxp v1.4H, v2.4H, v3.4H - __ smaxp(v27, __ T8H, v28, v29); // smaxp v27.8H, v28.8H, v29.8H - __ smaxp(v0, __ T2S, v1, v2); // smaxp v0.2S, v1.2S, v2.2S - __ smaxp(v20, __ T4S, v21, v22); // smaxp v20.4S, v21.4S, v22.4S - __ fmax(v28, __ T2S, v29, v30); // fmax v28.2S, v29.2S, v30.2S - __ fmax(v15, __ T4S, v16, v17); // fmax v15.4S, v16.4S, v17.4S - __ fmax(v12, __ T2D, v13, v14); // fmax v12.2D, v13.2D, v14.2D - __ minv(v10, __ T8B, v11, v12); // smin v10.8B, v11.8B, v12.8B - __ minv(v28, __ T16B, v29, v30); // smin v28.16B, v29.16B, v30.16B - __ minv(v28, __ T4H, v29, v30); // smin v28.4H, v29.4H, v30.4H - __ minv(v19, __ T8H, v20, v21); // smin v19.8H, v20.8H, v21.8H - __ minv(v22, __ T2S, v23, v24); // smin v22.2S, v23.2S, v24.2S - __ minv(v10, __ T4S, v11, v12); // smin v10.4S, v11.4S, v12.4S - __ sminp(v4, __ T8B, v5, v6); // sminp v4.8B, v5.8B, v6.8B - __ sminp(v30, __ T16B, v31, v0); // sminp v30.16B, v31.16B, v0.16B - __ sminp(v20, __ T4H, v21, v22); // sminp v20.4H, v21.4H, v22.4H - __ sminp(v8, __ T8H, v9, v10); // sminp v8.8H, v9.8H, v10.8H - __ sminp(v30, __ T2S, v31, v0); // sminp v30.2S, v31.2S, v0.2S - __ sminp(v17, __ T4S, v18, v19); // sminp v17.4S, v18.4S, v19.4S - __ fmin(v10, __ T2S, v11, v12); 
// fmin v10.2S, v11.2S, v12.2S - __ fmin(v27, __ T4S, v28, v29); // fmin v27.4S, v28.4S, v29.4S - __ fmin(v2, __ T2D, v3, v4); // fmin v2.2D, v3.2D, v4.2D - __ cmeq(v24, __ T8B, v25, v26); // cmeq v24.8B, v25.8B, v26.8B - __ cmeq(v4, __ T16B, v5, v6); // cmeq v4.16B, v5.16B, v6.16B - __ cmeq(v3, __ T4H, v4, v5); // cmeq v3.4H, v4.4H, v5.4H - __ cmeq(v8, __ T8H, v9, v10); // cmeq v8.8H, v9.8H, v10.8H - __ cmeq(v22, __ T2S, v23, v24); // cmeq v22.2S, v23.2S, v24.2S + __ fadd(v17, __ T2D, v18, v19); // fadd v17.2D, v18.2D, v19.2D + __ subv(v15, __ T8B, v16, v17); // sub v15.8B, v16.8B, v17.8B + __ subv(v17, __ T16B, v18, v19); // sub v17.16B, v18.16B, v19.16B + __ subv(v29, __ T4H, v30, v31); // sub v29.4H, v30.4H, v31.4H + __ subv(v26, __ T8H, v27, v28); // sub v26.8H, v27.8H, v28.8H + __ subv(v28, __ T2S, v29, v30); // sub v28.2S, v29.2S, v30.2S + __ subv(v1, __ T4S, v2, v3); // sub v1.4S, v2.4S, v3.4S + __ subv(v27, __ T2D, v28, v29); // sub v27.2D, v28.2D, v29.2D + __ fsub(v0, __ T2S, v1, v2); // fsub v0.2S, v1.2S, v2.2S + __ fsub(v20, __ T4S, v21, v22); // fsub v20.4S, v21.4S, v22.4S + __ fsub(v28, __ T2D, v29, v30); // fsub v28.2D, v29.2D, v30.2D + __ mulv(v15, __ T8B, v16, v17); // mul v15.8B, v16.8B, v17.8B + __ mulv(v12, __ T16B, v13, v14); // mul v12.16B, v13.16B, v14.16B + __ mulv(v10, __ T4H, v11, v12); // mul v10.4H, v11.4H, v12.4H + __ mulv(v28, __ T8H, v29, v30); // mul v28.8H, v29.8H, v30.8H + __ mulv(v28, __ T2S, v29, v30); // mul v28.2S, v29.2S, v30.2S + __ mulv(v19, __ T4S, v20, v21); // mul v19.4S, v20.4S, v21.4S + __ fabd(v22, __ T2S, v23, v24); // fabd v22.2S, v23.2S, v24.2S + __ fabd(v10, __ T4S, v11, v12); // fabd v10.4S, v11.4S, v12.4S + __ fabd(v4, __ T2D, v5, v6); // fabd v4.2D, v5.2D, v6.2D + __ fmul(v30, __ T2S, v31, v0); // fmul v30.2S, v31.2S, v0.2S + __ fmul(v20, __ T4S, v21, v22); // fmul v20.4S, v21.4S, v22.4S + __ fmul(v8, __ T2D, v9, v10); // fmul v8.2D, v9.2D, v10.2D + __ mlav(v30, __ T4H, v31, v0); // mla v30.4H, v31.4H, v0.4H + 
__ mlav(v17, __ T8H, v18, v19); // mla v17.8H, v18.8H, v19.8H + __ mlav(v10, __ T2S, v11, v12); // mla v10.2S, v11.2S, v12.2S + __ mlav(v27, __ T4S, v28, v29); // mla v27.4S, v28.4S, v29.4S + __ fmla(v2, __ T2S, v3, v4); // fmla v2.2S, v3.2S, v4.2S + __ fmla(v24, __ T4S, v25, v26); // fmla v24.4S, v25.4S, v26.4S + __ fmla(v4, __ T2D, v5, v6); // fmla v4.2D, v5.2D, v6.2D + __ mlsv(v3, __ T4H, v4, v5); // mls v3.4H, v4.4H, v5.4H + __ mlsv(v8, __ T8H, v9, v10); // mls v8.8H, v9.8H, v10.8H + __ mlsv(v22, __ T2S, v23, v24); // mls v22.2S, v23.2S, v24.2S + __ mlsv(v17, __ T4S, v18, v19); // mls v17.4S, v18.4S, v19.4S + __ fmls(v13, __ T2S, v14, v15); // fmls v13.2S, v14.2S, v15.2S + __ fmls(v4, __ T4S, v5, v6); // fmls v4.4S, v5.4S, v6.4S + __ fmls(v28, __ T2D, v29, v30); // fmls v28.2D, v29.2D, v30.2D + __ fdiv(v23, __ T2S, v24, v25); // fdiv v23.2S, v24.2S, v25.2S + __ fdiv(v21, __ T4S, v22, v23); // fdiv v21.4S, v22.4S, v23.4S + __ fdiv(v25, __ T2D, v26, v27); // fdiv v25.2D, v26.2D, v27.2D + __ maxv(v24, __ T8B, v25, v26); // smax v24.8B, v25.8B, v26.8B + __ maxv(v3, __ T16B, v4, v5); // smax v3.16B, v4.16B, v5.16B + __ maxv(v23, __ T4H, v24, v25); // smax v23.4H, v24.4H, v25.4H + __ maxv(v26, __ T8H, v27, v28); // smax v26.8H, v27.8H, v28.8H + __ maxv(v23, __ T2S, v24, v25); // smax v23.2S, v24.2S, v25.2S + __ maxv(v14, __ T4S, v15, v16); // smax v14.4S, v15.4S, v16.4S + __ smaxp(v21, __ T8B, v22, v23); // smaxp v21.8B, v22.8B, v23.8B + __ smaxp(v3, __ T16B, v4, v5); // smaxp v3.16B, v4.16B, v5.16B + __ smaxp(v23, __ T4H, v24, v25); // smaxp v23.4H, v24.4H, v25.4H + __ smaxp(v8, __ T8H, v9, v10); // smaxp v8.8H, v9.8H, v10.8H + __ smaxp(v24, __ T2S, v25, v26); // smaxp v24.2S, v25.2S, v26.2S + __ smaxp(v19, __ T4S, v20, v21); // smaxp v19.4S, v20.4S, v21.4S + __ fmax(v15, __ T2S, v16, v17); // fmax v15.2S, v16.2S, v17.2S + __ fmax(v16, __ T4S, v17, v18); // fmax v16.4S, v17.4S, v18.4S + __ fmax(v2, __ T2D, v3, v4); // fmax v2.2D, v3.2D, v4.2D + __ minv(v1, __ T8B, 
v2, v3); // smin v1.8B, v2.8B, v3.8B + __ minv(v0, __ T16B, v1, v2); // smin v0.16B, v1.16B, v2.16B + __ minv(v24, __ T4H, v25, v26); // smin v24.4H, v25.4H, v26.4H + __ minv(v4, __ T8H, v5, v6); // smin v4.8H, v5.8H, v6.8H + __ minv(v3, __ T2S, v4, v5); // smin v3.2S, v4.2S, v5.2S + __ minv(v11, __ T4S, v12, v13); // smin v11.4S, v12.4S, v13.4S + __ sminp(v30, __ T8B, v31, v0); // sminp v30.8B, v31.8B, v0.8B + __ sminp(v27, __ T16B, v28, v29); // sminp v27.16B, v28.16B, v29.16B + __ sminp(v9, __ T4H, v10, v11); // sminp v9.4H, v10.4H, v11.4H + __ sminp(v25, __ T8H, v26, v27); // sminp v25.8H, v26.8H, v27.8H + __ sminp(v2, __ T2S, v3, v4); // sminp v2.2S, v3.2S, v4.2S + __ sminp(v12, __ T4S, v13, v14); // sminp v12.4S, v13.4S, v14.4S + __ fmin(v17, __ T2S, v18, v19); // fmin v17.2S, v18.2S, v19.2S + __ fmin(v30, __ T4S, v31, v0); // fmin v30.4S, v31.4S, v0.4S + __ fmin(v1, __ T2D, v2, v3); // fmin v1.2D, v2.2D, v3.2D + __ cmeq(v12, __ T8B, v13, v14); // cmeq v12.8B, v13.8B, v14.8B + __ cmeq(v28, __ T16B, v29, v30); // cmeq v28.16B, v29.16B, v30.16B + __ cmeq(v0, __ T4H, v1, v2); // cmeq v0.4H, v1.4H, v2.4H + __ cmeq(v17, __ T8H, v18, v19); // cmeq v17.8H, v18.8H, v19.8H + __ cmeq(v12, __ T2S, v13, v14); // cmeq v12.2S, v13.2S, v14.2S __ cmeq(v17, __ T4S, v18, v19); // cmeq v17.4S, v18.4S, v19.4S - __ cmeq(v13, __ T2D, v14, v15); // cmeq v13.2D, v14.2D, v15.2D - __ fcmeq(v4, __ T2S, v5, v6); // fcmeq v4.2S, v5.2S, v6.2S - __ fcmeq(v28, __ T4S, v29, v30); // fcmeq v28.4S, v29.4S, v30.4S - __ fcmeq(v23, __ T2D, v24, v25); // fcmeq v23.2D, v24.2D, v25.2D - __ cmgt(v21, __ T8B, v22, v23); // cmgt v21.8B, v22.8B, v23.8B - __ cmgt(v25, __ T16B, v26, v27); // cmgt v25.16B, v26.16B, v27.16B - __ cmgt(v24, __ T4H, v25, v26); // cmgt v24.4H, v25.4H, v26.4H - __ cmgt(v3, __ T8H, v4, v5); // cmgt v3.8H, v4.8H, v5.8H - __ cmgt(v23, __ T2S, v24, v25); // cmgt v23.2S, v24.2S, v25.2S - __ cmgt(v26, __ T4S, v27, v28); // cmgt v26.4S, v27.4S, v28.4S - __ cmgt(v23, __ T2D, v24, v25); 
// cmgt v23.2D, v24.2D, v25.2D - __ cmhi(v14, __ T8B, v15, v16); // cmhi v14.8B, v15.8B, v16.8B - __ cmhi(v21, __ T16B, v22, v23); // cmhi v21.16B, v22.16B, v23.16B - __ cmhi(v3, __ T4H, v4, v5); // cmhi v3.4H, v4.4H, v5.4H - __ cmhi(v23, __ T8H, v24, v25); // cmhi v23.8H, v24.8H, v25.8H + __ cmeq(v21, __ T2D, v22, v23); // cmeq v21.2D, v22.2D, v23.2D + __ fcmeq(v12, __ T2S, v13, v14); // fcmeq v12.2S, v13.2S, v14.2S + __ fcmeq(v27, __ T4S, v28, v29); // fcmeq v27.4S, v28.4S, v29.4S + __ fcmeq(v29, __ T2D, v30, v31); // fcmeq v29.2D, v30.2D, v31.2D + __ cmgt(v30, __ T8B, v31, v0); // cmgt v30.8B, v31.8B, v0.8B + __ cmgt(v1, __ T16B, v2, v3); // cmgt v1.16B, v2.16B, v3.16B + __ cmgt(v25, __ T4H, v26, v27); // cmgt v25.4H, v26.4H, v27.4H + __ cmgt(v27, __ T8H, v28, v29); // cmgt v27.8H, v28.8H, v29.8H + __ cmgt(v4, __ T2S, v5, v6); // cmgt v4.2S, v5.2S, v6.2S + __ cmgt(v29, __ T4S, v30, v31); // cmgt v29.4S, v30.4S, v31.4S + __ cmgt(v3, __ T2D, v4, v5); // cmgt v3.2D, v4.2D, v5.2D + __ cmhi(v6, __ T8B, v7, v8); // cmhi v6.8B, v7.8B, v8.8B + __ cmhi(v29, __ T16B, v30, v31); // cmhi v29.16B, v30.16B, v31.16B + __ cmhi(v25, __ T4H, v26, v27); // cmhi v25.4H, v26.4H, v27.4H + __ cmhi(v17, __ T8H, v18, v19); // cmhi v17.8H, v18.8H, v19.8H __ cmhi(v8, __ T2S, v9, v10); // cmhi v8.2S, v9.2S, v10.2S - __ cmhi(v24, __ T4S, v25, v26); // cmhi v24.4S, v25.4S, v26.4S - __ cmhi(v19, __ T2D, v20, v21); // cmhi v19.2D, v20.2D, v21.2D - __ cmhs(v15, __ T8B, v16, v17); // cmhs v15.8B, v16.8B, v17.8B - __ cmhs(v16, __ T16B, v17, v18); // cmhs v16.16B, v17.16B, v18.16B - __ cmhs(v2, __ T4H, v3, v4); // cmhs v2.4H, v3.4H, v4.4H - __ cmhs(v1, __ T8H, v2, v3); // cmhs v1.8H, v2.8H, v3.8H - __ cmhs(v0, __ T2S, v1, v2); // cmhs v0.2S, v1.2S, v2.2S - __ cmhs(v24, __ T4S, v25, v26); // cmhs v24.4S, v25.4S, v26.4S - __ cmhs(v4, __ T2D, v5, v6); // cmhs v4.2D, v5.2D, v6.2D - __ fcmgt(v3, __ T2S, v4, v5); // fcmgt v3.2S, v4.2S, v5.2S - __ fcmgt(v11, __ T4S, v12, v13); // fcmgt v11.4S, v12.4S, 
v13.4S - __ fcmgt(v30, __ T2D, v31, v0); // fcmgt v30.2D, v31.2D, v0.2D - __ cmge(v27, __ T8B, v28, v29); // cmge v27.8B, v28.8B, v29.8B - __ cmge(v9, __ T16B, v10, v11); // cmge v9.16B, v10.16B, v11.16B - __ cmge(v25, __ T4H, v26, v27); // cmge v25.4H, v26.4H, v27.4H - __ cmge(v2, __ T8H, v3, v4); // cmge v2.8H, v3.8H, v4.8H - __ cmge(v12, __ T2S, v13, v14); // cmge v12.2S, v13.2S, v14.2S - __ cmge(v17, __ T4S, v18, v19); // cmge v17.4S, v18.4S, v19.4S - __ cmge(v30, __ T2D, v31, v0); // cmge v30.2D, v31.2D, v0.2D + __ cmhi(v7, __ T4S, v8, v9); // cmhi v7.4S, v8.4S, v9.4S + __ cmhi(v12, __ T2D, v13, v14); // cmhi v12.2D, v13.2D, v14.2D + __ cmhs(v0, __ T8B, v1, v2); // cmhs v0.8B, v1.8B, v2.8B + __ cmhs(v19, __ T16B, v20, v21); // cmhs v19.16B, v20.16B, v21.16B + __ cmhs(v1, __ T4H, v2, v3); // cmhs v1.4H, v2.4H, v3.4H + __ cmhs(v23, __ T8H, v24, v25); // cmhs v23.8H, v24.8H, v25.8H + __ cmhs(v2, __ T2S, v3, v4); // cmhs v2.2S, v3.2S, v4.2S + __ cmhs(v0, __ T4S, v1, v2); // cmhs v0.4S, v1.4S, v2.4S + __ cmhs(v8, __ T2D, v9, v10); // cmhs v8.2D, v9.2D, v10.2D + __ fcmgt(v23, __ T2S, v24, v25); // fcmgt v23.2S, v24.2S, v25.2S + __ fcmgt(v25, __ T4S, v26, v27); // fcmgt v25.4S, v26.4S, v27.4S + __ fcmgt(v15, __ T2D, v16, v17); // fcmgt v15.2D, v16.2D, v17.2D + __ cmge(v29, __ T8B, v30, v31); // cmge v29.8B, v30.8B, v31.8B + __ cmge(v3, __ T16B, v4, v5); // cmge v3.16B, v4.16B, v5.16B + __ cmge(v10, __ T4H, v11, v12); // cmge v10.4H, v11.4H, v12.4H + __ cmge(v22, __ T8H, v23, v24); // cmge v22.8H, v23.8H, v24.8H + __ cmge(v10, __ T2S, v11, v12); // cmge v10.2S, v11.2S, v12.2S + __ cmge(v4, __ T4S, v5, v6); // cmge v4.4S, v5.4S, v6.4S + __ cmge(v17, __ T2D, v18, v19); // cmge v17.2D, v18.2D, v19.2D __ fcmge(v1, __ T2S, v2, v3); // fcmge v1.2S, v2.2S, v3.2S - __ fcmge(v12, __ T4S, v13, v14); // fcmge v12.4S, v13.4S, v14.4S - __ fcmge(v28, __ T2D, v29, v30); // fcmge v28.2D, v29.2D, v30.2D + __ fcmge(v11, __ T4S, v12, v13); // fcmge v11.4S, v12.4S, v13.4S + __ fcmge(v7, 
__ T2D, v8, v9); // fcmge v7.2D, v8.2D, v9.2D // SpecialCases __ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE @@ -927,205 +975,205 @@ __ fmovd(v0, -1.0625); // fmov d0, #-1.0625 // LSEOp - __ swp(Assembler::xword, r0, r19, r12); // swp x0, x19, [x12] - __ ldadd(Assembler::xword, r17, r22, r13); // ldadd x17, x22, [x13] - __ ldbic(Assembler::xword, r28, r30, sp); // ldclr x28, x30, [sp] - __ ldeor(Assembler::xword, r1, r26, r28); // ldeor x1, x26, [x28] - __ ldorr(Assembler::xword, r4, r30, r4); // ldset x4, x30, [x4] - __ ldsmin(Assembler::xword, r6, r30, r26); // ldsmin x6, x30, [x26] - __ ldsmax(Assembler::xword, r16, r9, r8); // ldsmax x16, x9, [x8] - __ ldumin(Assembler::xword, r12, r0, r20); // ldumin x12, x0, [x20] - __ ldumax(Assembler::xword, r1, r24, r2); // ldumax x1, x24, [x2] + __ swp(Assembler::xword, r10, r15, r17); // swp x10, x15, [x17] + __ ldadd(Assembler::xword, r2, r10, r12); // ldadd x2, x10, [x12] + __ ldbic(Assembler::xword, r12, r15, r13); // ldclr x12, x15, [x13] + __ ldeor(Assembler::xword, r2, r7, r20); // ldeor x2, x7, [x20] + __ ldorr(Assembler::xword, r26, r16, r4); // ldset x26, x16, [x4] + __ ldsmin(Assembler::xword, r2, r4, r12); // ldsmin x2, x4, [x12] + __ ldsmax(Assembler::xword, r16, r21, r16); // ldsmax x16, x21, [x16] + __ ldumin(Assembler::xword, r16, r11, r21); // ldumin x16, x11, [x21] + __ ldumax(Assembler::xword, r23, r12, r26); // ldumax x23, x12, [x26] // LSEOp - __ swpa(Assembler::xword, r0, r9, r24); // swpa x0, x9, [x24] - __ ldadda(Assembler::xword, r26, r16, r30); // ldadda x26, x16, [x30] - __ ldbica(Assembler::xword, r3, r10, r23); // ldclra x3, x10, [x23] - __ ldeora(Assembler::xword, r10, r4, r15); // ldeora x10, x4, [x15] - __ ldorra(Assembler::xword, r2, r11, r8); // ldseta x2, x11, [x8] - __ ldsmina(Assembler::xword, r10, r15, r17); // ldsmina x10, x15, [x17] - __ ldsmaxa(Assembler::xword, r2, r10, r12); // ldsmaxa x2, x10, [x12] - __ ldumina(Assembler::xword, r12, r15, r13); // ldumina x12, 
x15, [x13] - __ ldumaxa(Assembler::xword, r2, r7, r20); // ldumaxa x2, x7, [x20] + __ swpa(Assembler::xword, r23, r28, r14); // swpa x23, x28, [x14] + __ ldadda(Assembler::xword, r11, r24, r1); // ldadda x11, x24, [x1] + __ ldbica(Assembler::xword, r12, zr, r10); // ldclra x12, xzr, [x10] + __ ldeora(Assembler::xword, r16, r7, r2); // ldeora x16, x7, [x2] + __ ldorra(Assembler::xword, r3, r13, r19); // ldseta x3, x13, [x19] + __ ldsmina(Assembler::xword, r17, r16, r3); // ldsmina x17, x16, [x3] + __ ldsmaxa(Assembler::xword, r1, r11, r30); // ldsmaxa x1, x11, [x30] + __ ldumina(Assembler::xword, r5, r8, r15); // ldumina x5, x8, [x15] + __ ldumaxa(Assembler::xword, r29, r30, r0); // ldumaxa x29, x30, [x0] // LSEOp - __ swpal(Assembler::xword, r26, r16, r4); // swpal x26, x16, [x4] - __ ldaddal(Assembler::xword, r2, r4, r12); // ldaddal x2, x4, [x12] - __ ldbical(Assembler::xword, r16, r21, r16); // ldclral x16, x21, [x16] - __ ldeoral(Assembler::xword, r16, r11, r21); // ldeoral x16, x11, [x21] - __ ldorral(Assembler::xword, r23, r12, r26); // ldsetal x23, x12, [x26] - __ ldsminal(Assembler::xword, r23, r28, r14); // ldsminal x23, x28, [x14] - __ ldsmaxal(Assembler::xword, r11, r24, r1); // ldsmaxal x11, x24, [x1] - __ lduminal(Assembler::xword, r12, zr, r10); // lduminal x12, xzr, [x10] - __ ldumaxal(Assembler::xword, r16, r7, r2); // ldumaxal x16, x7, [x2] + __ swpal(Assembler::xword, r20, r7, r20); // swpal x20, x7, [x20] + __ ldaddal(Assembler::xword, r23, r28, r21); // ldaddal x23, x28, [x21] + __ ldbical(Assembler::xword, r27, r25, r5); // ldclral x27, x25, [x5] + __ ldeoral(Assembler::xword, r1, r23, r16); // ldeoral x1, x23, [x16] + __ ldorral(Assembler::xword, zr, r5, r12); // ldsetal xzr, x5, [x12] + __ ldsminal(Assembler::xword, r9, r28, r15); // ldsminal x9, x28, [x15] + __ ldsmaxal(Assembler::xword, r29, r22, sp); // ldsmaxal x29, x22, [sp] + __ lduminal(Assembler::xword, r19, zr, r5); // lduminal x19, xzr, [x5] + __ ldumaxal(Assembler::xword, r14, r16, 
sp); // ldumaxal x14, x16, [sp] // LSEOp - __ swpl(Assembler::xword, r3, r13, r19); // swpl x3, x13, [x19] - __ ldaddl(Assembler::xword, r17, r16, r3); // ldaddl x17, x16, [x3] - __ ldbicl(Assembler::xword, r1, r11, r30); // ldclrl x1, x11, [x30] - __ ldeorl(Assembler::xword, r5, r8, r15); // ldeorl x5, x8, [x15] - __ ldorrl(Assembler::xword, r29, r30, r0); // ldsetl x29, x30, [x0] - __ ldsminl(Assembler::xword, r20, r7, r20); // ldsminl x20, x7, [x20] - __ ldsmaxl(Assembler::xword, r23, r28, r21); // ldsmaxl x23, x28, [x21] - __ lduminl(Assembler::xword, r27, r25, r5); // lduminl x27, x25, [x5] - __ ldumaxl(Assembler::xword, r1, r23, r16); // ldumaxl x1, x23, [x16] + __ swpl(Assembler::xword, r16, r27, r20); // swpl x16, x27, [x20] + __ ldaddl(Assembler::xword, r16, r12, r11); // ldaddl x16, x12, [x11] + __ ldbicl(Assembler::xword, r9, r6, r30); // ldclrl x9, x6, [x30] + __ ldeorl(Assembler::xword, r17, r27, r28); // ldeorl x17, x27, [x28] + __ ldorrl(Assembler::xword, r30, r7, r10); // ldsetl x30, x7, [x10] + __ ldsminl(Assembler::xword, r20, r10, r4); // ldsminl x20, x10, [x4] + __ ldsmaxl(Assembler::xword, r24, r17, r17); // ldsmaxl x24, x17, [x17] + __ lduminl(Assembler::xword, r22, r3, r29); // lduminl x22, x3, [x29] + __ ldumaxl(Assembler::xword, r15, r22, r19); // ldumaxl x15, x22, [x19] // LSEOp - __ swp(Assembler::word, zr, r5, r12); // swp wzr, w5, [x12] - __ ldadd(Assembler::word, r9, r28, r15); // ldadd w9, w28, [x15] - __ ldbic(Assembler::word, r29, r22, sp); // ldclr w29, w22, [sp] - __ ldeor(Assembler::word, r19, zr, r5); // ldeor w19, wzr, [x5] - __ ldorr(Assembler::word, r14, r16, sp); // ldset w14, w16, [sp] - __ ldsmin(Assembler::word, r16, r27, r20); // ldsmin w16, w27, [x20] - __ ldsmax(Assembler::word, r16, r12, r11); // ldsmax w16, w12, [x11] - __ ldumin(Assembler::word, r9, r6, r30); // ldumin w9, w6, [x30] - __ ldumax(Assembler::word, r17, r27, r28); // ldumax w17, w27, [x28] + __ swp(Assembler::word, r19, r22, r2); // swp w19, w22, [x2] + 
__ ldadd(Assembler::word, r15, r6, r12); // ldadd w15, w6, [x12] + __ ldbic(Assembler::word, r16, r11, r13); // ldclr w16, w11, [x13] + __ ldeor(Assembler::word, r23, r1, r30); // ldeor w23, w1, [x30] + __ ldorr(Assembler::word, r19, r5, r17); // ldset w19, w5, [x17] + __ ldsmin(Assembler::word, r2, r16, r22); // ldsmin w2, w16, [x22] + __ ldsmax(Assembler::word, r13, r10, r21); // ldsmax w13, w10, [x21] + __ ldumin(Assembler::word, r29, r27, r12); // ldumin w29, w27, [x12] + __ ldumax(Assembler::word, r27, r3, r1); // ldumax w27, w3, [x1] // LSEOp - __ swpa(Assembler::word, r30, r7, r10); // swpa w30, w7, [x10] - __ ldadda(Assembler::word, r20, r10, r4); // ldadda w20, w10, [x4] - __ ldbica(Assembler::word, r24, r17, r17); // ldclra w24, w17, [x17] - __ ldeora(Assembler::word, r22, r3, r29); // ldeora w22, w3, [x29] - __ ldorra(Assembler::word, r15, r22, r19); // ldseta w15, w22, [x19] - __ ldsmina(Assembler::word, r19, r22, r2); // ldsmina w19, w22, [x2] - __ ldsmaxa(Assembler::word, r15, r6, r12); // ldsmaxa w15, w6, [x12] - __ ldumina(Assembler::word, r16, r11, r13); // ldumina w16, w11, [x13] - __ ldumaxa(Assembler::word, r23, r1, r30); // ldumaxa w23, w1, [x30] + __ swpa(Assembler::word, zr, r24, r19); // swpa wzr, w24, [x19] + __ ldadda(Assembler::word, r17, r9, r28); // ldadda w17, w9, [x28] + __ ldbica(Assembler::word, r27, r15, r7); // ldclra w27, w15, [x7] + __ ldeora(Assembler::word, r21, r23, sp); // ldeora w21, w23, [sp] + __ ldorra(Assembler::word, r25, r2, sp); // ldseta w25, w2, [sp] + __ ldsmina(Assembler::word, r27, r16, r10); // ldsmina w27, w16, [x10] + __ ldsmaxa(Assembler::word, r23, r19, r3); // ldsmaxa w23, w19, [x3] + __ ldumina(Assembler::word, r16, r0, r25); // ldumina w16, w0, [x25] + __ ldumaxa(Assembler::word, r26, r23, r2); // ldumaxa w26, w23, [x2] // LSEOp - __ swpal(Assembler::word, r19, r5, r17); // swpal w19, w5, [x17] - __ ldaddal(Assembler::word, r2, r16, r22); // ldaddal w2, w16, [x22] - __ ldbical(Assembler::word, r13, r10, 
r21); // ldclral w13, w10, [x21] - __ ldeoral(Assembler::word, r29, r27, r12); // ldeoral w29, w27, [x12] - __ ldorral(Assembler::word, r27, r3, r1); // ldsetal w27, w3, [x1] - __ ldsminal(Assembler::word, zr, r24, r19); // ldsminal wzr, w24, [x19] - __ ldsmaxal(Assembler::word, r17, r9, r28); // ldsmaxal w17, w9, [x28] - __ lduminal(Assembler::word, r27, r15, r7); // lduminal w27, w15, [x7] - __ ldumaxal(Assembler::word, r21, r23, sp); // ldumaxal w21, w23, [sp] + __ swpal(Assembler::word, r16, r12, r4); // swpal w16, w12, [x4] + __ ldaddal(Assembler::word, r28, r30, r29); // ldaddal w28, w30, [x29] + __ ldbical(Assembler::word, r16, r27, r6); // ldclral w16, w27, [x6] + __ ldeoral(Assembler::word, r9, r29, r15); // ldeoral w9, w29, [x15] + __ ldorral(Assembler::word, r7, r4, r7); // ldsetal w7, w4, [x7] + __ ldsminal(Assembler::word, r15, r9, r23); // ldsminal w15, w9, [x23] + __ ldsmaxal(Assembler::word, r8, r2, r28); // ldsmaxal w8, w2, [x28] + __ lduminal(Assembler::word, r21, zr, r5); // lduminal w21, wzr, [x5] + __ ldumaxal(Assembler::word, r27, r0, r17); // ldumaxal w27, w0, [x17] // LSEOp - __ swpl(Assembler::word, r25, r2, sp); // swpl w25, w2, [sp] - __ ldaddl(Assembler::word, r27, r16, r10); // ldaddl w27, w16, [x10] - __ ldbicl(Assembler::word, r23, r19, r3); // ldclrl w23, w19, [x3] - __ ldeorl(Assembler::word, r16, r0, r25); // ldeorl w16, w0, [x25] - __ ldorrl(Assembler::word, r26, r23, r2); // ldsetl w26, w23, [x2] - __ ldsminl(Assembler::word, r16, r12, r4); // ldsminl w16, w12, [x4] - __ ldsmaxl(Assembler::word, r28, r30, r29); // ldsmaxl w28, w30, [x29] - __ lduminl(Assembler::word, r16, r27, r6); // lduminl w16, w27, [x6] - __ ldumaxl(Assembler::word, r9, r29, r15); // ldumaxl w9, w29, [x15] + __ swpl(Assembler::word, r15, r4, r26); // swpl w15, w4, [x26] + __ ldaddl(Assembler::word, r8, r28, r22); // ldaddl w8, w28, [x22] + __ ldbicl(Assembler::word, r27, r27, r25); // ldclrl w27, w27, [x25] + __ ldeorl(Assembler::word, r23, r0, r4); // ldeorl 
w23, w0, [x4] + __ ldorrl(Assembler::word, r6, r16, r0); // ldsetl w6, w16, [x0] + __ ldsminl(Assembler::word, r4, r15, r1); // ldsminl w4, w15, [x1] + __ ldsmaxl(Assembler::word, r10, r7, r5); // ldsmaxl w10, w7, [x5] + __ lduminl(Assembler::word, r10, r28, r7); // lduminl w10, w28, [x7] + __ ldumaxl(Assembler::word, r20, r23, r21); // ldumaxl w20, w23, [x21] // SHA3SIMDOp - __ bcax(v7, __ T16B, v4, v7, v15); // bcax v7.16B, v4.16B, v7.16B, v15.16B - __ eor3(v9, __ T16B, v22, v8, v2); // eor3 v9.16B, v22.16B, v8.16B, v2.16B - __ rax1(v27, __ T2D, v20, v30); // rax1 v27.2D, v20.2D, v30.2D - __ xar(v5, __ T2D, v26, v0, 34); // xar v5.2D, v26.2D, v0.2D, #34 + __ bcax(v5, __ T16B, v10, v8, v16); // bcax v5.16B, v10.16B, v8.16B, v16.16B + __ eor3(v30, __ T16B, v6, v17, v2); // eor3 v30.16B, v6.16B, v17.16B, v2.16B + __ rax1(v11, __ T2D, v29, v28); // rax1 v11.2D, v29.2D, v28.2D + __ xar(v2, __ T2D, v26, v22, 58); // xar v2.2D, v26.2D, v22.2D, #58 // SHA512SIMDOp - __ sha512h(v14, __ T2D, v3, v25); // sha512h q14, q3, v25.2D - __ sha512h2(v8, __ T2D, v27, v21); // sha512h2 q8, q27, v21.2D - __ sha512su0(v26, __ T2D, v26); // sha512su0 v26.2D, v26.2D - __ sha512su1(v24, __ T2D, v22, v0); // sha512su1 v24.2D, v22.2D, v0.2D + __ sha512h(v14, __ T2D, v13, v27); // sha512h q14, q13, v27.2D + __ sha512h2(v16, __ T2D, v23, v5); // sha512h2 q16, q23, v5.2D + __ sha512su0(v2, __ T2D, v13); // sha512su0 v2.2D, v13.2D + __ sha512su1(v10, __ T2D, v15, v10); // sha512su1 v10.2D, v15.2D, v10.2D // SVEBinaryImmOp - __ sve_add(z4, __ B, 147u); // add z4.b, z4.b, #0x93 - __ sve_sub(z0, __ B, 124u); // sub z0.b, z0.b, #0x7c - __ sve_and(z1, __ H, 508u); // and z1.h, z1.h, #0x1fc - __ sve_eor(z9, __ D, 18374686479671656447u); // eor z9.d, z9.d, #0xff00000000007fff - __ sve_orr(z22, __ S, 251662080u); // orr z22.s, z22.s, #0xf000f00 + __ sve_add(z26, __ S, 98u); // add z26.s, z26.s, #0x62 + __ sve_sub(z3, __ S, 138u); // sub z3.s, z3.s, #0x8a + __ sve_and(z4, __ B, 131u); // and z4.b, 
z4.b, #0x83 + __ sve_eor(z17, __ H, 16368u); // eor z17.h, z17.h, #0x3ff0 + __ sve_orr(z2, __ S, 4164941887u); // orr z2.s, z2.s, #0xf83ff83f // SVEBinaryImmOp - __ sve_add(z8, __ S, 248u); // add z8.s, z8.s, #0xf8 - __ sve_sub(z6, __ S, 16u); // sub z6.s, z6.s, #0x10 - __ sve_and(z11, __ D, 4160749568u); // and z11.d, z11.d, #0xf8000000 - __ sve_eor(z26, __ S, 1610637312u); // eor z26.s, z26.s, #0x60006000 - __ sve_orr(z13, __ D, 18446181398634037247u); // orr z13.d, z13.d, #0xfffe003fffffffff + __ sve_add(z23, __ B, 51u); // add z23.b, z23.b, #0x33 + __ sve_sub(z7, __ S, 104u); // sub z7.s, z7.s, #0x68 + __ sve_and(z27, __ S, 7864320u); // and z27.s, z27.s, #0x780000 + __ sve_eor(z2, __ D, 68719476224u); // eor z2.d, z2.d, #0xffffffe00 + __ sve_orr(z6, __ S, 1056980736u); // orr z6.s, z6.s, #0x3f003f00 // SVEBinaryImmOp - __ sve_add(z5, __ B, 112u); // add z5.b, z5.b, #0x70 - __ sve_sub(z10, __ S, 88u); // sub z10.s, z10.s, #0x58 - __ sve_and(z26, __ S, 253952u); // and z26.s, z26.s, #0x3e000 - __ sve_eor(z22, __ S, 496u); // eor z22.s, z22.s, #0x1f0 - __ sve_orr(z19, __ S, 536870910u); // orr z19.s, z19.s, #0x1ffffffe + __ sve_add(z12, __ S, 67u); // add z12.s, z12.s, #0x43 + __ sve_sub(z24, __ S, 154u); // sub z24.s, z24.s, #0x9a + __ sve_and(z0, __ H, 511u); // and z0.h, z0.h, #0x1ff + __ sve_eor(z19, __ D, 9241386433220968447u); // eor z19.d, z19.d, #0x803fffff803fffff + __ sve_orr(z6, __ B, 128u); // orr z6.b, z6.b, #0x80 // SVEBinaryImmOp - __ sve_add(z14, __ H, 22u); // add z14.h, z14.h, #0x16 - __ sve_sub(z16, __ B, 172u); // sub z16.b, z16.b, #0xac - __ sve_and(z23, __ B, 62u); // and z23.b, z23.b, #0x3e - __ sve_eor(z17, __ H, 33279u); // eor z17.h, z17.h, #0x81ff - __ sve_orr(z16, __ B, 254u); // orr z16.b, z16.b, #0xfe + __ sve_add(z17, __ D, 74u); // add z17.d, z17.d, #0x4a + __ sve_sub(z10, __ S, 170u); // sub z10.s, z10.s, #0xaa + __ sve_and(z22, __ D, 17179852800u); // and z22.d, z22.d, #0x3ffffc000 + __ sve_eor(z15, __ S, 8388600u); // eor z15.s, 
z15.s, #0x7ffff8 + __ sve_orr(z4, __ D, 8064u); // orr z4.d, z4.d, #0x1f80 // SVEBinaryImmOp - __ sve_add(z3, __ B, 49u); // add z3.b, z3.b, #0x31 - __ sve_sub(z17, __ S, 110u); // sub z17.s, z17.s, #0x6e - __ sve_and(z12, __ S, 4290777087u); // and z12.s, z12.s, #0xffc00fff - __ sve_eor(z19, __ S, 134217216u); // eor z19.s, z19.s, #0x7fffe00 - __ sve_orr(z23, __ B, 254u); // orr z23.b, z23.b, #0xfe + __ sve_add(z8, __ S, 162u); // add z8.s, z8.s, #0xa2 + __ sve_sub(z22, __ B, 130u); // sub z22.b, z22.b, #0x82 + __ sve_and(z9, __ S, 4292870159u); // and z9.s, z9.s, #0xffe0000f + __ sve_eor(z5, __ D, 1150687262887383032u); // eor z5.d, z5.d, #0xff80ff80ff80ff8 + __ sve_orr(z22, __ H, 32256u); // orr z22.h, z22.h, #0x7e00 // SVEBinaryImmOp - __ sve_add(z13, __ S, 54u); // add z13.s, z13.s, #0x36 - __ sve_sub(z0, __ B, 120u); // sub z0.b, z0.b, #0x78 - __ sve_and(z17, __ D, 18014398509481728u); // and z17.d, z17.d, #0x3fffffffffff00 - __ sve_eor(z22, __ S, 4294709247u); // eor z22.s, z22.s, #0xfffc0fff - __ sve_orr(z2, __ B, 225u); // orr z2.b, z2.b, #0xe1 + __ sve_add(z8, __ S, 134u); // add z8.s, z8.s, #0x86 + __ sve_sub(z25, __ H, 39u); // sub z25.h, z25.h, #0x27 + __ sve_and(z4, __ S, 4186112u); // and z4.s, z4.s, #0x3fe000 + __ sve_eor(z29, __ B, 131u); // eor z29.b, z29.b, #0x83 + __ sve_orr(z29, __ D, 4611685469745315712u); // orr z29.d, z29.d, #0x3fffff803fffff80 // SVEVectorOp - __ sve_add(z20, __ D, z7, z4); // add z20.d, z7.d, z4.d - __ sve_sub(z7, __ S, z0, z8); // sub z7.s, z0.s, z8.s - __ sve_fadd(z19, __ D, z22, z4); // fadd z19.d, z22.d, z4.d - __ sve_fmul(z9, __ D, z22, z11); // fmul z9.d, z22.d, z11.d - __ sve_fsub(z5, __ S, z30, z16); // fsub z5.s, z30.s, z16.s - __ sve_abs(z22, __ H, p3, z1); // abs z22.h, p3/m, z1.h - __ sve_add(z8, __ D, p5, z16); // add z8.d, p5/m, z8.d, z16.d - __ sve_and(z15, __ S, p1, z4); // and z15.s, p1/m, z15.s, z4.s - __ sve_asr(z8, __ B, p1, z29); // asr z8.b, p1/m, z8.b, z29.b - __ sve_bic(z28, __ D, p4, z29); // bic 
z28.d, p4/m, z28.d, z29.d - __ sve_cnt(z9, __ H, p3, z2); // cnt z9.h, p3/m, z2.h - __ sve_eor(z28, __ B, p0, z7); // eor z28.b, p0/m, z28.b, z7.b - __ sve_lsl(z26, __ H, p5, z17); // lsl z26.h, p5/m, z26.h, z17.h - __ sve_lsr(z8, __ D, p4, z21); // lsr z8.d, p4/m, z8.d, z21.d - __ sve_mul(z5, __ S, p5, z21); // mul z5.s, p5/m, z5.s, z21.s - __ sve_neg(z22, __ S, p4, z29); // neg z22.s, p4/m, z29.s - __ sve_not(z19, __ S, p0, z4); // not z19.s, p0/m, z4.s - __ sve_orr(z23, __ B, p1, z19); // orr z23.b, p1/m, z23.b, z19.b - __ sve_smax(z23, __ B, p6, z19); // smax z23.b, p6/m, z23.b, z19.b - __ sve_smin(z8, __ D, p2, z14); // smin z8.d, p2/m, z8.d, z14.d - __ sve_sub(z17, __ B, p7, z21); // sub z17.b, p7/m, z17.b, z21.b - __ sve_fabs(z30, __ D, p0, z10); // fabs z30.d, p0/m, z10.d - __ sve_fadd(z12, __ S, p0, z9); // fadd z12.s, p0/m, z12.s, z9.s - __ sve_fdiv(z24, __ D, p4, z4); // fdiv z24.d, p4/m, z24.d, z4.d - __ sve_fmax(z6, __ D, p2, z27); // fmax z6.d, p2/m, z6.d, z27.d - __ sve_fmin(z13, __ D, p4, z30); // fmin z13.d, p4/m, z13.d, z30.d - __ sve_fmul(z22, __ D, p5, z30); // fmul z22.d, p5/m, z22.d, z30.d - __ sve_fneg(z9, __ S, p3, z19); // fneg z9.s, p3/m, z19.s - __ sve_frintm(z20, __ S, p7, z9); // frintm z20.s, p7/m, z9.s - __ sve_frintn(z13, __ S, p3, z19); // frintn z13.s, p3/m, z19.s - __ sve_frintp(z24, __ S, p2, z19); // frintp z24.s, p2/m, z19.s - __ sve_fsqrt(z17, __ S, p4, z16); // fsqrt z17.s, p4/m, z16.s - __ sve_fsub(z0, __ S, p0, z11); // fsub z0.s, p0/m, z0.s, z11.s - __ sve_fmad(z15, __ S, p3, z15, z4); // fmad z15.s, p3/m, z15.s, z4.s - __ sve_fmla(z29, __ D, p1, z0, z10); // fmla z29.d, p1/m, z0.d, z10.d - __ sve_fmls(z26, __ D, p0, z0, z9); // fmls z26.d, p0/m, z0.d, z9.d - __ sve_fnmla(z28, __ D, p2, z24, z3); // fnmla z28.d, p2/m, z24.d, z3.d - __ sve_fnmls(z7, __ D, p6, z28, z13); // fnmls z7.d, p6/m, z28.d, z13.d - __ sve_mla(z10, __ D, p6, z12, z17); // mla z10.d, p6/m, z12.d, z17.d - __ sve_mls(z17, __ S, p3, z2, z29); // mls 
z17.s, p3/m, z2.s, z29.s - __ sve_and(z21, z20, z7); // and z21.d, z20.d, z7.d - __ sve_eor(z2, z1, z26); // eor z2.d, z1.d, z26.d - __ sve_orr(z9, z16, z17); // orr z9.d, z16.d, z17.d - __ sve_bic(z0, z4, z2); // bic z0.d, z4.d, z2.d - __ sve_uzp1(z14, __ S, z6, z11); // uzp1 z14.s, z6.s, z11.s - __ sve_uzp2(z14, __ H, z16, z29); // uzp2 z14.h, z16.h, z29.h + __ sve_add(z2, __ B, z11, z28); // add z2.b, z11.b, z28.b + __ sve_sub(z7, __ S, z1, z26); // sub z7.s, z1.s, z26.s + __ sve_fadd(z17, __ D, z14, z8); // fadd z17.d, z14.d, z8.d + __ sve_fmul(z21, __ D, z24, z5); // fmul z21.d, z24.d, z5.d + __ sve_fsub(z21, __ D, z17, z22); // fsub z21.d, z17.d, z22.d + __ sve_abs(z29, __ B, p5, z19); // abs z29.b, p5/m, z19.b + __ sve_add(z4, __ B, p4, z23); // add z4.b, p4/m, z4.b, z23.b + __ sve_and(z19, __ D, p1, z23); // and z19.d, p1/m, z19.d, z23.d + __ sve_asr(z19, __ H, p0, z8); // asr z19.h, p0/m, z19.h, z8.h + __ sve_bic(z14, __ D, p6, z17); // bic z14.d, p6/m, z14.d, z17.d + __ sve_cnt(z21, __ B, p1, z30); // cnt z21.b, p1/m, z30.b + __ sve_eor(z10, __ B, p5, z12); // eor z10.b, p5/m, z10.b, z12.b + __ sve_lsl(z9, __ S, p1, z24); // lsl z9.s, p1/m, z9.s, z24.s + __ sve_lsr(z4, __ H, p6, z6); // lsr z4.h, p6/m, z4.h, z6.h + __ sve_mul(z27, __ S, p6, z13); // mul z27.s, p6/m, z27.s, z13.s + __ sve_neg(z30, __ S, p5, z22); // neg z30.s, p5/m, z22.s + __ sve_not(z30, __ H, p7, z9); // not z30.h, p7/m, z9.h + __ sve_orr(z19, __ D, p1, z20); // orr z19.d, p1/m, z19.d, z20.d + __ sve_smax(z9, __ H, p2, z13); // smax z9.h, p2/m, z9.h, z13.h + __ sve_smin(z19, __ H, p0, z24); // smin z19.h, p0/m, z19.h, z24.h + __ sve_sub(z19, __ S, p3, z17); // sub z19.s, p3/m, z19.s, z17.s + __ sve_fabs(z16, __ S, p1, z0); // fabs z16.s, p1/m, z0.s + __ sve_fadd(z11, __ S, p2, z15); // fadd z11.s, p2/m, z11.s, z15.s + __ sve_fdiv(z15, __ D, p1, z15); // fdiv z15.d, p1/m, z15.d, z15.d + __ sve_fmax(z5, __ D, p0, z10); // fmax z5.d, p0/m, z5.d, z10.d + __ sve_fmin(z26, __ S, p0, z0); // 
fmin z26.s, p0/m, z26.s, z0.s + __ sve_fmul(z19, __ D, p7, z10); // fmul z19.d, p7/m, z19.d, z10.d + __ sve_fneg(z3, __ D, p5, z7); // fneg z3.d, p5/m, z7.d + __ sve_frintm(z28, __ S, p3, z21); // frintm z28.s, p3/m, z21.s + __ sve_frintn(z26, __ D, p3, z17); // frintn z26.d, p3/m, z17.d + __ sve_frintp(z17, __ D, p3, z2); // frintp z17.d, p3/m, z2.d + __ sve_fsqrt(z16, __ S, p5, z20); // fsqrt z16.s, p5/m, z20.s + __ sve_fsub(z19, __ D, p0, z1); // fsub z19.d, p0/m, z19.d, z1.d + __ sve_fmad(z17, __ D, p2, z16, z17); // fmad z17.d, p2/m, z16.d, z17.d + __ sve_fmla(z0, __ S, p1, z2, z23); // fmla z0.s, p1/m, z2.s, z23.s + __ sve_fmls(z6, __ D, p2, z20, z14); // fmls z6.d, p2/m, z20.d, z14.d + __ sve_fnmla(z29, __ D, p3, z3, z3); // fnmla z29.d, p3/m, z3.d, z3.d + __ sve_fnmls(z9, __ S, p0, z24, z27); // fnmls z9.s, p0/m, z24.s, z27.s + __ sve_mla(z19, __ S, p5, z7, z25); // mla z19.s, p5/m, z7.s, z25.s + __ sve_mls(z13, __ B, p1, z7, z25); // mls z13.b, p1/m, z7.b, z25.b + __ sve_and(z21, z17, z17); // and z21.d, z17.d, z17.d + __ sve_eor(z3, z9, z19); // eor z3.d, z9.d, z19.d + __ sve_orr(z7, z11, z14); // orr z7.d, z11.d, z14.d + __ sve_bic(z17, z11, z13); // bic z17.d, z11.d, z13.d + __ sve_uzp1(z17, __ H, z30, z17); // uzp1 z17.h, z30.h, z17.h + __ sve_uzp2(z15, __ S, z14, z26); // uzp2 z15.s, z14.s, z26.s // SVEReductionOp - __ sve_andv(v3, __ H, p0, z22); // andv h3, p0, z22.h - __ sve_orv(v3, __ B, p6, z27); // orv b3, p6, z27.b - __ sve_eorv(v19, __ D, p5, z7); // eorv d19, p5, z7.d - __ sve_smaxv(v21, __ H, p3, z5); // smaxv h21, p3, z5.h - __ sve_sminv(v25, __ S, p1, z21); // sminv s25, p1, z21.s - __ sve_fminv(v17, __ S, p0, z3); // fminv s17, p0, z3.s - __ sve_fmaxv(v19, __ S, p3, z7); // fmaxv s19, p3, z7.s - __ sve_fadda(v14, __ S, p4, z17); // fadda s14, p4, s14, z17.s - __ sve_uaddv(v13, __ D, p6, z17); // uaddv d13, p6, z17.d + __ sve_andv(v27, __ H, p5, z7); // andv h27, p5, z7.h + __ sve_orv(v5, __ H, p7, z27); // orv h5, p7, z27.h + __ 
sve_eorv(v0, __ S, p3, z24); // eorv s0, p3, z24.s + __ sve_smaxv(v20, __ S, p0, z3); // smaxv s20, p0, z3.s + __ sve_sminv(v25, __ D, p1, z25); // sminv d25, p1, z25.d + __ sve_fminv(v17, __ S, p4, z1); // fminv s17, p4, z1.s + __ sve_fmaxv(v14, __ S, p7, z13); // fmaxv s14, p7, z13.s + __ sve_fadda(v17, __ D, p0, z30); // fadda d17, p0, d17, z30.d + __ sve_uaddv(v22, __ H, p5, z29); // uaddv d22, p5, z29.h __ bind(forth); @@ -1144,144 +1192,155 @@ 0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061, 0x120cb166, 0x321764bc, 0x52174681, 0x720c0227, 0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01, - 0x14000000, 0x17ffffd7, 0x140003a6, 0x94000000, - 0x97ffffd4, 0x940003a3, 0x3400000a, 0x34fffa2a, - 0x3400740a, 0x35000008, 0x35fff9c8, 0x350073a8, - 0xb400000b, 0xb4fff96b, 0xb400734b, 0xb500001d, - 0xb5fff91d, 0xb50072fd, 0x10000013, 0x10fff8b3, - 0x10007293, 0x90000013, 0x36300016, 0x3637f836, - 0x36307216, 0x3758000c, 0x375ff7cc, 0x375871ac, + 0x14000000, 0x17ffffd7, 0x140003d2, 0x94000000, + 0x97ffffd4, 0x940003cf, 0x3400000a, 0x34fffa2a, + 0x3400798a, 0x35000008, 0x35fff9c8, 0x35007928, + 0xb400000b, 0xb4fff96b, 0xb40078cb, 0xb500001d, + 0xb5fff91d, 0xb500787d, 0x10000013, 0x10fff8b3, + 0x10007813, 0x90000013, 0x36300016, 0x3637f836, + 0x36307796, 0x3758000c, 0x375ff7cc, 0x3758772c, 0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc, 0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f, 0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016, 0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0, - 0x54006f80, 0x54000001, 0x54fff541, 0x54006f21, - 0x54000002, 0x54fff4e2, 0x54006ec2, 0x54000002, - 0x54fff482, 0x54006e62, 0x54000003, 0x54fff423, - 0x54006e03, 0x54000003, 0x54fff3c3, 0x54006da3, - 0x54000004, 0x54fff364, 0x54006d44, 0x54000005, - 0x54fff305, 0x54006ce5, 0x54000006, 0x54fff2a6, - 0x54006c86, 0x54000007, 0x54fff247, 0x54006c27, - 0x54000008, 0x54fff1e8, 0x54006bc8, 0x54000009, - 0x54fff189, 0x54006b69, 0x5400000a, 0x54fff12a, - 0x54006b0a, 0x5400000b, 0x54fff0cb, 0x54006aab, - 
0x5400000c, 0x54fff06c, 0x54006a4c, 0x5400000d, - 0x54fff00d, 0x540069ed, 0x5400000e, 0x54ffefae, - 0x5400698e, 0x5400000f, 0x54ffef4f, 0x5400692f, + 0x54007500, 0x54000001, 0x54fff541, 0x540074a1, + 0x54000002, 0x54fff4e2, 0x54007442, 0x54000002, + 0x54fff482, 0x540073e2, 0x54000003, 0x54fff423, + 0x54007383, 0x54000003, 0x54fff3c3, 0x54007323, + 0x54000004, 0x54fff364, 0x540072c4, 0x54000005, + 0x54fff305, 0x54007265, 0x54000006, 0x54fff2a6, + 0x54007206, 0x54000007, 0x54fff247, 0x540071a7, + 0x54000008, 0x54fff1e8, 0x54007148, 0x54000009, + 0x54fff189, 0x540070e9, 0x5400000a, 0x54fff12a, + 0x5400708a, 0x5400000b, 0x54fff0cb, 0x5400702b, + 0x5400000c, 0x54fff06c, 0x54006fcc, 0x5400000d, + 0x54fff00d, 0x54006f6d, 0x5400000e, 0x54ffefae, + 0x54006f0e, 0x5400000f, 0x54ffef4f, 0x54006eaf, 0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60, - 0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0, - 0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200, - 0xd63f0280, 0xc80a7d1b, 0xc800fea1, 0xc85f7fb1, - 0xc85fff9d, 0xc89ffee1, 0xc8dffe95, 0x88167e7b, - 0x880bfcd0, 0x885f7c11, 0x885ffd44, 0x889ffed8, - 0x88dffe6a, 0x48017fc5, 0x4808fe2c, 0x485f7dc9, - 0x485ffc27, 0x489ffe05, 0x48dffd82, 0x080a7c6c, - 0x081cff4e, 0x085f7d5e, 0x085ffeae, 0x089ffd2d, - 0x08dfff76, 0xc87f4d7c, 0xc87fcc5e, 0xc8220417, - 0xc82cb5f0, 0x887f55b1, 0x887ff90b, 0x88382c2d, - 0x883aedb5, 0xf819928b, 0xb803e21c, 0x381f713b, - 0x781ce322, 0xf850f044, 0xb85e129e, 0x385e92f1, - 0x785ff35d, 0x39801921, 0x7881318b, 0x78dce02b, - 0xb8829313, 0xfc45f318, 0xbc5d50af, 0xfc001375, - 0xbc1951b7, 0xf8008c0a, 0xb801dc03, 0x38009dca, - 0x781fdf3d, 0xf8570e0c, 0xb85faecc, 0x385f6d6d, - 0x785ebea0, 0x38804cd7, 0x789cbce3, 0x78df9c9c, - 0xb89eed18, 0xfc40cd6e, 0xbc5bdd93, 0xfc103c14, - 0xbc040c08, 0xf81a2783, 0xb81ca4eb, 0x381e855b, - 0x7801b4e6, 0xf853654d, 0xb85d74af, 0x384095a2, - 0x785ec5bc, 0x389e15a9, 0x789dc703, 0x78c06474, - 0xb89ff667, 0xfc57e51e, 0xbc4155f9, 0xfc05a6ee, - 0xbc1df408, 0xf835da2a, 0xb836d9a4, 0x3833580d, - 
0x7826cb6c, 0xf8706900, 0xb87ae880, 0x3865db2e, - 0x78714889, 0x38a7789b, 0x78beca2f, 0x78f6c810, - 0xb8bef956, 0xfc6afabd, 0xbc734963, 0xfc3d5b8d, - 0xbc25fbb7, 0xf9189d05, 0xb91ecb1d, 0x39187a33, - 0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176, - 0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422, - 0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a, - 0xbd1b1869, 0x5800597b, 0x1800000b, 0xf8945060, - 0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035, - 0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380, - 0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11, - 0x2b2a278d, 0xcb22aa0f, 0x6b2d29bd, 0x8b2cce8c, - 0xab2b877e, 0xcb21c8ee, 0xeb3ba47d, 0x3a4d400e, - 0x7a5132c6, 0xba5e622e, 0xfa53814c, 0x3a52d8c2, - 0x7a4d8924, 0xba4b3aab, 0xfa4d7882, 0x1a96804c, - 0x1a912618, 0x5a90b0e6, 0x5a96976b, 0x9a9db06a, - 0x9a9b374c, 0xda95c14f, 0xda89c6fe, 0x5ac0015e, - 0x5ac005fd, 0x5ac00bdd, 0x5ac012b9, 0x5ac01404, - 0xdac002b1, 0xdac0061d, 0xdac00a95, 0xdac00e66, - 0xdac0107e, 0xdac01675, 0x1ac00b0b, 0x1ace0f3b, - 0x1ad121c3, 0x1ad825e7, 0x1ad92a3c, 0x1adc2f42, - 0x9ada0b25, 0x9ad10e1b, 0x9acc22a6, 0x9acc2480, - 0x9adc2a3b, 0x9ad12c5c, 0x9bce7dea, 0x9b597c6e, - 0x1b0e166f, 0x1b1ae490, 0x9b023044, 0x9b089e3d, - 0x9b391083, 0x9b24c73a, 0x9bb15f40, 0x9bbcc6af, - 0x7ea3d55b, 0x1e3908e0, 0x1e2f18c9, 0x1e2a29fd, - 0x1e273a22, 0x7ef7d56b, 0x1e770ba7, 0x1e6b1b6e, - 0x1e78288b, 0x1e6e39ec, 0x1f1c3574, 0x1f17f98b, - 0x1f2935da, 0x1f2574ea, 0x1f4b306f, 0x1f5ec7cf, - 0x1f6f3e93, 0x1f6226a9, 0x1e2040fb, 0x1e20c3dd, - 0x1e214031, 0x1e21c0c2, 0x1e22c06a, 0x1e604178, - 0x1e60c027, 0x1e61400b, 0x1e61c223, 0x1e6240dc, - 0x1e3800d6, 0x9e380360, 0x1e78005a, 0x9e7800e5, - 0x1e22017c, 0x9e2201b9, 0x1e6202eb, 0x9e620113, - 0x1e2602b1, 0x9e660299, 0x1e270233, 0x9e6703a2, - 0x1e2822c0, 0x1e7322a0, 0x1e202288, 0x1e602168, - 0x293c19f4, 0x2966387b, 0x69762970, 0xa9041dc7, - 0xa9475c0c, 0x29b61ccd, 0x29ee3c5e, 0x69ee0764, - 0xa9843977, 0xa9f46ebd, 0x28ba16b6, 0x28fc44db, - 0x68f61430, 0xa8b352cd, 0xa8c56d5e, 0x28024565, - 
0x2874134e, 0xa8027597, 0xa87b1aa0, 0x0c40734f, - 0x4cdfa177, 0x0cc76ee8, 0x4cdf2733, 0x0d40c23d, - 0x4ddfcaf8, 0x0dd9ccaa, 0x4c408d51, 0x0cdf85ec, - 0x4d60c239, 0x0dffcbc1, 0x4de9ce30, 0x4cc24999, - 0x0c404a7a, 0x4d40e6af, 0x4ddfe9b9, 0x0dddef8e, - 0x4cdf07b1, 0x0cc000fb, 0x0d60e238, 0x0dffe740, - 0x0de2eb2c, 0x0e31baf6, 0x4e31bb9b, 0x0e71b8a4, - 0x4e71b907, 0x4eb1b8e6, 0x0e30a841, 0x4e30ab7a, - 0x0e70aa0f, 0x4e70a862, 0x4eb0a9cd, 0x6e30f9cd, - 0x0e31ab38, 0x2e31ab17, 0x4e31a8a4, 0x6e31aa93, - 0x0e71aa0f, 0x2e71a820, 0x4e71a8a4, 0x6e71aab4, - 0x4eb1a98b, 0x6eb1abdd, 0x6eb0fa0f, 0x7e30fad5, - 0x7e70f8a4, 0x7eb0f9ee, 0x7ef0faf6, 0x0e20bb59, - 0x4e20b8e6, 0x0e60b9ac, 0x4e60b9ee, 0x0ea0b9cd, - 0x4ea0b9ee, 0x4ee0b949, 0x0ea0fb59, 0x4ea0fbbc, - 0x4ee0f96a, 0x2ea0fa93, 0x6ea0f98b, 0x6ee0fa51, - 0x2ea1fad5, 0x6ea1fa0f, 0x6ee1fab4, 0x2e205b17, - 0x6e205b7a, 0x0e271cc5, 0x4e281ce6, 0x0eb11e0f, - 0x4eb11e0f, 0x2e3b1f59, 0x6e321e30, 0x0e3d879b, - 0x4e3a8738, 0x0e71860f, 0x4e7b8759, 0x0eb085ee, - 0x4eac856a, 0x4eef85cd, 0x0e30d5ee, 0x4e36d6b4, - 0x4e63d441, 0x2e3886f6, 0x6e2087fe, 0x2e7085ee, - 0x6e648462, 0x2ea884e6, 0x6ea58483, 0x6ee98507, - 0x0ebad738, 0x4ea2d420, 0x4efdd79b, 0x0e3f9fdd, - 0x4e279cc5, 0x0e679cc5, 0x4e7f9fdd, 0x0ead9d8b, - 0x4ebb9f59, 0x2ea2d420, 0x6ea0d7fe, 0x6ee2d420, - 0x2e33de51, 0x6e3edfbc, 0x6e7bdf59, 0x0e6b9549, - 0x4e7b9759, 0x0eae95ac, 0x4eb1960f, 0x0e2dcd8b, - 0x4e2ccd6a, 0x4e73ce51, 0x2e7a9738, 0x6e7796d5, - 0x2eb99717, 0x6ea29420, 0x0eb2ce30, 0x4eaccd6a, - 0x4ee8cce6, 0x2e3effbc, 0x6e28fce6, 0x6e67fcc5, - 0x0e2764c5, 0x4e3666b4, 0x0e736651, 0x4e71660f, - 0x0eb36651, 0x4ebf67dd, 0x0e3ca77a, 0x4e3ea7bc, - 0x0e63a441, 0x4e7da79b, 0x0ea2a420, 0x4eb6a6b4, - 0x0e3ef7bc, 0x4e31f60f, 0x4e6ef5ac, 0x0e2c6d6a, - 0x4e3e6fbc, 0x0e7e6fbc, 0x4e756e93, 0x0eb86ef6, - 0x4eac6d6a, 0x0e26aca4, 0x4e20affe, 0x0e76aeb4, - 0x4e6aad28, 0x0ea0affe, 0x4eb3ae51, 0x0eacf56a, - 0x4ebdf79b, 0x4ee4f462, 0x2e3a8f38, 0x6e268ca4, - 0x2e658c83, 0x6e6a8d28, 0x2eb88ef6, 0x6eb38e51, - 
0x6eef8dcd, 0x0e26e4a4, 0x4e3ee7bc, 0x4e79e717, - 0x0e3736d5, 0x4e3b3759, 0x0e7a3738, 0x4e653483, - 0x0eb93717, 0x4ebc377a, 0x4ef93717, 0x2e3035ee, - 0x6e3736d5, 0x2e653483, 0x6e793717, 0x2eaa3528, - 0x6eba3738, 0x6ef53693, 0x2e313e0f, 0x6e323e30, - 0x2e643c62, 0x6e633c41, 0x2ea23c20, 0x6eba3f38, - 0x6ee63ca4, 0x2ea5e483, 0x6eade58b, 0x6ee0e7fe, - 0x0e3d3f9b, 0x4e2b3d49, 0x0e7b3f59, 0x4e643c62, - 0x0eae3dac, 0x4eb33e51, 0x4ee03ffe, 0x2e23e441, - 0x6e2ee5ac, 0x6e7ee7bc, 0xba5fd3e3, 0x3a5f03e5, + 0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f, + 0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf, + 0xd503239f, 0xd50321df, 0xd50323ff, 0xd50323df, + 0xd503211f, 0xd503233f, 0xd503231f, 0xd503215f, + 0xd503237f, 0xd503235f, 0xd69f03e0, 0xd6bf03e0, + 0xd5033fdf, 0xd503207f, 0xd50320ff, 0xd5033e9f, + 0xd50332bf, 0xd61f0200, 0xd63f0280, 0xdac123ea, + 0xdac127fb, 0xdac12be8, 0xdac12fe0, 0xdac133e1, + 0xdac137f5, 0xdac13bf1, 0xdac13ffd, 0xdac147fd, + 0xd61f0b9f, 0xd61f0c3f, 0xd63f0aff, 0xd63f0ebf, + 0xdac143f4, 0xc8167e7b, 0xc80bfcd0, 0xc85f7c11, + 0xc85ffd44, 0xc89ffed8, 0xc8dffe6a, 0x88017fc5, + 0x8808fe2c, 0x885f7dc9, 0x885ffc27, 0x889ffe05, + 0x88dffd82, 0x480a7c6c, 0x481cff4e, 0x485f7d5e, + 0x485ffeae, 0x489ffd2d, 0x48dfff76, 0x081c7d73, + 0x081efc53, 0x085f7ee2, 0x085ffc01, 0x089ffe0c, + 0x08dffded, 0xc87f55b1, 0xc87ff90b, 0xc8382c2d, + 0xc83aedb5, 0x887f0d94, 0x887f87a6, 0x88262e04, + 0x8824b2be, 0xf8061366, 0xb802d151, 0x381e32da, + 0x781ce155, 0xf847d30e, 0xb85f0307, 0x39403448, + 0x785c333e, 0x389f2183, 0x789e422a, 0x78dfb075, + 0xb8817322, 0xfc5bb039, 0xbc40637d, 0xfc02919d, + 0xbc18d2c2, 0xf8003cba, 0xb8199cb4, 0x381e7d88, + 0x781c7c54, 0xf8516fae, 0xb8404fad, 0x385f7e78, + 0x785edf63, 0x389fbc31, 0x789f3e71, 0x78de6d75, + 0xb89c4d21, 0xfc509efa, 0xbc581eb6, 0xfc128ced, + 0xbc198dac, 0xf81134b4, 0xb81b679d, 0x381ea704, + 0x781eb52d, 0xf85c94fa, 0xb858d46d, 0x3840c4a1, + 0x785de5a8, 0x389e5697, 0x789fe4d4, 0x78dd6629, + 0xb89e24d5, 0xfc5e36d0, 0xbc5fd569, 0xfc03c756, + 
0xbc1fe7b0, 0xf824cac1, 0xb82d7bd7, 0x382c596c, + 0x78207999, 0xf86058f1, 0xb86e5a61, 0x3869784c, + 0x787bc936, 0x38aff995, 0x78b078dc, 0x78f6ca39, + 0xb8bdea24, 0xfc63f825, 0xbc6d5a38, 0xfc37fa31, + 0xbc25dbd1, 0xf91ba97d, 0xb91e4abc, 0x391b485c, + 0x7919c380, 0xf95e18f9, 0xb958a860, 0x395f20be, + 0x7958f6ee, 0x399bea6a, 0x799b363d, 0x79da47d9, + 0xb99d5851, 0xfd5da60f, 0xbd584fcc, 0xfd1db821, + 0xbd1e9965, 0x58ffdb71, 0x18ffdb42, 0xf886f320, + 0xd8ffdb00, 0xf8bb49c0, 0xf99815c0, 0x1a0f0320, + 0x3a030301, 0x5a140311, 0x7a0d000b, 0x9a07015c, + 0xba1001e4, 0xda140182, 0xfa0d01bd, 0x0b2c6cce, + 0x2b3e5331, 0xcb2e0620, 0x6b3de709, 0x8b20cac1, + 0xab362f8c, 0xcb31518a, 0xeb2acf8f, 0x3a57d262, + 0x7a493226, 0xba4832a2, 0xfa454261, 0x3a518acc, + 0x7a472a23, 0xba5cba05, 0xfa439ac5, 0x1a8cb35d, + 0x1a8f355b, 0x5a9e9395, 0x5a9e3769, 0x9a9dd1fd, + 0x9a8406b9, 0xda9d62b1, 0xda868695, 0x5ac0007e, + 0x5ac00675, 0x5ac00b0b, 0x5ac01360, 0x5ac015d9, + 0xdac001c3, 0xdac004f1, 0xdac00b0f, 0xdac00e3c, + 0xdac01059, 0xdac0179a, 0xdac10325, 0xdac1077a, + 0xdac10a30, 0xdac10ea6, 0xdac1100c, 0xdac11584, + 0xdac11a3b, 0xdac11f9c, 0xd71f0851, 0xd71f0d4f, + 0xd73f09ce, 0xd73f0c79, 0x1ace0a6f, 0x1ac40e05, + 0x1ac4233a, 0x1acc2442, 0x1ac82a3d, 0x1ac42c67, + 0x9ada0899, 0x9ad10c99, 0x9ad12340, 0x9ad525f7, + 0x9adb2a3c, 0x9ac02c6a, 0x9bc97f27, 0x9b5d7de6, + 0x1b02454f, 0x1b0bdd67, 0x9b173ba7, 0x9b0b917b, + 0x9b2f3998, 0x9b3cb574, 0x9bb7798b, 0x9ba9b5da, + 0x7ea5d4ea, 0x1e2309fd, 0x1e2f198b, 0x1e312bde, + 0x1e2f3a93, 0x7ef5d52f, 0x1e7b0922, 0x1e7e1ba7, + 0x1e622831, 0x1e633946, 0x1f070578, 0x1f03c40b, + 0x1f3618dc, 0x1f3a0b60, 0x1f5c2ce5, 0x1f4bddb9, + 0x1f715513, 0x1f734699, 0x1e2043a2, 0x1e20c116, + 0x1e214275, 0x1e21c174, 0x1e22c291, 0x1e6041e6, + 0x1e60c063, 0x1e61407c, 0x1e61c1db, 0x1e62414e, + 0x1e38016c, 0x9e380151, 0x1e7800f9, 0x9e7801c7, + 0x1e22001c, 0x9e220016, 0x1e6202ec, 0x9e6201ad, + 0x1e2601c7, 0x9e660107, 0x1e270234, 0x9e6703dc, + 0x1e222200, 0x1e702120, 0x1e202288, 0x1e6023a8, + 
0x29266b01, 0x29462d85, 0x69463f75, 0xa90272c5, + 0xa97e467b, 0x29aa1f4d, 0x29fa54cd, 0x69c27b74, + 0xa9b81555, 0xa9fa12ee, 0x2884321d, 0x28cc477a, + 0x68f451c4, 0xa8b909d0, 0xa8f060f7, 0x281069e0, + 0x2866191a, 0xa8392b2f, 0xa8760670, 0x0c4073db, + 0x4cdfa079, 0x0cca6e1e, 0x4cdf2670, 0x0d40c317, + 0x4ddfc948, 0x0dd7ce89, 0x4c408c62, 0x0cdf87c8, + 0x4d60c344, 0x0dffca23, 0x4df0cd7d, 0x4cd74801, + 0x0c404aa0, 0x4d40e4e5, 0x4ddfe8e1, 0x0dcfeca2, + 0x4cdf07bb, 0x0cc70098, 0x0d60e2ef, 0x0dffe6ae, + 0x0df9e934, 0x0e31bb17, 0x4e31bb7a, 0x0e71b8c5, + 0x4e71b8e6, 0x4eb1ba0f, 0x0e30aa0f, 0x4e30ab59, + 0x0e70aa30, 0x4e70ab9b, 0x4eb0ab38, 0x6e30fa0f, + 0x0e31ab59, 0x2e31a9ee, 0x4e31a96a, 0x6e31a9cd, + 0x0e71a9ee, 0x2e71aab4, 0x4e71a841, 0x6e71aaf6, + 0x4eb1abfe, 0x6eb1a9ee, 0x6eb0f862, 0x7e30f8e6, + 0x7e70f883, 0x7eb0f907, 0x7ef0fb38, 0x0e20b820, + 0x4e20bb9b, 0x0e60bbdd, 0x4e60b8c5, 0x0ea0b8c5, + 0x4ea0bbdd, 0x4ee0b98b, 0x0ea0fb59, 0x4ea0f820, + 0x4ee0fbfe, 0x2ea0f820, 0x6ea0fa51, 0x6ee0fbbc, + 0x2ea1fb59, 0x6ea1f949, 0x6ee1fb59, 0x2e2059ac, + 0x6e205a0f, 0x0e2d1d8b, 0x4e2c1d6a, 0x0eb31e51, + 0x4eba1f38, 0x2e371ed5, 0x6e391f17, 0x0e228420, + 0x4e328630, 0x0e6c856a, 0x4e6884e6, 0x0ebe87bc, + 0x4ea884e6, 0x4ee784c5, 0x0e27d4c5, 0x4e36d6b4, + 0x4e73d651, 0x2e31860f, 0x6e338651, 0x2e7f87dd, + 0x6e7c877a, 0x2ebe87bc, 0x6ea38441, 0x6efd879b, + 0x0ea2d420, 0x4eb6d6b4, 0x4efed7bc, 0x0e319e0f, + 0x4e2e9dac, 0x0e6c9d6a, 0x4e7e9fbc, 0x0ebe9fbc, + 0x4eb59e93, 0x2eb8d6f6, 0x6eacd56a, 0x6ee6d4a4, + 0x2e20dffe, 0x6e36deb4, 0x6e6add28, 0x0e6097fe, + 0x4e739651, 0x0eac956a, 0x4ebd979b, 0x0e24cc62, + 0x4e3acf38, 0x4e66cca4, 0x2e659483, 0x6e6a9528, + 0x2eb896f6, 0x6eb39651, 0x0eafcdcd, 0x4ea6cca4, + 0x4efecfbc, 0x2e39ff17, 0x6e37fed5, 0x6e7bff59, + 0x0e3a6738, 0x4e256483, 0x0e796717, 0x4e7c677a, + 0x0eb96717, 0x4eb065ee, 0x0e37a6d5, 0x4e25a483, + 0x0e79a717, 0x4e6aa528, 0x0ebaa738, 0x4eb5a693, + 0x0e31f60f, 0x4e32f630, 0x4e64f462, 0x0e236c41, + 0x4e226c20, 0x0e7a6f38, 0x4e666ca4, 0x0ea56c83, + 
0x4ead6d8b, 0x0e20affe, 0x4e3daf9b, 0x0e6bad49, + 0x4e7baf59, 0x0ea4ac62, 0x4eaeadac, 0x0eb3f651, + 0x4ea0f7fe, 0x4ee3f441, 0x2e2e8dac, 0x6e3e8fbc, + 0x2e628c20, 0x6e738e51, 0x2eae8dac, 0x6eb38e51, + 0x6ef78ed5, 0x0e2ee5ac, 0x4e3de79b, 0x4e7fe7dd, + 0x0e2037fe, 0x4e233441, 0x0e7b3759, 0x4e7d379b, + 0x0ea634a4, 0x4ebf37dd, 0x4ee53483, 0x2e2834e6, + 0x6e3f37dd, 0x2e7b3759, 0x6e733651, 0x2eaa3528, + 0x6ea93507, 0x6eee35ac, 0x2e223c20, 0x6e353e93, + 0x2e633c41, 0x6e793f17, 0x2ea43c62, 0x6ea23c20, + 0x6eea3d28, 0x2eb9e717, 0x6ebbe759, 0x6ef1e60f, + 0x0e3f3fdd, 0x4e253c83, 0x0e6c3d6a, 0x4e783ef6, + 0x0eac3d6a, 0x4ea63ca4, 0x4ef33e51, 0x2e23e441, + 0x6e2de58b, 0x6e69e507, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4, 0x7a42cbe2, 0x93df03ff, 0xc820ffff, 0x8822fc7f, 0xc8247cbf, 0x88267fff, 0x4e010fe0, 0x5e040420, 0x4e081fe1, 0x4e0c1fe1, 0x4e0a1fe1, @@ -1336,48 +1395,48 @@ 0x1e703000, 0x1e721000, 0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000, 0x1e7c1000, - 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0xf8208193, - 0xf83101b6, 0xf83c13fe, 0xf821239a, 0xf824309e, - 0xf826535e, 0xf8304109, 0xf82c7280, 0xf8216058, - 0xf8a08309, 0xf8ba03d0, 0xf8a312ea, 0xf8aa21e4, - 0xf8a2310b, 0xf8aa522f, 0xf8a2418a, 0xf8ac71af, - 0xf8a26287, 0xf8fa8090, 0xf8e20184, 0xf8f01215, - 0xf8f022ab, 0xf8f7334c, 0xf8f751dc, 0xf8eb4038, - 0xf8ec715f, 0xf8f06047, 0xf863826d, 0xf8710070, - 0xf86113cb, 0xf86521e8, 0xf87d301e, 0xf8745287, - 0xf87742bc, 0xf87b70b9, 0xf8616217, 0xb83f8185, - 0xb82901fc, 0xb83d13f6, 0xb83320bf, 0xb82e33f0, - 0xb830529b, 0xb830416c, 0xb82973c6, 0xb831639b, - 0xb8be8147, 0xb8b4008a, 0xb8b81231, 0xb8b623a3, - 0xb8af3276, 0xb8b35056, 0xb8af4186, 0xb8b071ab, - 0xb8b763c1, 0xb8f38225, 0xb8e202d0, 0xb8ed12aa, - 0xb8fd219b, 0xb8fb3023, 0xb8ff5278, 0xb8f14389, - 0xb8fb70ef, 0xb8f563f7, 0xb87983e2, 0xb87b0150, - 0xb8771073, 0xb8702320, 0xb87a3057, 0xb870508c, - 0xb87c43be, 0xb87070db, 0xb86961fd, 0xce273c87, - 0xce080ac9, 0xce7e8e9b, 0xce808b45, 
0xce79806e, - 0xce758768, 0xcec0835a, 0xce608ad8, 0x2520d264, - 0x2521cf80, 0x058074c1, 0x054242c9, 0x05004476, - 0x25a0df08, 0x25a1c206, 0x0583288b, 0x05401c3a, - 0x05027e8d, 0x2520ce05, 0x25a1cb0a, 0x0580989a, - 0x0540e096, 0x0500fb73, 0x2560c2ce, 0x2521d590, - 0x05803e97, 0x05400d31, 0x05003ed0, 0x2520c623, - 0x25a1cdd1, 0x058052ac, 0x0540ba33, 0x05003ed7, - 0x25a0c6cd, 0x2521cf00, 0x0583c5b1, 0x05407336, - 0x05001e62, 0x04e400f4, 0x04a80407, 0x65c402d3, - 0x65cb0ac9, 0x659007c5, 0x0456ac36, 0x04c01608, - 0x049a048f, 0x041087a8, 0x04db13bc, 0x045aac49, - 0x041900fc, 0x0453963a, 0x04d192a8, 0x049016a5, - 0x0497b3b6, 0x049ea093, 0x04180677, 0x04081a77, - 0x04ca09c8, 0x04011eb1, 0x04dca15e, 0x6580812c, - 0x65cd9098, 0x65c68b66, 0x65c793cd, 0x65c297d6, - 0x049dae69, 0x6582bd34, 0x6580ae6d, 0x6581aa78, - 0x658db211, 0x65818160, 0x65a48def, 0x65ea041d, - 0x65e9201a, 0x65e34b1c, 0x65ed7b87, 0x04d1598a, - 0x049d6c51, 0x04273295, 0x04ba3022, 0x04713209, - 0x04e23080, 0x05ab68ce, 0x057d6e0e, 0x045a22c3, - 0x04183b63, 0x04d934f3, 0x04482cb5, 0x048a26b9, - 0x65872071, 0x65862cf3, 0x6598322e, 0x04c13a2d, + 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0xf82a822f, + 0xf822018a, 0xf82c11af, 0xf8222287, 0xf83a3090, + 0xf8225184, 0xf8304215, 0xf83072ab, 0xf837634c, + 0xf8b781dc, 0xf8ab0038, 0xf8ac115f, 0xf8b02047, + 0xf8a3326d, 0xf8b15070, 0xf8a143cb, 0xf8a571e8, + 0xf8bd601e, 0xf8f48287, 0xf8f702bc, 0xf8fb10b9, + 0xf8e12217, 0xf8ff3185, 0xf8e951fc, 0xf8fd43f6, + 0xf8f370bf, 0xf8ee63f0, 0xf870829b, 0xf870016c, + 0xf86913c6, 0xf871239b, 0xf87e3147, 0xf874508a, + 0xf8784231, 0xf87673a3, 0xf86f6276, 0xb8338056, + 0xb82f0186, 0xb83011ab, 0xb83723c1, 0xb8333225, + 0xb82252d0, 0xb82d42aa, 0xb83d719b, 0xb83b6023, + 0xb8bf8278, 0xb8b10389, 0xb8bb10ef, 0xb8b523f7, + 0xb8b933e2, 0xb8bb5150, 0xb8b74073, 0xb8b07320, + 0xb8ba6057, 0xb8f0808c, 0xb8fc03be, 0xb8f010db, + 0xb8e921fd, 0xb8e730e4, 0xb8ef52e9, 0xb8e84382, + 0xb8f570bf, 0xb8fb6220, 0xb86f8344, 0xb86802dc, + 0xb87b133b, 0xb8772080, 0xb8663010, 
0xb864502f, + 0xb86a40a7, 0xb86a70fc, 0xb87462b7, 0xce284145, + 0xce1108de, 0xce7c8fab, 0xce96eb42, 0xce7b81ae, + 0xce6586f0, 0xcec081a2, 0xce6a89ea, 0x25a0cc5a, + 0x25a1d143, 0x05800e44, 0x05406531, 0x05002d42, + 0x2520c677, 0x25a1cd07, 0x0580687b, 0x0543bb42, + 0x050044a6, 0x25a0c86c, 0x25a1d358, 0x05800500, + 0x05400ad3, 0x05000e06, 0x25e0c951, 0x25a1d54a, + 0x05839276, 0x0540ea6f, 0x0503c8a4, 0x25a0d448, + 0x2521d056, 0x058059c9, 0x05406d05, 0x05003cb6, + 0x25a0d0c8, 0x2561c4f9, 0x05809904, 0x05400e5d, + 0x0500cadd, 0x043c0162, 0x04ba0427, 0x65c801d1, + 0x65c50b15, 0x65d60635, 0x0416b67d, 0x040012e4, + 0x04da06f3, 0x04508113, 0x04db1a2e, 0x041aa7d5, + 0x0419158a, 0x04938709, 0x045198c4, 0x049019bb, + 0x0497b6de, 0x045ebd3e, 0x04d80693, 0x044809a9, + 0x044a0313, 0x04810e33, 0x049ca410, 0x658089eb, + 0x65cd85ef, 0x65c68145, 0x6587801a, 0x65c29d53, + 0x04ddb4e3, 0x6582aebc, 0x65c0ae3a, 0x65c1ac51, + 0x658db690, 0x65c18033, 0x65f18a11, 0x65b70440, + 0x65ee2a86, 0x65e34c7d, 0x65bb6309, 0x049954f3, + 0x041964ed, 0x04313235, 0x04b33123, 0x046e3167, + 0x04ed3171, 0x05716bd1, 0x05ba6dcf, 0x045a34fb, + 0x04583f65, 0x04992f00, 0x04882074, 0x04ca2739, + 0x65873031, 0x65863dae, 0x65d823d1, 0x044137b6, }; // END Generated code -- do not edit diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java index d7ecc7c04ef06..14478eb21949c 100644 --- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -54,6 +54,7 @@ public class CodeInstallationTest { protected final TargetDescription target; protected final ConstantReflectionProvider constantReflection; protected final TestHotSpotVMConfig config; + protected final Architecture arch; public CodeInstallationTest() { JVMCIBackend backend = JVMCI.getRuntime().getHostJVMCIBackend(); @@ -61,7 +62,8 @@ public CodeInstallationTest() { codeCache = backend.getCodeCache(); target = backend.getTarget(); constantReflection = backend.getConstantReflection(); - config = new TestHotSpotVMConfig(HotSpotJVMCIRuntime.runtime().getConfigStore()); + arch = codeCache.getTarget().arch; + config = new TestHotSpotVMConfig(HotSpotJVMCIRuntime.runtime().getConfigStore(), arch); } protected interface TestCompiler { @@ -70,7 +72,6 @@ protected interface TestCompiler { } private TestAssembler createAssembler() { - Architecture arch = codeCache.getTarget().arch; if (arch instanceof AMD64) { return new AMD64TestAssembler(codeCache, config); } else if (arch instanceof AArch64) { diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/TestHotSpotVMConfig.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/TestHotSpotVMConfig.java index c3c1ab25f5e99..9468027bc85b0 100644 --- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/TestHotSpotVMConfig.java +++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/TestHotSpotVMConfig.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -22,13 +22,16 @@ */ package jdk.vm.ci.code.test; +import jdk.vm.ci.aarch64.AArch64; +import jdk.vm.ci.code.Architecture; import jdk.vm.ci.hotspot.HotSpotVMConfigAccess; import jdk.vm.ci.hotspot.HotSpotVMConfigStore; public class TestHotSpotVMConfig extends HotSpotVMConfigAccess { - public TestHotSpotVMConfig(HotSpotVMConfigStore config) { + public TestHotSpotVMConfig(HotSpotVMConfigStore config, Architecture arch) { super(config); + ropProtection = (arch instanceof AArch64) ? getFieldValue("VM_Version::_rop_protection", Boolean.class) : false; } public final boolean useCompressedOops = getFlag("UseCompressedOops", Boolean.class); @@ -48,4 +51,6 @@ public TestHotSpotVMConfig(HotSpotVMConfigStore config) { public final int maxOopMapStackOffset = getFieldValue("CompilerToVM::Data::_max_oop_map_stack_offset", Integer.class, "int"); public final int heapWordSize = getConstant("HeapWordSize", Integer.class); + + public final boolean ropProtection; } diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/aarch64/AArch64TestAssembler.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/aarch64/AArch64TestAssembler.java index 098095598ba69..47feeac193895 100644 --- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/aarch64/AArch64TestAssembler.java +++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/aarch64/AArch64TestAssembler.java @@ -1,6 +1,6 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, Arm Limited. All rights reserved. + * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Arm Limited. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -254,6 +254,9 @@ public void emitGrowStack(int size) { public void emitPrologue() { // Must be patchable by NativeJump::patch_verified_entry emitNop(); + if (config.ropProtection) { + code.emitInt(0xdac103be); // pacia x30, x29 + } code.emitInt(0xa9be7bfd); // stp x29, x30, [sp, #-32]! code.emitInt(0x910003fd); // mov x29, sp @@ -469,6 +472,9 @@ public void emitIntRet(Register a) { emitMov(AArch64.r0, a); code.emitInt(0x910003bf); // mov sp, x29 code.emitInt(0xa8c27bfd); // ldp x29, x30, [sp], #32 + if (config.ropProtection) { + code.emitInt(0xdac113be); // autia x30, x29 + } code.emitInt(0xd65f03c0); // ret } @@ -477,6 +483,9 @@ public void emitFloatRet(Register a) { assert a == AArch64.v0 : "Unimplemented move " + a; code.emitInt(0x910003bf); // mov sp, x29 code.emitInt(0xa8c27bfd); // ldp x29, x30, [sp], #32 + if (config.ropProtection) { + code.emitInt(0xdac113be); // autia x30, x29 + } code.emitInt(0xd65f03c0); // ret }