diff --git a/internal/asm/amd64/assembler.go b/internal/asm/amd64/assembler.go index ddd68cb5e0..1191699ffd 100644 --- a/internal/asm/amd64/assembler.go +++ b/internal/asm/amd64/assembler.go @@ -88,7 +88,11 @@ type Assembler interface { // the destination is the constant `value`. CompileMemoryToConst(instruction asm.Instruction, srcBaseReg asm.Register, srcOffset int64, value int64) asm.Node - // CompileLoadStaticConstToRegister adds an instruction where the source operand is asm.StaticConst located in the + // CompileStaticConstToRegister adds an instruction where the source operand is asm.StaticConst located in the // memory and the destination is the dstReg. - CompileLoadStaticConstToRegister(instruction asm.Instruction, c asm.StaticConst, dstReg asm.Register) error + CompileStaticConstToRegister(instruction asm.Instruction, c asm.StaticConst, dstReg asm.Register) error + + // CompileRegisterToStaticConst adds an instruction where the destination operand is asm.StaticConst located in the + // memory and the source is the srcReg. + CompileRegisterToStaticConst(instruction asm.Instruction, srcReg asm.Register, c asm.StaticConst) error } diff --git a/internal/asm/amd64/consts.go b/internal/asm/amd64/consts.go index b7083e4eb4..16c53baa1f 100644 --- a/internal/asm/amd64/consts.go +++ b/internal/asm/amd64/consts.go @@ -167,7 +167,7 @@ const ( MOVBQSX // MOVBQZX is the MOVZX instruction for single-byte in 64-bit mode. https://www.felixcloutier.com/x86/movzx MOVBQZX - // MOVL is the MOV instruction for a word. + // MOVL is the MOV instruction for a double word. MOVL // MOVLQSX is the MOVSXD instruction. https://www.felixcloutier.com/x86/movsx:movsxd MOVLQSX diff --git a/internal/asm/amd64/impl.go b/internal/asm/amd64/impl.go index 42b822681b..da52bd96e7 100644 --- a/internal/asm/amd64/impl.go +++ b/internal/asm/amd64/impl.go @@ -204,6 +204,7 @@ var ( OperandTypesConstToRegister = OperandTypes{OperandTypeConst, OperandTypeRegister} OperandTypesConstToMemory = OperandTypes{OperandTypeConst, OperandTypeMemory} OperandTypesStaticConstToRegister = OperandTypes{OperandTypeStaticConst, OperandTypeRegister} + OperandTypesRegisterToStaticConst = OperandTypes{OperandTypeRegister, OperandTypeStaticConst} ) // String implements fmt.Stringer @@ -295,6 +296,8 @@ func (a *AssemblerImpl) EncodeNode(n *NodeImpl) (err error) { err = a.EncodeMemoryToConst(n) case OperandTypesStaticConstToRegister: err = a.encodeStaticConstToRegister(n) + case OperandTypesRegisterToStaticConst: + err = a.encodeRegisterToStaticConst(n) default: err = fmt.Errorf("encoder undefined for [%s] operand type", n.Types) } diff --git a/internal/asm/amd64/impl_staticconst.go b/internal/asm/amd64/impl_staticconst.go index 63c3684fc7..f8e3f0945f 100644 --- a/internal/asm/amd64/impl_staticconst.go +++ b/internal/asm/amd64/impl_staticconst.go @@ -74,15 +74,96 @@ func (a *AssemblerImpl) maybeFlushConstants(isEndOfFunction bool) { } } +type staticConstOpcode struct { + opcode []byte + mandatoryPrefix byte + rex RexPrefix +} + +var registerToStaticConstOpcodes = map[asm.Instruction]staticConstOpcode{ + // https://www.felixcloutier.com/x86/cmp + CMPL: {opcode: []byte{0x3b}}, + CMPQ: {opcode: []byte{0x3b}, rex: RexPrefixW}, +} + +func (a *AssemblerImpl) encodeRegisterToStaticConst(n *NodeImpl) (err error) { + opc, ok := registerToStaticConstOpcodes[n.Instruction] + if !ok { + return errorEncodingUnsupported(n) + } + return a.encodeStaticConstImpl(n, opc.opcode, opc.rex, opc.mandatoryPrefix) +} + +var staticConstToRegisterOpcodes = 
map[asm.Instruction]struct { + opcode []byte + mandatoryPrefix byte + rex RexPrefix +}{ + // https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64 + MOVDQU: {mandatoryPrefix: 0xf3, opcode: []byte{0x0f, 0x6f}}, + // https://www.felixcloutier.com/x86/lea + LEAQ: {opcode: []byte{0x8d}, rex: RexPrefixW}, + // https://www.felixcloutier.com/x86/movupd + MOVUPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x10}}, + // https://www.felixcloutier.com/x86/mov + MOVL: {opcode: []byte{0x8b}}, + MOVQ: {opcode: []byte{0x8b}, rex: RexPrefixW}, + // https://www.felixcloutier.com/x86/ucomisd + UCOMISD: {opcode: []byte{0x0f, 0x2e}, mandatoryPrefix: 0x66}, + // https://www.felixcloutier.com/x86/ucomiss + UCOMISS: {opcode: []byte{0x0f, 0x2e}}, + // https://www.felixcloutier.com/x86/subss + SUBSS: {opcode: []byte{0x0f, 0x5c}, mandatoryPrefix: 0xf3}, + // https://www.felixcloutier.com/x86/subsd + SUBSD: {opcode: []byte{0x0f, 0x5c}, mandatoryPrefix: 0xf2}, + // https://www.felixcloutier.com/x86/cmp + CMPL: {opcode: []byte{0x39}}, + CMPQ: {opcode: []byte{0x39}, rex: RexPrefixW}, + // https://www.felixcloutier.com/x86/add + ADDL: {opcode: []byte{0x03}}, + ADDQ: {opcode: []byte{0x03}, rex: RexPrefixW}, +} + +var staticConstToVectorRegisterOpcodes = map[asm.Instruction]staticConstOpcode{ + // https://www.felixcloutier.com/x86/mov + MOVL: {opcode: []byte{0x0f, 0x6e}, mandatoryPrefix: 0x66}, + MOVQ: {opcode: []byte{0x0f, 0x7e}, mandatoryPrefix: 0xf3}, +} + func (a *AssemblerImpl) encodeStaticConstToRegister(n *NodeImpl) (err error) { + var opc staticConstOpcode + var ok bool + if IsVectorRegister(n.DstReg) && (n.Instruction == MOVL || n.Instruction == MOVQ) { + opc, ok = staticConstToVectorRegisterOpcodes[n.Instruction] + } else { + opc, ok = staticConstToRegisterOpcodes[n.Instruction] + } + if !ok { + return errorEncodingUnsupported(n) + } + return a.encodeStaticConstImpl(n, opc.opcode, opc.rex, opc.mandatoryPrefix) +} + +// encodeStaticConstImpl encodes an instruction where mod:r/m points to the memory location of the static constant n.staticConst, +// and the other operand is the register given at n.SrcReg or n.DstReg. +func (a *AssemblerImpl) encodeStaticConstImpl(n *NodeImpl, opcode []byte, rex RexPrefix, mandatoryPrefix byte) (err error) { a.pool.addConst(n.staticConst) - dstReg3Bits, rexPrefix, err := register3bits(n.DstReg, registerSpecifierPositionModRMFieldReg) + var reg asm.Register + if n.DstReg != asm.NilRegister { + reg = n.DstReg + } else { + reg = n.SrcReg + } + + reg3Bits, rexPrefix, err := register3bits(reg, registerSpecifierPositionModRMFieldReg) if err != nil { return err } - var inst []byte // mandatory prefix + rexPrefix |= rex + + var inst []byte key := asm.StaticConstKey(n.staticConst) a.pool.offsetFinalizedCallbacks[key] = append(a.pool.offsetFinalizedCallbacks[key], func(offsetOfConstInBinary int) { @@ -96,37 +177,18 @@ func (a *AssemblerImpl) encodeStaticConstToRegister(n *NodeImpl) (err error) { a.pool.firstUseOffsetInBinary = &nodeOffset // https://wiki.osdev.org/X86-64_Instruction_Encoding#32.2F64-bit_addressing - modRM := 0b00_000_101 | // Indicate "MOVDQU [RIP + 32bit displacement], DstReg" encoding. - (dstReg3Bits << 3) // Place the DstReg on ModRM:reg. 
-
-	var mandatoryPrefix byte
-	var opcodes []byte
-	switch n.Instruction {
-	case MOVDQU:
-		// https://www.felixcloutier.com/x86/movdqu:vmovdqu8:vmovdqu16:vmovdqu32:vmovdqu64
-		mandatoryPrefix = 0xf3
-		opcodes = []byte{0x0f, 0x6f}
-	case LEAQ:
-		// https://www.felixcloutier.com/x86/lea
-		rexPrefix |= RexPrefixW
-		opcodes = []byte{0x8d}
-	case MOVUPD:
-		// https://www.felixcloutier.com/x86/movupd
-		mandatoryPrefix = 0x66
-		opcodes = []byte{0x0f, 0x10}
-	default:
-		err = errorEncodingUnsupported(n)
-		return
-	}
+	modRM := 0b00_000_101 | // Indicate "[RIP + 32bit displacement]" encoding.
+		(reg3Bits << 3) // Place the reg on ModRM:reg.
 
 	if mandatoryPrefix != 0 {
 		inst = append(inst, mandatoryPrefix)
 	}
+
 	if rexPrefix != RexPrefixNone {
 		inst = append(inst, rexPrefix)
 	}
-	inst = append(inst, opcodes...)
+	inst = append(inst, opcode...)
 	inst = append(inst, modRM,
 		0x0, 0x0, 0x0, 0x0, // Preserve 4 bytes for displacement.
 	)
@@ -135,8 +197,8 @@ func (a *AssemblerImpl) encodeStaticConstToRegister(n *NodeImpl) (err error) {
 	return
 }
 
-// CompileLoadStaticConstToRegister implements Assembler.CompileLoadStaticConstToRegister.
-func (a *AssemblerImpl) CompileLoadStaticConstToRegister(instruction asm.Instruction, c asm.StaticConst, dstReg asm.Register) (err error) {
+// CompileStaticConstToRegister implements Assembler.CompileStaticConstToRegister.
+func (a *AssemblerImpl) CompileStaticConstToRegister(instruction asm.Instruction, c asm.StaticConst, dstReg asm.Register) (err error) {
 	if len(c)%2 != 0 {
 		err = fmt.Errorf("the length of a static constant must be even but was %d", len(c))
 		return
@@ -147,3 +209,16 @@ func (a *AssemblerImpl) CompileLoadStaticConstToRegister(instruction asm.Instruc
 	n.staticConst = c
 	return
 }
+
+// CompileRegisterToStaticConst implements Assembler.CompileRegisterToStaticConst.
+func (a *AssemblerImpl) CompileRegisterToStaticConst(instruction asm.Instruction, srcReg asm.Register, c asm.StaticConst) (err error) {
+	if len(c)%2 != 0 {
+		err = fmt.Errorf("the length of a static constant must be even but was %d", len(c))
+		return
+	}
+
+	n := a.newNode(instruction, OperandTypesRegisterToStaticConst)
+	n.SrcReg = srcReg
+	n.staticConst = c
+	return
+}
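The encoding above always emits ModRM mod=00, r/m=101 (RIP-relative addressing) and reserves four zero bytes for the 32-bit displacement; the offsetFinalizedCallbacks entry patches those bytes once the constant pool is flushed and the constant's final offset is known. A minimal sketch of that patch-up step, with illustrative names (binary, instrOffset, instrLen, and constOffset are assumptions, not the assembler's actual fields):

```go
// Sketch only: RIP-relative displacements are measured from the END of the
// instruction, so once the constant's final offset is known, the 4 reserved
// bytes are overwritten with (constOffset - endOfInstruction), little-endian.
func patchRIPRelativeDisp(binary []byte, instrOffset, instrLen, constOffset int) {
	endOfInstruction := instrOffset + instrLen
	disp := uint32(constOffset - endOfInstruction)
	// The displacement always occupies the last 4 bytes of the instruction.
	binary[endOfInstruction-4] = byte(disp)
	binary[endOfInstruction-3] = byte(disp >> 8)
	binary[endOfInstruction-2] = byte(disp >> 16)
	binary[endOfInstruction-1] = byte(disp >> 24)
}
```

Because the constant pool is flushed within the same function body, the displacement stays within the signed 32-bit range by construction, which is exactly what the removed uintptr-based scheme could not guarantee.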
diff --git a/internal/asm/amd64/impl_staticconst_test.go b/internal/asm/amd64/impl_staticconst_test.go
index 602e37524a..c78adff590 100644
--- a/internal/asm/amd64/impl_staticconst_test.go
+++ b/internal/asm/amd64/impl_staticconst_test.go
@@ -22,15 +22,15 @@ func TestConstPool_addConst(t *testing.T) {
 	}
 }
 
-func TestAssemblerImpl_CompileLoadStaticConstToRegister(t *testing.T) {
+func TestAssemblerImpl_CompileStaticConstToRegister(t *testing.T) {
 	a := NewAssemblerImpl()
 	t.Run("odd count of bytes", func(t *testing.T) {
-		err := a.CompileLoadStaticConstToRegister(MOVDQU, []byte{1}, RegAX)
+		err := a.CompileStaticConstToRegister(MOVDQU, []byte{1}, RegAX)
 		require.Error(t, err)
 	})
 	t.Run("ok", func(t *testing.T) {
 		cons := []byte{1, 2, 3, 4}
-		err := a.CompileLoadStaticConstToRegister(MOVDQU, cons, RegAX)
+		err := a.CompileStaticConstToRegister(MOVDQU, cons, RegAX)
 		require.NoError(t, err)
 		actualNode := a.Current
 		require.Equal(t, MOVDQU, actualNode.Instruction)
@@ -40,6 +40,24 @@ func TestAssemblerImpl_CompileLoadStaticConstToRegister(t *testing.T) {
 	})
 }
 
+func TestAssemblerImpl_CompileRegisterToStaticConst(t *testing.T) {
+	a := NewAssemblerImpl()
+	t.Run("odd count of bytes", func(t *testing.T) {
+		err := a.CompileRegisterToStaticConst(MOVDQU, RegAX, []byte{1})
+		require.Error(t, err)
+	})
+	t.Run("ok", func(t *testing.T) {
+		cons := []byte{1, 2, 3, 4}
+		err := a.CompileRegisterToStaticConst(MOVDQU, RegAX, cons)
+		require.NoError(t, err)
+		actualNode := a.Current
+		require.Equal(t, MOVDQU, actualNode.Instruction)
+		require.Equal(t, OperandTypeRegister, actualNode.Types.src)
+		require.Equal(t, OperandTypeStaticConst, actualNode.Types.dst)
+		require.Equal(t, cons, actualNode.staticConst)
+	})
+}
+
 func TestAssemblerImpl_maybeFlushConstants(t *testing.T) {
 	t.Run("no consts", func(t *testing.T) {
 		a := NewAssemblerImpl()
@@ -128,46 +146,483 @@ func TestAssemblerImpl_maybeFlushConstants(t *testing.T) {
 	}
 }
 
+func TestAssemblerImpl_encodeRegisterToStaticConst(t *testing.T) {
+	tests := []struct {
+		name            string
+		ins             asm.Instruction
+		c               asm.StaticConst
+		reg             asm.Register
+		ud2sBeforeConst int
+		exp             []byte
+	}{
+		{
+			name: "cmp r12d, dword ptr [rip + 0x14]",
+			ins: CMPL,
+			c: []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+			reg: RegR12,
+			ud2sBeforeConst: 10,
+			exp: []byte{
+				// cmp r12d, dword ptr [rip + 0x14]
+				// where rip = 0x7, therefore [rip + 0x14] = [0x1b]
+				0x44, 0x3b, 0x25, 0x14, 0x0, 0x0, 0x0,
+				// UD2 * ud2sBeforeConst
+				0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb,
+				// 0x1b: consts
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+		},
+		{
+			name: "cmp eax, dword ptr [rip + 0x14]",
+			ins: CMPL,
+			c: []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+			reg: RegAX,
+			ud2sBeforeConst: 10,
+			exp: []byte{
+				// cmp eax, dword ptr [rip + 0x14]
+				// where rip = 0x6, therefore [rip + 0x14] = [0x1a]
+				0x3b, 0x5, 0x14, 0x0, 0x0, 0x0,
+				// UD2 * ud2sBeforeConst
+				0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb,
+				// 0x1a: consts
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+		},
+		{
+			name: "cmp r12, qword ptr [rip]",
+			ins: CMPQ,
+			c: []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+			reg: RegR12,
+			ud2sBeforeConst: 0,
+			exp: []byte{
+				// cmp r12, qword ptr [rip]
+				// where rip points to the end of this instruction == the const.
+				0x4c, 0x3b, 0x25, 0x0, 0x0, 0x0, 0x0,
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+		},
+		{
+			name: "cmp rsp, qword ptr [rip + 0xa]",
+			ins: CMPQ,
+			c: []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+			reg: RegSP,
+			ud2sBeforeConst: 5,
+			exp: []byte{
+				// cmp rsp, qword ptr [rip + 0xa]
+				// where rip = 0x7, therefore [rip + 0xa] = [0x11]
+				0x48, 0x3b, 0x25, 0xa, 0x0, 0x0, 0x0,
+				// UD2 * ud2sBeforeConst
+				0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb,
+				// 0x11:
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+			},
+		},
+	}
+
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			a := NewAssemblerImpl()
+
+			err := a.CompileRegisterToStaticConst(tc.ins, tc.reg, tc.c)
+			require.NoError(t, err)
+
+			for i := 0; i < tc.ud2sBeforeConst; i++ {
+				a.CompileStandAlone(UD2)
+			}
+
+			actual, err := a.Assemble()
+			require.NoError(t, err)
+
+			require.Equal(t, tc.exp, actual, hex.EncodeToString(actual))
+		})
+	}
+}
+
 func TestAssemblerImpl_encodeStaticConstToRegister(t *testing.T) {
-	consts := []asm.StaticConst{
-		{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11},
-		{0x22, 0x22, 0x22, 0x22},
-		{0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33},
+	tests := []struct {
+		name            string
+		ins             asm.Instruction
+		c               asm.StaticConst
+		reg             asm.Register
+		ud2sBeforeConst int
+		exp             []byte
+	}{
+		{
+			name: "movdqu xmm14, xmmword ptr [rip + 0xa]",
+			ins: MOVDQU,
+			c: []byte{
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+				0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80,
+			},
+			reg: RegX14,
+			ud2sBeforeConst: 5,
+			exp: []byte{
+				// movdqu xmm14, xmmword ptr [rip + 0xa]
+				// where rip = 0x9, therefore [rip + 0xa] = [0x13]
+				0xf3, 0x44, 0xf, 0x6f, 0x35, 0xa, 0x0, 0x0, 0x0,
+				// UD2 * ud2sBeforeConst
+				0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb,
+				// 0x13:
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+				0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80,
+			},
+		},
+		{
+			name: "movupd xmm1, xmmword ptr [rip + 0xa]",
+			ins: MOVUPD,
+			c: []byte{
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+				0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80,
+			},
+			reg: RegX1,
+			ud2sBeforeConst: 5,
+			exp: []byte{
+				// movupd xmm1, xmmword ptr [rip + 0xa]
+				// where rip = 0x8, therefore [rip + 0xa] = [0x12]
+				0x66, 0xf, 0x10, 0xd, 0xa, 0x0, 0x0, 0x0,
+				// UD2 * ud2sBeforeConst
+				0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb,
+				// 0x12:
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+				0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80,
+			},
+		},
+		{
+			name: "lea r11, [rip + 0x14]",
+			ins: LEAQ,
+			c: []byte{
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+				0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80,
+			},
+			reg: RegR11,
+			ud2sBeforeConst: 10,
+			exp: []byte{
+				// lea r11, [rip + 0x14]
+				// where rip = 0x7, therefore [rip + 0x14] = [0x1b]
+				0x4c, 0x8d, 0x1d, 0x14, 0x0, 0x0, 0x0,
+				// UD2 * ud2sBeforeConst
+				0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb,
+				0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb,
+				// 0x1b:
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+				0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80,
+			},
+		},
+		{
+			name: "mov r11d, dword ptr [rip + 0x3c]",
+			ins: MOVL,
+			c: []byte{
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+				0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80,
+			},
+			reg: RegR11,
+			ud2sBeforeConst: 30,
+			exp: []byte{
+				// mov r11d, dword ptr [rip + 0x3c]
+				// where rip = 0x7, therefore [rip + 0x3c] = [0x43]
+				0x44, 0x8b, 0x1d, 0x3c, 0x0,
0x0, 0x0, + // UD2 * ud2sBeforeConst + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + // 0x43: + 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, + 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, + }, + }, + { + name: "movd xmm14, dword ptr [rip + 0x3c]", + ins: MOVL, + c: []byte{ + 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, + 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, + }, + reg: RegX14, + ud2sBeforeConst: 30, + exp: []byte{ + // movd xmm14, dword ptr [rip + 0x3c] + // where rip = 0x9, therefore [rip + 0x3c] = [0x45] + 0x66, 0x44, 0xf, 0x6e, 0x35, 0x3c, 0x0, 0x0, 0x0, + // UD2 * ud2sBeforeConst + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + // 0x45: + 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, + 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, + }, + }, + { + name: "mov rsp, qword ptr [rip + 0x3c]", + ins: MOVQ, + c: []byte{ + 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, + 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, + }, + reg: RegSP, + ud2sBeforeConst: 30, + exp: []byte{ + // mov rsp, qword ptr [rip + 0x3c] + // where rip = 0x7, therefore [rip + 0x3c] = [0x43] + 0x48, 0x8b, 0x25, 0x3c, 0x0, 0x0, 0x0, + // UD2 * ud2sBeforeConst + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + // 0x43: + 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, + 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, + }, + }, + { + name: "movq xmm1, qword ptr [rip + 0x3c]", + ins: MOVQ, + c: []byte{ + 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, + 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, + }, + reg: RegX1, + ud2sBeforeConst: 30, + exp: []byte{ + // movq xmm1, qword ptr [rip + 0x3c] + // where rip = 0x8, therefore [rip + 0x3c] = [0x44] + 0xf3, 0xf, 0x7e, 0xd, 0x3c, 0x0, 0x0, 0x0, + // UD2 * ud2sBeforeConst + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + // 0x44: + 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, + 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, + }, + }, + { + name: "ucomisd xmm15, qword ptr [rip + 6]", + ins: UCOMISD, + c: []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8}, + reg: RegX15, + ud2sBeforeConst: 3, + exp: []byte{ + // ucomisd xmm15, qword ptr [rip + 6] + // where rip = 0x9, therefore [rip + 6] = [0xf] + 0x66, 0x44, 0xf, 0x2e, 0x3d, 0x6, 0x0, 0x0, 0x0, + // UD2 * ud2sBeforeConst + 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, + // 0xf: + 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, + }, + }, + { + name: "ucomiss xmm15, dword ptr [rip + 6]", + ins: UCOMISS, + c: []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8}, + reg: RegX15, + ud2sBeforeConst: 3, + exp: []byte{ + // ucomiss xmm15, dword ptr [rip + 6] + // 
where rip = 0x8, therefore [rip + 6] = [0xe]
+				0x44, 0xf, 0x2e, 0x3d, 0x6, 0x0, 0x0, 0x0,
+				// UD2 * ud2sBeforeConst
+				0xf, 0xb, 0xf, 0xb, 0xf, 0xb,
+				// 0xe:
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+			},
+		},
+		{
+			name: "subss xmm13, dword ptr [rip + 0xa]",
+			ins: SUBSS,
+			c: []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+			reg: RegX13,
+			ud2sBeforeConst: 5,
+			exp: []byte{
+				// subss xmm13, dword ptr [rip + 0xa]
+				// where rip = 0x9, therefore [rip + 0xa] = [0x13]
+				0xf3, 0x44, 0xf, 0x5c, 0x2d, 0xa, 0x0, 0x0, 0x0,
+				// UD2 * ud2sBeforeConst
+				0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb,
+				// 0x13:
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+			},
+		},
+		{
+			name: "subsd xmm1, qword ptr [rip + 0xa]",
+			ins: SUBSD,
+			c: []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+			reg: RegX1,
+			ud2sBeforeConst: 5,
+			exp: []byte{
+				// subsd xmm1, qword ptr [rip + 0xa]
+				// where rip = 0x8, therefore [rip + 0xa] = [0x12]
+				0xf2, 0xf, 0x5c, 0xd, 0xa, 0x0, 0x0, 0x0,
+				// UD2 * ud2sBeforeConst
+				0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb,
+				// 0x12:
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+		},
+		{
+			name: "cmp dword ptr [rip + 0x14], r12d",
+			ins: CMPL,
+			c: []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+			reg: RegR12,
+			ud2sBeforeConst: 10,
+			exp: []byte{
+				// cmp dword ptr [rip + 0x14], r12d
+				// where rip = 0x7, therefore [rip + 0x14] = [0x1b]
+				0x44, 0x39, 0x25, 0x14, 0x0, 0x0, 0x0,
+				// UD2 * ud2sBeforeConst
+				0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb,
+				// 0x1b: consts
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+		},
+		{
+			name: "cmp dword ptr [rip + 0x14], eax",
+			ins: CMPL,
+			c: []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+			reg: RegAX,
+			ud2sBeforeConst: 10,
+			exp: []byte{
+				// cmp dword ptr [rip + 0x14], eax
+				// where rip = 0x6, therefore [rip + 0x14] = [0x1a]
+				0x39, 0x5, 0x14, 0x0, 0x0, 0x0,
+				// UD2 * ud2sBeforeConst
+				0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb,
+				// 0x1a: consts
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+		},
+		{
+			name: "cmp qword ptr [rip], r12",
+			ins: CMPQ,
+			c: []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+			reg: RegR12,
+			ud2sBeforeConst: 0,
+			exp: []byte{
+				// cmp qword ptr [rip], r12
+				// where rip points to the end of this instruction == the const.
+				0x4c, 0x39, 0x25, 0x0, 0x0, 0x0, 0x0,
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+		},
+		{
+			name: "cmp qword ptr [rip + 0xa], rsp",
+			ins: CMPQ,
+			c: []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+			reg: RegSP,
+			ud2sBeforeConst: 5,
+			exp: []byte{
+				// cmp qword ptr [rip + 0xa], rsp
+				// where rip = 0x7, therefore [rip + 0xa] = [0x11]
+				0x48, 0x39, 0x25, 0xa, 0x0, 0x0, 0x0,
+				// UD2 * ud2sBeforeConst
+				0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb,
+				// 0x11:
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+			},
+		},
+		{
+			name: "add eax, dword ptr [rip + 0xa]",
+			ins: ADDL,
+			c: []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+			reg: RegAX,
+			ud2sBeforeConst: 5,
+			exp: []byte{
+				// add eax, dword ptr [rip + 0xa]
+				// where rip = 0x6, therefore [rip + 0xa] = [0x10]
+				0x3, 0x5, 0xa, 0x0, 0x0, 0x0,
+				// UD2 * ud2sBeforeConst
+				0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb,
+				// 0x10:
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+			},
+		},
+		{
+			name: "add rax, qword ptr [rip + 0xa]",
+			ins: ADDQ,
+			c: []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8},
+			reg: RegAX,
+			ud2sBeforeConst: 5,
+			exp: []byte{
+				// add rax, qword ptr [rip + 0xa]
+				// where rip = 0x7, therefore [rip + 0xa] = [0x11]
+				0x48, 0x3, 0x5, 0xa, 0x0, 0x0, 0x0,
+				// UD2 * ud2sBeforeConst
+				0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb, 0xf, 0xb,
+				// 0x11:
+				0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+			},
+		},
+	}
 
-	a := NewAssemblerImpl()
-	a.CompileStandAlone(UD2) // insert any dummy instruction before MOVDQUs.
-	err := a.CompileLoadStaticConstToRegister(MOVDQU, consts[0], RegX12)
-	require.NoError(t, err)
-	err = a.CompileLoadStaticConstToRegister(MOVUPD, consts[1], RegX0)
-	require.NoError(t, err)
-	err = a.CompileLoadStaticConstToRegister(LEAQ, consts[0], RegX0)
-	require.NoError(t, err)
-	err = a.CompileLoadStaticConstToRegister(MOVDQU, consts[2], RegX12)
-	require.NoError(t, err)
-
-	actual, err := a.Assemble()
-	require.NoError(t, err)
-
-	require.Equal(t, []byte{
-		0x0f, 0x0b, // dummy instruction.
-		// 0x2: movdqu xmm12, xmmword ptr [rip + 0x18]
-		// where rip = 0x0b, therefore [rip + 0x18] = [0x23] = consts[0].
-		0xf3, 0x44, 0x0f, 0x6f, 0x25, 0x18, 0x00, 0x00, 0x00,
-		// 0x0b: movupd xmm0, xmmword ptr [rip + 0x18]
-		// where rip = 0x13, therefore [rip + 0x18] = [0x2b] = consts[1].
-		0x66, 0x0f, 0x10, 0x05, 0x18, 0x00, 0x00, 0x00,
-		// 0x13: lea rax, [rip + 9]
-		// where rip = 0x1a, therefore [rip + 0x9] = [0x23] = consts[0].
-		0x48, 0x8d, 0x05, 0x09, 0x00, 0x00, 0x00,
-		// 0x1a: movdqu xmm12, xmmword ptr [rip + 0xc]
-		// where rip = 0x23, therefore [rip + 0xc] = [0x2f] = consts[2].
-		0xf3, 0x44, 0x0f, 0x6f, 0x25, 0x0c, 0x00, 0x00, 0x00,
-		// 0x23: consts[0]
-		0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
-		// 0x2b: consts[1]
-		0x22, 0x22, 0x22, 0x22,
-		// 0x2f: consts[2]
-		0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33,
-	}, actual, hex.EncodeToString(actual))
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			a := NewAssemblerImpl()
+
+			err := a.CompileStaticConstToRegister(tc.ins, tc.c, tc.reg)
+			require.NoError(t, err)
+
+			for i := 0; i < tc.ud2sBeforeConst; i++ {
+				a.CompileStandAlone(UD2)
+			}
+
+			actual, err := a.Assemble()
+			require.NoError(t, err)
+
+			require.Equal(t, tc.exp, actual, hex.EncodeToString(actual))
+		})
+	}
 }
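The displacements expected by these tests follow mechanically from the layout: the 4-byte field holds the distance from the end of the instruction to the constant, which is appended after the UD2 padding when the pool is flushed. A standalone sanity check for the "cmp r12d, dword ptr [rip + 0x14]" case (the constants below simply restate that test's layout):

```go
package main

import "fmt"

func main() {
	const instrLen = 7      // REX.R (0x44) + opcode (0x3b) + ModRM (0x25) + 4-byte displacement
	const ud2Bytes = 10 * 2 // ten UD2s, each encoded as 0x0f 0x0b
	constOffset := instrLen + ud2Bytes
	// RIP already points just past the cmp instruction when the displacement applies.
	disp := constOffset - instrLen
	fmt.Printf("disp = %#x\n", disp) // disp = 0x14, matching the test expectation
}
```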
diff --git a/internal/engine/compiler/impl_amd64.go b/internal/engine/compiler/impl_amd64.go
index 704df008e7..aac50b5507 100644
--- a/internal/engine/compiler/impl_amd64.go
+++ b/internal/engine/compiler/impl_amd64.go
@@ -16,77 +16,33 @@ import (
 	"github.com/tetratelabs/wazero/internal/asm/amd64"
 	"github.com/tetratelabs/wazero/internal/buildoptions"
 	"github.com/tetratelabs/wazero/internal/platform"
+	"github.com/tetratelabs/wazero/internal/u32"
+	"github.com/tetratelabs/wazero/internal/u64"
 	"github.com/tetratelabs/wazero/internal/wasm"
 	"github.com/tetratelabs/wazero/internal/wazeroir"
 )
 
 var (
-	zero64Bit uint64 = 0
-	zero64BitAddress uintptr
-	minimum32BitSignedInt int32 = math.MinInt32
-	minimum32BitSignedIntAddress uintptr
-	maximum32BitSignedInt int32 = math.MaxInt32
-	maximum32BitSignedIntAddress uintptr
-	maximum32BitUnsignedInt uint32 = math.MaxUint32
-	maximum32BitUnsignedIntAddress uintptr
-	minimum64BitSignedInt int64 = math.MinInt64
-	minimum64BitSignedIntAddress uintptr
-	maximum64BitSignedInt int64 = math.MaxInt64
-	maximum64BitSignedIntAddress uintptr
-	maximum64BitUnsignedInt uint64 = math.MaxUint64
-	maximum64BitUnsignedIntAddress uintptr
-	float32SignBitMask uint32 = 1 << 31
-	float32RestBitMask = ^float32SignBitMask
-	float32SignBitMaskAddress uintptr
-	float32RestBitMaskAddress uintptr
-	float64SignBitMask uint64 = 1 << 63
-	float64RestBitMask = ^float64SignBitMask
-	float64SignBitMaskAddress uintptr
-	float64RestBitMaskAddress uintptr
-	float32ForMinimumSigned32bitInteger = math.Float32frombits(0xCF00_0000)
-	float32ForMinimumSigned32bitIntegerAddress uintptr
-	float64ForMinimumSigned32bitInteger = math.Float64frombits(0xC1E0_0000_0020_0000)
-	float64ForMinimumSigned32bitIntegerAddress uintptr
-	float32ForMinimumSigned64bitInteger = math.Float32frombits(0xDF00_0000)
-	float32ForMinimumSigned64bitIntegerAddress uintptr
-	float64ForMinimumSigned64bitInteger = math.Float64frombits(0xC3E0_0000_0000_0000)
-	float64ForMinimumSigned64bitIntegerAddress uintptr
-	float32ForMaximumSigned32bitIntPlusOne = math.Float32frombits(0x4F00_0000)
-	float32ForMaximumSigned32bitIntPlusOneAddress uintptr
-	float64ForMaximumSigned32bitIntPlusOne = math.Float64frombits(0x41E0_0000_0000_0000)
-	float64ForMaximumSigned32bitIntPlusOneAddress uintptr
-	float32ForMaximumSigned64bitIntPlusOne = math.Float32frombits(0x5F00_0000)
-	
float32ForMaximumSigned64bitIntPlusOneAddress uintptr - float64ForMaximumSigned64bitIntPlusOne = math.Float64frombits(0x43E0_0000_0000_0000) - float64ForMaximumSigned64bitIntPlusOneAddress uintptr + minimum32BitSignedInt int32 = math.MinInt32 + maximum32BitSignedInt int32 = math.MaxInt32 + maximum32BitUnsignedInt uint32 = math.MaxUint32 + minimum64BitSignedInt int64 = math.MinInt64 + maximum64BitSignedInt int64 = math.MaxInt64 + maximum64BitUnsignedInt uint64 = math.MaxUint64 + float32SignBitMask uint32 = 1 << 31 + float32RestBitMask = ^float32SignBitMask + float64SignBitMask uint64 = 1 << 63 + float64RestBitMask = ^float64SignBitMask + float32ForMinimumSigned32bitInteger = uint32(0xCF00_0000) + float64ForMinimumSigned32bitInteger = uint64(0xC1E0_0000_0020_0000) + float32ForMinimumSigned64bitInteger = uint32(0xDF00_0000) + float64ForMinimumSigned64bitInteger = uint64(0xC3E0_0000_0000_0000) + float32ForMaximumSigned32bitIntPlusOne = uint32(0x4F00_0000) + float64ForMaximumSigned32bitIntPlusOne = uint64(0x41E0_0000_0000_0000) + float32ForMaximumSigned64bitIntPlusOne = uint32(0x5F00_0000) + float64ForMaximumSigned64bitIntPlusOne = uint64(0x43E0_0000_0000_0000) ) -func init() { - // TODO: what if these address exceed 32-bit address space? Even though AMD says 2GB memory space - // should be enough for everyone, we might end up in these circum stances. We access these variables - // via 32-bit displacement which cannot accommodate 64-bit addresses. - // https://stackoverflow.com/questions/31853189/x86-64-assembly-why-displacement-not-64-bits - zero64BitAddress = uintptr(unsafe.Pointer(&zero64Bit)) - minimum32BitSignedIntAddress = uintptr(unsafe.Pointer(&minimum32BitSignedInt)) - maximum32BitSignedIntAddress = uintptr(unsafe.Pointer(&maximum32BitSignedInt)) - maximum32BitUnsignedIntAddress = uintptr(unsafe.Pointer(&maximum32BitUnsignedInt)) - minimum64BitSignedIntAddress = uintptr(unsafe.Pointer(&minimum64BitSignedInt)) - maximum64BitSignedIntAddress = uintptr(unsafe.Pointer(&maximum64BitSignedInt)) - maximum64BitUnsignedIntAddress = uintptr(unsafe.Pointer(&maximum64BitUnsignedInt)) - float32SignBitMaskAddress = uintptr(unsafe.Pointer(&float32SignBitMask)) - float32RestBitMaskAddress = uintptr(unsafe.Pointer(&float32RestBitMask)) - float64SignBitMaskAddress = uintptr(unsafe.Pointer(&float64SignBitMask)) - float64RestBitMaskAddress = uintptr(unsafe.Pointer(&float64RestBitMask)) - float32ForMinimumSigned32bitIntegerAddress = uintptr(unsafe.Pointer(&float32ForMinimumSigned32bitInteger)) - float64ForMinimumSigned32bitIntegerAddress = uintptr(unsafe.Pointer(&float64ForMinimumSigned32bitInteger)) - float32ForMinimumSigned64bitIntegerAddress = uintptr(unsafe.Pointer(&float32ForMinimumSigned64bitInteger)) - float64ForMinimumSigned64bitIntegerAddress = uintptr(unsafe.Pointer(&float64ForMinimumSigned64bitInteger)) - float32ForMaximumSigned32bitIntPlusOneAddress = uintptr(unsafe.Pointer(&float32ForMaximumSigned32bitIntPlusOne)) - float64ForMaximumSigned32bitIntPlusOneAddress = uintptr(unsafe.Pointer(&float64ForMaximumSigned32bitIntPlusOne)) - float32ForMaximumSigned64bitIntPlusOneAddress = uintptr(unsafe.Pointer(&float32ForMaximumSigned64bitIntPlusOne)) - float64ForMaximumSigned64bitIntPlusOneAddress = uintptr(unsafe.Pointer(&float64ForMaximumSigned64bitIntPlusOne)) -} - var ( // amd64ReservedRegisterForCallEngine: pointer to callEngine (i.e. 
*callEngine as uintptr)
 	amd64ReservedRegisterForCallEngine = amd64.RegR13
@@ -1525,9 +1481,13 @@ func (c *amd64Compiler) performDivisionOnInts(isRem, is32Bit, signed bool) error
 	// Next we check if the quotient is the most negative value for the signed integer.
 	// That means whether or not we try to do (math.MinInt32 / -1) or (math.MinInt64 / -1) respectively.
 	if is32Bit {
-		c.assembler.CompileRegisterToMemory(amd64.CMPL, x1.register, asm.NilRegister, int64(minimum32BitSignedIntAddress))
+		if err := c.assembler.CompileRegisterToStaticConst(amd64.CMPL, x1.register, u32.LeBytes(uint32(minimum32BitSignedInt))); err != nil {
+			return err
+		}
 	} else {
-		c.assembler.CompileRegisterToMemory(amd64.CMPQ, x1.register, asm.NilRegister, int64(minimum64BitSignedIntAddress))
+		if err := c.assembler.CompileRegisterToStaticConst(amd64.CMPQ, x1.register, u64.LeBytes(uint64(minimum64BitSignedInt))); err != nil {
+			return err
+		}
 	}
 
 	// If it doesn't equal, we jump to the normal case.
@@ -1782,10 +1742,16 @@ func (c *amd64Compiler) compileNeg(o *wazeroir.OperationNeg) (err error) {
 	// since we cannot take XOR directly with float reg and const.
 	// And then negate the value by XOR it with the sign-bit mask.
 	if o.Type == wazeroir.Float32 {
-		c.assembler.CompileMemoryToRegister(amd64.MOVL, asm.NilRegister, int64(float32SignBitMaskAddress), tmpReg)
+		err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, u32.LeBytes(float32SignBitMask), tmpReg)
+		if err != nil {
+			return err
+		}
 		c.assembler.CompileRegisterToRegister(amd64.XORPS, tmpReg, target.register)
 	} else {
-		c.assembler.CompileMemoryToRegister(amd64.MOVQ, asm.NilRegister, int64(float64SignBitMaskAddress), tmpReg)
+		err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, u64.LeBytes(float64SignBitMask), tmpReg)
+		if err != nil {
+			return err
+		}
 		c.assembler.CompileRegisterToRegister(amd64.XORPD, tmpReg, target.register)
 	}
 	return nil
@@ -1946,9 +1912,12 @@ func (c *amd64Compiler) compileCopysign(o *wazeroir.OperationCopysign) error {
 
 	// Move the rest bit mask to the temp register.
 	if is32Bit {
-		c.assembler.CompileMemoryToRegister(amd64.MOVL, asm.NilRegister, int64(float32RestBitMaskAddress), tmpReg)
+		err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, u32.LeBytes(float32RestBitMask), tmpReg)
 	} else {
-		c.assembler.CompileMemoryToRegister(amd64.MOVQ, asm.NilRegister, int64(float64RestBitMaskAddress), tmpReg)
+		err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, u64.LeBytes(float64RestBitMask), tmpReg)
+	}
+	if err != nil {
+		return err
 	}
 
 	// Clear the sign bit of x1 via AND with the mask.
@@ -1960,9 +1929,12 @@ func (c *amd64Compiler) compileCopysign(o *wazeroir.OperationCopysign) error {
 
 	// Move the sign bit mask to the temp register.
 	if is32Bit {
-		c.assembler.CompileMemoryToRegister(amd64.MOVL, asm.NilRegister, int64(float32SignBitMaskAddress), tmpReg)
+		err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, u32.LeBytes(float32SignBitMask), tmpReg)
 	} else {
-		c.assembler.CompileMemoryToRegister(amd64.MOVQ, asm.NilRegister, int64(float64SignBitMaskAddress), tmpReg)
+		err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, u64.LeBytes(float64SignBitMask), tmpReg)
+	}
+	if err != nil {
+		return err
 	}
 
 	// Clear the non-sign bits of x2 via AND with the mask.
@@ -2053,9 +2025,12 @@ func (c *amd64Compiler) emitUnsignedI32TruncFromFloat(isFloat32Bit, nonTrapping
 
 	// First, we check the source float value is above or equal math.MaxInt32+1.
 	if isFloat32Bit {
-		c.assembler.CompileMemoryToRegister(amd64.UCOMISS, asm.NilRegister, int64(float32ForMaximumSigned32bitIntPlusOneAddress), source.register)
+		err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, u32.LeBytes(float32ForMaximumSigned32bitIntPlusOne), source.register)
 	} else {
-		c.assembler.CompileMemoryToRegister(amd64.UCOMISD, asm.NilRegister, int64(float64ForMaximumSigned32bitIntPlusOneAddress), source.register)
+		err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, u64.LeBytes(float64ForMaximumSigned32bitIntPlusOne), source.register)
+	}
+	if err != nil {
+		return err
 	}
 
 	// Check the parity flag (set when the value is NaN), and if it is set, we should raise an exception.
@@ -2107,9 +2079,12 @@ func (c *amd64Compiler) emitUnsignedI32TruncFromFloat(isFloat32Bit, nonTrapping
 	// First, we subtract the math.MaxInt32+1 from the original value so it can fit in signed 32-bit integer.
 	c.assembler.SetJumpTargetOnNext(jmpAboveOrEqualMaxIn32PlusOne)
 	if isFloat32Bit {
-		c.assembler.CompileMemoryToRegister(amd64.SUBSS, asm.NilRegister, int64(float32ForMaximumSigned32bitIntPlusOneAddress), source.register)
+		err = c.assembler.CompileStaticConstToRegister(amd64.SUBSS, u32.LeBytes(float32ForMaximumSigned32bitIntPlusOne), source.register)
 	} else {
-		c.assembler.CompileMemoryToRegister(amd64.SUBSD, asm.NilRegister, int64(float64ForMaximumSigned32bitIntPlusOneAddress), source.register)
+		err = c.assembler.CompileStaticConstToRegister(amd64.SUBSD, u64.LeBytes(float64ForMaximumSigned32bitIntPlusOne), source.register)
+	}
+	if err != nil {
+		return err
 	}
 
 	// Then, convert the subtracted value as a signed 32-bit integer.
@@ -2129,7 +2104,9 @@ func (c *amd64Compiler) emitUnsignedI32TruncFromFloat(isFloat32Bit, nonTrapping
 
 	// Otherwise, we successfully converted the source float minus (math.MaxInt32+1) to int.
 	// So, we retrieve the original source float value by adding the sign mask.
-	c.assembler.CompileMemoryToRegister(amd64.ADDL, asm.NilRegister, int64(float32SignBitMaskAddress), result)
+	if err = c.assembler.CompileStaticConstToRegister(amd64.ADDL, u32.LeBytes(float32SignBitMask), result); err != nil {
+		return err
+	}
 
 	okJmpForAboveOrEqualMaxInt32PlusOne := c.assembler.CompileJump(amd64.JMP)
 
@@ -2137,7 +2114,10 @@ func (c *amd64Compiler) emitUnsignedI32TruncFromFloat(isFloat32Bit, nonTrapping
 	if !nonTrapping {
 		c.compileExitFromNativeCode(nativeCallStatusIntegerOverflow)
 	} else {
-		c.assembler.CompileMemoryToRegister(amd64.MOVL, asm.NilRegister, int64(maximum32BitUnsignedIntAddress), result)
+		err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, u32.LeBytes(maximum32BitUnsignedInt), result)
+		if err != nil {
+			return err
+		}
 	}
 
 	// We jump to the next instructions for valid cases.
@@ -2167,9 +2147,12 @@ func (c *amd64Compiler) emitUnsignedI64TruncFromFloat(isFloat32Bit, nonTrapping
 
 	// First, we check the source float value is above or equal math.MaxInt64+1.
 	if isFloat32Bit {
-		c.assembler.CompileMemoryToRegister(amd64.UCOMISS, asm.NilRegister, int64(float32ForMaximumSigned64bitIntPlusOneAddress), source.register)
+		err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, u32.LeBytes(float32ForMaximumSigned64bitIntPlusOne), source.register)
 	} else {
-		c.assembler.CompileMemoryToRegister(amd64.UCOMISD, asm.NilRegister, int64(float64ForMaximumSigned64bitIntPlusOneAddress), source.register)
+		err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, u64.LeBytes(float64ForMaximumSigned64bitIntPlusOne), source.register)
+	}
+	if err != nil {
+		return err
 	}
 
 	// Check the parity flag (set when the value is NaN), and if it is set, we should raise an exception.
@@ -2221,9 +2204,12 @@ func (c *amd64Compiler) emitUnsignedI64TruncFromFloat(isFloat32Bit, nonTrapping
 	// First, we subtract the math.MaxInt64+1 from the original value so it can fit in signed 64-bit integer.
 	c.assembler.SetJumpTargetOnNext(jmpAboveOrEqualMaxIn32PlusOne)
 	if isFloat32Bit {
-		c.assembler.CompileMemoryToRegister(amd64.SUBSS, asm.NilRegister, int64(float32ForMaximumSigned64bitIntPlusOneAddress), source.register)
+		err = c.assembler.CompileStaticConstToRegister(amd64.SUBSS, u32.LeBytes(float32ForMaximumSigned64bitIntPlusOne), source.register)
 	} else {
-		c.assembler.CompileMemoryToRegister(amd64.SUBSD, asm.NilRegister, int64(float64ForMaximumSigned64bitIntPlusOneAddress), source.register)
+		err = c.assembler.CompileStaticConstToRegister(amd64.SUBSD, u64.LeBytes(float64ForMaximumSigned64bitIntPlusOne), source.register)
+	}
+	if err != nil {
+		return err
 	}
 
 	// Then, convert the subtracted value as a signed 64-bit integer.
@@ -2243,7 +2229,9 @@ func (c *amd64Compiler) emitUnsignedI64TruncFromFloat(isFloat32Bit, nonTrapping
 
 	// Otherwise, we successfully converted the source float minus (math.MaxInt64+1) to int.
 	// So, we retrieve the original source float value by adding the sign mask.
-	c.assembler.CompileMemoryToRegister(amd64.ADDQ, asm.NilRegister, int64(float64SignBitMaskAddress), result)
+	if err = c.assembler.CompileStaticConstToRegister(amd64.ADDQ, u64.LeBytes(float64SignBitMask), result); err != nil {
+		return err
+	}
 
 	okJmpForAboveOrEqualMaxInt64PlusOne := c.assembler.CompileJump(amd64.JMP)
 
@@ -2251,7 +2239,10 @@ func (c *amd64Compiler) emitUnsignedI64TruncFromFloat(isFloat32Bit, nonTrapping
 	if !nonTrapping {
 		c.compileExitFromNativeCode(nativeCallStatusIntegerOverflow)
 	} else {
-		c.assembler.CompileMemoryToRegister(amd64.MOVQ, asm.NilRegister, int64(maximum64BitUnsignedIntAddress), result)
+		err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, u64.LeBytes(maximum64BitUnsignedInt), result)
+		if err != nil {
+			return err
+		}
 	}
 
 	// We jump to the next instructions for valid cases.
@@ -2290,7 +2281,10 @@ func (c *amd64Compiler) emitSignedI32TruncFromFloat(isFloat32Bit, nonTrapping bo
 	// 1) the source float value is either +-Inf or NaN, or it exceeds representative ranges of 32bit signed integer, or
 	// 2) the source equals the minimum signed 32-bit (=-2147483648.000000) whose bit pattern is float32ForMinimumSigned32bitIntegerAddress for 32 bit float
 	// or float64ForMinimumSigned32bitIntegerAddress for 64bit float.
-	c.assembler.CompileMemoryToRegister(amd64.CMPL, asm.NilRegister, int64(float32SignBitMaskAddress), result)
+	err = c.assembler.CompileStaticConstToRegister(amd64.CMPL, u32.LeBytes(float32SignBitMask), result)
+	if err != nil {
+		return err
+	}
 
 	// Otherwise, jump to exit as the result is valid.
 	okJmp := c.assembler.CompileJump(amd64.JNE)
@@ -2321,9 +2315,12 @@ func (c *amd64Compiler) emitSignedI32TruncFromFloat(isFloat32Bit, nonTrapping bo
 	// meaning that the value exceeds the lower bound of 32-bit signed integer range.
 	c.assembler.SetJumpTargetOnNext(jmpIfNotNaN)
 	if isFloat32Bit {
-		c.assembler.CompileMemoryToRegister(amd64.UCOMISS, asm.NilRegister, int64(float32ForMinimumSigned32bitIntegerAddress), source.register)
+		err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, u32.LeBytes(float32ForMinimumSigned32bitInteger), source.register)
 	} else {
-		c.assembler.CompileMemoryToRegister(amd64.UCOMISD, asm.NilRegister, int64(float64ForMinimumSigned32bitIntegerAddress), source.register)
+		err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, u64.LeBytes(float64ForMinimumSigned32bitInteger), source.register)
+	}
+	if err != nil {
+		return err
 	}
 
 	if !nonTrapping {
@@ -2338,9 +2335,12 @@ func (c *amd64Compiler) emitSignedI32TruncFromFloat(isFloat32Bit, nonTrapping bo
 		// At this point, the value is the minimum signed 32-bit int (=-2147483648.000000) or larger than 32-bit maximum.
 		// So, check if the value equals the minimum signed 32-bit int.
 		if isFloat32Bit {
-			c.assembler.CompileMemoryToRegister(amd64.UCOMISS, asm.NilRegister, int64(zero64BitAddress), source.register)
+			err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, []byte{0, 0, 0, 0}, source.register)
 		} else {
-			c.assembler.CompileMemoryToRegister(amd64.UCOMISD, asm.NilRegister, int64(zero64BitAddress), source.register)
+			err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, []byte{0, 0, 0, 0, 0, 0, 0, 0}, source.register)
+		}
+		if err != nil {
+			return err
 		}
 
 		jmpIfMinimumSignedInt := c.assembler.CompileJump(amd64.JCS) // jump if the value is minus (= the minimum signed 32-bit int).
@@ -2351,7 +2348,7 @@ func (c *amd64Compiler) emitSignedI32TruncFromFloat(isFloat32Bit, nonTrapping bo
 		// We jump to the next instructions for valid cases.
 		c.assembler.SetJumpTargetOnNext(okJmp, jmpIfMinimumSignedInt)
 	} else {
-		// Jump if the value does not exceeds the lower bound.
+		// Jump if the value does not exceed the lower bound.
 		var jmpIfNotExceedsLowerBound asm.Node
 		if isFloat32Bit {
 			jmpIfNotExceedsLowerBound = c.assembler.CompileJump(amd64.JCC)
@@ -2360,20 +2357,27 @@ func (c *amd64Compiler) emitSignedI32TruncFromFloat(isFloat32Bit, nonTrapping bo
 		}
 
 		// If the value exceeds the lower bound, we "saturate" it to the minimum.
-		c.assembler.CompileMemoryToRegister(amd64.MOVL, asm.NilRegister, int64(minimum32BitSignedIntAddress), result)
+		if err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, u32.LeBytes(uint32(minimum32BitSignedInt)), result); err != nil {
+			return err
+		}
 		nonTrappingSaturatedMinimumJump := c.assembler.CompileJump(amd64.JMP)
 
 		// Otherwise, the value is the minimum signed 32-bit int (=-2147483648.000000) or larger than 32-bit maximum.
 		c.assembler.SetJumpTargetOnNext(jmpIfNotExceedsLowerBound)
 		if isFloat32Bit {
-			c.assembler.CompileMemoryToRegister(amd64.UCOMISS, asm.NilRegister, int64(zero64BitAddress), source.register)
+			err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, []byte{0, 0, 0, 0}, source.register)
 		} else {
-			c.assembler.CompileMemoryToRegister(amd64.UCOMISD, asm.NilRegister, int64(zero64BitAddress), source.register)
+			err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, []byte{0, 0, 0, 0, 0, 0, 0, 0}, source.register)
+		}
+		if err != nil {
+			return err
 		}
 
 		jmpIfMinimumSignedInt := c.assembler.CompileJump(amd64.JCS) // jump if the value is minus (= the minimum signed 32-bit int).
 		// If the value exceeds signed 32-bit maximum, we saturate it to the maximum.
-		c.assembler.CompileMemoryToRegister(amd64.MOVL, asm.NilRegister, int64(maximum32BitSignedIntAddress), result)
+		if err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, u32.LeBytes(uint32(maximum32BitSignedInt)), result); err != nil {
+			return err
+		}
 
 		c.assembler.SetJumpTargetOnNext(okJmp, nontrappingNanJump, nonTrappingSaturatedMinimumJump, jmpIfMinimumSignedInt)
 	}
@@ -2408,7 +2412,10 @@ func (c *amd64Compiler) emitSignedI64TruncFromFloat(isFloat32Bit, nonTrapping bo
 	// 1) the source float value is either +-Inf or NaN, or it exceeds representative ranges of 64bit signed integer, or
 	// 2) the source equals the minimum signed 64-bit (=-9223372036854775808.0) whose bit pattern is float32ForMinimumSigned64bitIntegerAddress for 32 bit float
 	// or float64ForMinimumSigned64bitIntegerAddress for 64bit float.
-	c.assembler.CompileMemoryToRegister(amd64.CMPQ, asm.NilRegister, int64(float64SignBitMaskAddress), result)
+	err = c.assembler.CompileStaticConstToRegister(amd64.CMPQ, u64.LeBytes(float64SignBitMask), result)
+	if err != nil {
+		return err
+	}
 
 	// Otherwise, we simply jump to exit as the result is valid.
 	okJmp := c.assembler.CompileJump(amd64.JNE)
@@ -2438,9 +2445,12 @@ func (c *amd64Compiler) emitSignedI64TruncFromFloat(isFloat32Bit, nonTrapping bo
 	// meaning that the value exceeds the lower bound of 64-bit signed integer range.
 	c.assembler.SetJumpTargetOnNext(jmpIfNotNaN)
 	if isFloat32Bit {
-		c.assembler.CompileMemoryToRegister(amd64.UCOMISS, asm.NilRegister, int64(float32ForMinimumSigned64bitIntegerAddress), source.register)
+		err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, u32.LeBytes(float32ForMinimumSigned64bitInteger), source.register)
 	} else {
-		c.assembler.CompileMemoryToRegister(amd64.UCOMISD, asm.NilRegister, int64(float64ForMinimumSigned64bitIntegerAddress), source.register)
+		err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, u64.LeBytes(float64ForMinimumSigned64bitInteger), source.register)
+	}
+	if err != nil {
+		return err
 	}
 
 	if !nonTrapping {
@@ -2450,9 +2460,12 @@ func (c *amd64Compiler) emitSignedI64TruncFromFloat(isFloat32Bit, nonTrapping bo
 		// At this point, the value is the minimum signed 64-bit int (=-9223372036854775808.0) or larger than 64-bit maximum.
 		// So, check if the value equals the minimum signed 64-bit int.
 		if isFloat32Bit {
-			c.assembler.CompileMemoryToRegister(amd64.UCOMISS, asm.NilRegister, int64(zero64BitAddress), source.register)
+			err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, []byte{0, 0, 0, 0}, source.register)
 		} else {
-			c.assembler.CompileMemoryToRegister(amd64.UCOMISD, asm.NilRegister, int64(zero64BitAddress), source.register)
+			err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, []byte{0, 0, 0, 0, 0, 0, 0, 0}, source.register)
+		}
+		if err != nil {
+			return err
 		}
 
 		jmpIfMinimumSignedInt := c.assembler.CompileJump(amd64.JCS) // jump if the value is minus (= the minimum signed 64-bit int).
@@ -2467,22 +2480,31 @@ func (c *amd64Compiler) emitSignedI64TruncFromFloat(isFloat32Bit, nonTrapping bo
 		jmpIfNotExceedsLowerBound := c.assembler.CompileJump(amd64.JCC)
 
 		// If the value exceeds the lower bound, we "saturate" it to the minimum.
- c.assembler.CompileMemoryToRegister(amd64.MOVQ, asm.NilRegister, int64(minimum64BitSignedIntAddress), result) + err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, u64.LeBytes(uint64(minimum64BitSignedInt)), result) + if err != nil { + return err + } + nonTrappingSaturatedMinimumJump := c.assembler.CompileJump(amd64.JMP) // Otherwise, the value is the minimum signed 64-bit int (=-9223372036854775808.0) or larger than 64-bit maximum. // So, check if the value equals the minimum signed 64-bit int. c.assembler.SetJumpTargetOnNext(jmpIfNotExceedsLowerBound) if isFloat32Bit { - c.assembler.CompileMemoryToRegister(amd64.UCOMISS, asm.NilRegister, int64(zero64BitAddress), source.register) + err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, []byte{0, 0, 0, 0}, source.register) } else { - c.assembler.CompileMemoryToRegister(amd64.UCOMISD, asm.NilRegister, int64(zero64BitAddress), source.register) + err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, []byte{0, 0, 0, 0, 0, 0, 0, 0}, source.register) + } + if err != nil { + return err } jmpIfMinimumSignedInt := c.assembler.CompileJump(amd64.JCS) // jump if the value is minus (= the minimum signed 64-bit int). // If the value exceeds signed 64-bit maximum, we saturate it to the maximum. - c.assembler.CompileMemoryToRegister(amd64.MOVQ, asm.NilRegister, int64(maximum64BitSignedIntAddress), result) + if err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, u64.LeBytes(uint64(maximum64BitSignedInt)), result); err != nil { + return err + } c.assembler.SetJumpTargetOnNext(okJmp, jmpIfMinimumSignedInt, nonTrappingSaturatedMinimumJump, nontrappingNanJump) } @@ -2874,17 +2896,20 @@ func (c *amd64Compiler) compileEqOrNeForFloats(x1Reg, x2Reg asm.Register, cmpIns } // compileEqz implements compiler.compileEqz for the amd64 architecture. -func (c *amd64Compiler) compileEqz(o *wazeroir.OperationEqz) error { +func (c *amd64Compiler) compileEqz(o *wazeroir.OperationEqz) (err error) { v := c.locationStack.pop() - if err := c.compileEnsureOnRegister(v); err != nil { + if err = c.compileEnsureOnRegister(v); err != nil { return err } switch o.Type { case wazeroir.UnsignedInt32: - c.assembler.CompileMemoryToRegister(amd64.CMPL, asm.NilRegister, int64(zero64BitAddress), v.register) + err = c.assembler.CompileStaticConstToRegister(amd64.CMPL, []byte{0, 0, 0, 0}, v.register) case wazeroir.UnsignedInt64: - c.assembler.CompileMemoryToRegister(amd64.CMPQ, asm.NilRegister, int64(zero64BitAddress), v.register) + err = c.assembler.CompileStaticConstToRegister(amd64.CMPQ, []byte{0, 0, 0, 0, 0, 0, 0, 0}, v.register) + } + if err != nil { + return err } // v is consumed by the cmp operation so release it. 
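All of the call sites above follow the same shape: the constant travels as little-endian bytes into the constant pool and is addressed RIP-relative, instead of being read through a 32-bit displacement to a Go-heap address (the failure mode the deleted init() comment worried about). A hedged sketch of the pattern; compileCompareWithZero is a hypothetical helper for illustration, not a function in this patch:

```go
// Hypothetical helper illustrating the new calling convention: the constant is
// passed by value as little-endian bytes and ends up in the constant pool,
// addressed RIP-relative, rather than via a fixed 32-bit displacement to the
// address of a Go variable (which breaks once addresses exceed the 2GB range).
func compileCompareWithZero(c *amd64Compiler, is32Bit bool, reg asm.Register) error {
	if is32Bit {
		// CMPL consumes a 4-byte constant.
		return c.assembler.CompileStaticConstToRegister(amd64.CMPL, u32.LeBytes(0), reg)
	}
	// CMPQ consumes an 8-byte constant.
	return c.assembler.CompileStaticConstToRegister(amd64.CMPQ, u64.LeBytes(0), reg)
}
```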
diff --git a/internal/engine/compiler/impl_vec_amd64.go b/internal/engine/compiler/impl_vec_amd64.go index 01974b1383..3e59bea006 100644 --- a/internal/engine/compiler/impl_vec_amd64.go +++ b/internal/engine/compiler/impl_vec_amd64.go @@ -495,12 +495,12 @@ func (c *amd64Compiler) compileV128Shuffle(o *wazeroir.OperationV128Shuffle) err } } - err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, consts[:16], tmp) + err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, consts[:16], tmp) if err != nil { return err } c.assembler.CompileRegisterToRegister(amd64.PSHUFB, tmp, v.register) - err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, consts[16:], tmp) + err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, consts[16:], tmp) if err != nil { return err } @@ -534,7 +534,7 @@ func (c *amd64Compiler) compileV128Swizzle(*wazeroir.OperationV128Swizzle) error return err } - err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, swizzleConst[:], tmp) + err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, swizzleConst[:], tmp) if err != nil { return err } @@ -957,7 +957,7 @@ func (c *amd64Compiler) compileV128ShrI8x16Impl(signed bool) error { } // Read the initial address of the mask table into gpTmp register. - err = c.assembler.CompileLoadStaticConstToRegister(amd64.LEAQ, i8x16LogicalSHRMaskTable[:], gpTmp) + err = c.assembler.CompileStaticConstToRegister(amd64.LEAQ, i8x16LogicalSHRMaskTable[:], gpTmp) if err != nil { return err } @@ -1042,7 +1042,7 @@ func (c *amd64Compiler) compileV128Shl(o *wazeroir.OperationV128Shl) error { } // Read the initial address of the mask table into gpTmp register. - err = c.assembler.CompileLoadStaticConstToRegister(amd64.LEAQ, i8x16SHLMaskTable[:], gpTmp) + err = c.assembler.CompileStaticConstToRegister(amd64.LEAQ, i8x16SHLMaskTable[:], gpTmp) if err != nil { return err } @@ -1754,7 +1754,7 @@ func (c *amd64Compiler) compileV128Popcnt(*wazeroir.OperationV128Popcnt) error { // Read the popcntMask into tmp1, and we have // tmp1 = [0xf, ..., 0xf] - if err := c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, popcntMask[:], tmp1); err != nil { + if err := c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, popcntMask[:], tmp1); err != nil { return err } @@ -1775,7 +1775,7 @@ func (c *amd64Compiler) compileV128Popcnt(*wazeroir.OperationV128Popcnt) error { // Read the popcntTable into tmp1, and we have // tmp1 = [0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04] - if err := c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, popcntTable[:], tmp1); err != nil { + if err := c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, popcntTable[:], tmp1); err != nil { return err } @@ -2261,7 +2261,7 @@ func (c *amd64Compiler) compileV128Q15mulrSatS(*wazeroir.OperationV128Q15mulrSat x1r, x2r := x1.register, x2.register // See https://github.com/WebAssembly/simd/pull/365 for the following logic. 
- if err := c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, q15mulrSatSMask[:], tmp); err != nil { + if err := c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, q15mulrSatSMask[:], tmp); err != nil { return err } @@ -2299,7 +2299,7 @@ func (c *amd64Compiler) compileV128ExtAddPairwise(o *wazeroir.OperationV128ExtAd return err } - if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, + if err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, allOnesI8x16[:], allOnesReg); err != nil { return err } @@ -2329,7 +2329,7 @@ func (c *amd64Compiler) compileV128ExtAddPairwise(o *wazeroir.OperationV128ExtAd if o.Signed { // See https://www.felixcloutier.com/x86/pmaddwd - if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, allOnesI16x8[:], tmp); err != nil { + if err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, allOnesI16x8[:], tmp); err != nil { return err } @@ -2337,7 +2337,7 @@ func (c *amd64Compiler) compileV128ExtAddPairwise(o *wazeroir.OperationV128ExtAd c.pushVectorRuntimeValueLocationOnRegister(vr) } else { - if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, extAddPairwiseI16x8uMask[:16], tmp); err != nil { + if err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, extAddPairwiseI16x8uMask[:16], tmp); err != nil { return err } @@ -2347,7 +2347,7 @@ func (c *amd64Compiler) compileV128ExtAddPairwise(o *wazeroir.OperationV128ExtAd // vr[i] = int8(-w1) for i = 0...8 c.assembler.CompileRegisterToRegister(amd64.PXOR, tmp, vr) - if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, allOnesI16x8[:], tmp); err != nil { + if err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, allOnesI16x8[:], tmp); err != nil { return err } @@ -2356,7 +2356,7 @@ func (c *amd64Compiler) compileV128ExtAddPairwise(o *wazeroir.OperationV128ExtAd c.assembler.CompileRegisterToRegister(amd64.PMADDWD, tmp, vr) // tmp[i] = [0, 0, 1, 0] = int32(math.MaxInt16+1) - if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, extAddPairwiseI16x8uMask[16:], tmp); err != nil { + if err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, extAddPairwiseI16x8uMask[16:], tmp); err != nil { return err } @@ -2468,7 +2468,7 @@ func (c *amd64Compiler) compileV128FConvertFromI(o *wazeroir.OperationV128FConve } // tmp = [0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] - if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, fConvertFromIMask[:16], tmp); err != nil { + if err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, fConvertFromIMask[:16], tmp); err != nil { return err } @@ -2479,7 +2479,7 @@ func (c *amd64Compiler) compileV128FConvertFromI(o *wazeroir.OperationV128FConve c.assembler.CompileRegisterToRegister(amd64.UNPCKLPS, tmp, vr) // tmp = [float64(0x1.0p52), float64(0x1.0p52)] - if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVDQU, twop52[:], tmp); err != nil { + if err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, twop52[:], tmp); err != nil { return err } @@ -2649,7 +2649,7 @@ func (c *amd64Compiler) compileV128ITruncSatFromF(o *wazeroir.OperationV128ITrun c.assembler.CompileRegisterToRegister(amd64.CMPEQPD, tmp, tmp) // Load the 2147483647 into tmp2's each lane. 
- if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVUPD, i32sMaxOnF64x2[:], tmp2); err != nil { + if err = c.assembler.CompileStaticConstToRegister(amd64.MOVUPD, i32sMaxOnF64x2[:], tmp2); err != nil { return err } @@ -2671,7 +2671,7 @@ func (c *amd64Compiler) compileV128ITruncSatFromF(o *wazeroir.OperationV128ITrun c.assembler.CompileRegisterToRegister(amd64.MAXPD, tmp, vr) // tmp2[i] = float64(math.MaxUint32) = math.MaxUint32 - if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVUPD, i32uMaxOnF64x2[:], tmp2); err != nil { + if err = c.assembler.CompileStaticConstToRegister(amd64.MOVUPD, i32uMaxOnF64x2[:], tmp2); err != nil { return err } @@ -2683,7 +2683,7 @@ func (c *amd64Compiler) compileV128ITruncSatFromF(o *wazeroir.OperationV128ITrun c.assembler.CompileRegisterToRegisterWithArg(amd64.ROUNDPD, vr, vr, 0x3) // tmp2[i] = float64(0x1.0p52) - if err = c.assembler.CompileLoadStaticConstToRegister(amd64.MOVUPD, twop52[:], tmp2); err != nil { + if err = c.assembler.CompileStaticConstToRegister(amd64.MOVUPD, twop52[:], tmp2); err != nil { return err } diff --git a/internal/integration_test/asm/amd64_debug/debug_assembler.go b/internal/integration_test/asm/amd64_debug/debug_assembler.go index 6b5c615083..037e809286 100644 --- a/internal/integration_test/asm/amd64_debug/debug_assembler.go +++ b/internal/integration_test/asm/amd64_debug/debug_assembler.go @@ -301,7 +301,12 @@ func (ta *testAssembler) CompileMemoryToConst( return &testNode{goasm: ret.(*golang_asm.GolangAsmNode), n: ret2.(*asm_amd64.NodeImpl)} } -// CompileLoadStaticConstToRegister implements Assembler.CompileLoadStaticConstToRegister. -func (ta *testAssembler) CompileLoadStaticConstToRegister(asm.Instruction, []byte, asm.Register) (err error) { - panic("CompileLoadStaticConstToRegister cannot be supported by golang-asm") +// CompileStaticConstToRegister implements Assembler.CompileStaticConstToRegister. +func (ta *testAssembler) CompileStaticConstToRegister(asm.Instruction, []byte, asm.Register) (err error) { + panic("CompileStaticConstToRegister cannot be supported by golang-asm") +} + +// CompileRegisterToStaticConst implements Assembler.CompileRegisterToStaticConst. +func (ta *testAssembler) CompileRegisterToStaticConst(asm.Instruction, asm.Register, []byte) (err error) { + panic("CompileRegisterToStaticConst cannot be supported by golang-asm") } diff --git a/internal/integration_test/asm/amd64_debug/golang_asm.go b/internal/integration_test/asm/amd64_debug/golang_asm.go index cfbbe0a8fa..8f57922cc1 100644 --- a/internal/integration_test/asm/amd64_debug/golang_asm.go +++ b/internal/integration_test/asm/amd64_debug/golang_asm.go @@ -380,9 +380,14 @@ func (a *assemblerGoAsmImpl) CompileReadInstructionAddress( }) } -// CompileLoadStaticConstToRegister implements Assembler.CompileLoadStaticConstToRegister. -func (a *assemblerGoAsmImpl) CompileLoadStaticConstToRegister(instruction asm.Instruction, c []byte, dstReg asm.Register) (err error) { - panic("CompileLoadStaticConstToRegister cannot be supported by golangasm") +// CompileStaticConstToRegister implements Assembler.CompileStaticConstToRegister. +func (a *assemblerGoAsmImpl) CompileStaticConstToRegister(instruction asm.Instruction, c []byte, dstReg asm.Register) (err error) { + panic("CompileStaticConstToRegister cannot be supported by golangasm") +} + +// CompileRegisterToStaticConst implements Assembler.CompileRegisterToStaticConst. 
+func (a *assemblerGoAsmImpl) CompileRegisterToStaticConst(asm.Instruction, asm.Register, []byte) (err error) {
+	panic("CompileRegisterToStaticConst cannot be supported by golang-asm")
 }
 
 // castAsGolangAsmRegister maps the registers to golang-asm specific register values.
diff --git a/internal/u32/u32.go b/internal/u32/u32.go
new file mode 100644
index 0000000000..5960a6f0cc
--- /dev/null
+++ b/internal/u32/u32.go
@@ -0,0 +1,11 @@
+package u32
+
+// LeBytes returns a byte slice corresponding to the 4 bytes in the uint32 in little-endian byte order.
+func LeBytes(v uint32) []byte {
+	return []byte{
+		byte(v),
+		byte(v >> 8),
+		byte(v >> 16),
+		byte(v >> 24),
+	}
+}
diff --git a/internal/u32/u32_test.go b/internal/u32/u32_test.go
new file mode 100644
index 0000000000..7d0e2dc104
--- /dev/null
+++ b/internal/u32/u32_test.go
@@ -0,0 +1,39 @@
+package u32
+
+import (
+	"encoding/binary"
+	"math"
+	"testing"
+
+	"github.com/tetratelabs/wazero/internal/testing/require"
+)
+
+func TestLeBytes(t *testing.T) {
+	tests := []struct {
+		name  string
+		input uint32
+	}{
+		{
+			name:  "zero",
+			input: 0,
+		},
+		{
+			name:  "half",
+			input: math.MaxInt32,
+		},
+		{
+			name:  "max",
+			input: math.MaxUint32,
+		},
+	}
+
+	for _, tt := range tests {
+		tc := tt
+
+		t.Run(tc.name, func(t *testing.T) {
+			expected := make([]byte, 4)
+			binary.LittleEndian.PutUint32(expected, tc.input)
+			require.Equal(t, expected, LeBytes(tc.input))
+		})
+	}
+}
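impl_amd64.go also imports github.com/tetratelabs/wazero/internal/u64, which the compiler changes use throughout (u64.LeBytes on uint64 values), but whose file is not included in this diff. By symmetry with u32.LeBytes it presumably looks like the following (a sketch by analogy, not the actual file):

```go
package u64

// LeBytes returns a byte slice corresponding to the 8 bytes in the uint64 in
// little-endian byte order. (Sketch inferred from the u32 package above and
// the call sites in impl_amd64.go; the real file is not part of this diff.)
func LeBytes(v uint64) []byte {
	return []byte{
		byte(v),
		byte(v >> 8),
		byte(v >> 16),
		byte(v >> 24),
		byte(v >> 32),
		byte(v >> 40),
		byte(v >> 48),
		byte(v >> 56),
	}
}
```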