ARM: p2v: switch to MOVW for Thumb2 and ARM/LPAE
In preparation for reducing the phys-to-virt minimum relative alignment
from 16 MiB to 2 MiB, switch to patchable sequences involving MOVW
instructions that can more easily be manipulated to carry a 12-bit
immediate. Note that the non-LPAE ARM sequence is not updated: MOVW
may not be supported on non-LPAE platforms, and the sequence itself
can be updated more easily to apply the 12 bits of displacement.

For Thumb2, which has many more versions of opcodes, switch to a sequence
that can be patched by the same patching code for both versions. Note
that the Thumb2 opcodes for MOVW and MVN are unambiguous, and have no
rotation bits in their immediate fields, so there is no need to use
placeholder constants in the asm blocks.

While at it, drop the 'volatile' qualifiers from the asm blocks: the
code does not have any side effects that are invisible to the compiler,
so it is free to omit these sequences if the outputs are not used.

Suggested-by: Russell King <linux@armlinux.org.uk>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
ardbiesheuvel committed Oct 28, 2020
1 parent 0e3db6c commit e8e00f5
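
To illustrate the point about dropping 'volatile': a minimal stand-alone C sketch (ours, not part of the patch; the asm template is 32-bit ARM only and the names are hypothetical) showing that a non-volatile asm whose output goes unused may be elided entirely:

/* Sketch only: GCC may delete a non-volatile asm with unused outputs. */
static inline unsigned long stub_add(unsigned long from)
{
        unsigned long to;

        /* no 'volatile': the asm's only effect is producing 'to' */
        asm("add %0, %1, #0x1000000" : "=r" (to) : "r" (from));
        return to;
}

void discard(unsigned long x)
{
        stub_add(x);    /* result unused: with optimization enabled, the
                           compiler is free to emit no code here at all */
}
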
Showing 2 changed files with 148 additions and 43 deletions.
44 changes: 32 additions & 12 deletions arch/arm/include/asm/memory.h

@@ -183,6 +183,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
 #define PHYS_OFFSET     ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
 #define PHYS_PFN_OFFSET (__pv_phys_pfn_offset)
 
+#ifndef CONFIG_THUMB2_KERNEL
 #define __pv_stub(from,to,instr)                        \
         __asm__("@ __pv_stub\n"                         \
         "1:     " instr "       %0, %1, %2\n"           \
@@ -192,25 +193,45 @@ extern const void *__pv_table_begin, *__pv_table_end;
         : "=r" (to)                                     \
         : "r" (from), "I" (__PV_BITS_31_24))
 
-#define __pv_stub_mov_hi(t)                             \
-        __asm__ volatile("@ __pv_stub_mov\n"            \
-        "1:     mov     %R0, %1\n"                      \
+#define __pv_add_carry_stub(x, y)                       \
+        __asm__("@ __pv_add_carry_stub\n"               \
+        "0:     movw    %R0, #0\n"                      \
+        "       adds    %Q0, %1, %R0, lsl #24\n"        \
+        "1:     mov     %R0, %2\n"                      \
+        "       adc     %R0, %R0, #0\n"                 \
         "       .pushsection .pv_table,\"a\"\n"         \
-        "       .long   1b - .\n"                       \
+        "       .long   0b - ., 1b - .\n"               \
         "       .popsection\n"                          \
-        : "=r" (t)                                      \
-        : "I" (__PV_BITS_7_0))
+        : "=&r" (y)                                     \
+        : "r" (x), "I" (__PV_BITS_7_0)                  \
+        : "cc")
 
+#else
+#define __pv_stub(from,to,instr)                        \
+        __asm__("@ __pv_stub\n"                         \
+        "0:     movw    %0, #0\n"                       \
+        "       lsl     %0, #24\n"                      \
+        "       " instr " %0, %1, %0\n"                 \
+        "       .pushsection .pv_table,\"a\"\n"         \
+        "       .long   0b - .\n"                       \
+        "       .popsection\n"                          \
+        : "=&r" (to)                                    \
+        : "r" (from))
+
 #define __pv_add_carry_stub(x, y)                       \
-        __asm__ volatile("@ __pv_add_carry_stub\n"      \
-        "1:     adds    %Q0, %1, %2\n"                  \
+        __asm__("@ __pv_add_carry_stub\n"               \
+        "0:     movw    %R0, #0\n"                      \
+        "       lsls    %R0, #24\n"                     \
+        "       adds    %Q0, %1, %R0\n"                 \
+        "1:     mvn     %R0, #0\n"                      \
         "       adc     %R0, %R0, #0\n"                 \
         "       .pushsection .pv_table,\"a\"\n"         \
-        "       .long   1b - .\n"                       \
+        "       .long   0b - ., 1b - .\n"               \
         "       .popsection\n"                          \
-        : "+r" (y)                                      \
-        : "r" (x), "I" (__PV_BITS_31_24)                \
+        : "=&r" (y)                                     \
+        : "r" (x)                                       \
         : "cc")
+#endif
 
 static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 {
@@ -219,7 +240,6 @@ static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
         if (sizeof(phys_addr_t) == 4) {
                 __pv_stub(x, t, "add");
         } else {
-                __pv_stub_mov_hi(t);
                 __pv_add_carry_stub(x, t);
         }
         return t;
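
As a reading aid (our illustration, not code from the patch): once the patching loop has written offset bits 31:24 into the MOVW immediate and bits 39:32 into the MOV/MVN immediate, the non-Thumb2 LPAE __pv_add_carry_stub sequence above computes, in C terms (the names and the 40-bit signed offset framing are ours):

#include <stdint.h>

/* Models: movw reg, #offset<31:24>   (imm16, patched)
 *         adds PAlo, VA, reg, lsl #24
 *         mov  PAhi, #offset<39:32>  (imm8, patched; becomes mvn if negative)
 *         adc  PAhi, PAhi, #0
 */
static uint64_t model_virt_to_phys_lpae(uint32_t va, int64_t pv_offset)
{
        uint32_t lo = (uint32_t)pv_offset & 0xff000000;      /* movw, lsl #24      */
        uint32_t hi = (uint32_t)((uint64_t)pv_offset >> 32); /* mov imm, or mvn #0 */
        uint32_t palo = va + lo;                             /* adds: may set C    */
        uint32_t carry = palo < va ? 1 : 0;                  /* the carry flag     */
        uint32_t pahi = hi + carry;                          /* adc pahi, pahi, #0 */

        return ((uint64_t)pahi << 32) | palo;
}
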
147 changes: 116 additions & 31 deletions arch/arm/kernel/phys2virt.S

@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  Copyright (C) 1994-2002 Russell King
- *  Copyright (c) 2003 ARM Limited
+ *  Copyright (c) 2003, 2020 ARM Limited
  *  All Rights Reserved
  */
 
@@ -58,55 +58,140 @@ __fixup_a_pv_table:
         mov     r6, r6, lsr #24
         cmn     r0, #1
 #ifdef CONFIG_THUMB2_KERNEL
+        @
+        @ The Thumb-2 versions of the patchable sequences are
+        @
+        @ phys-to-virt:                 movw    <reg>, #offset<31:24>
+        @                               lsl     <reg>, #24
+        @                               sub     <VA>, <PA>, <reg>
+        @
+        @ virt-to-phys (non-LPAE):      movw    <reg>, #offset<31:24>
+        @                               lsl     <reg>, #24
+        @                               add     <PA>, <VA>, <reg>
+        @
+        @ virt-to-phys (LPAE):          movw    <reg>, #offset<31:24>
+        @                               lsl     <reg>, #24
+        @                               adds    <PAlo>, <VA>, <reg>
+        @                               mov     <PAhi>, #offset<39:32>
+        @                               adc     <PAhi>, <PAhi>, #0
+        @
+        @ In the non-LPAE case, all patchable instructions are MOVW
+        @ instructions, where we need to patch in the offset into the
+        @ second halfword of the opcode (the 16-bit immediate is encoded
+        @ as imm4:i:imm3:imm8)
+        @
+        @       15       11 10  9           4    3  0    15  14  12 11 8 7    0
+        @      +-----------+---+-------------+------++---+------+----+------+
+        @ MOVW | 1 1 1 1 0 | i | 1 0 0 1 0 0 | imm4 || 0 | imm3 | Rd | imm8 |
+        @      +-----------+---+-------------+------++---+------+----+------+
+        @
+        @ In the LPAE case, we also need to patch in the high word of the
+        @ offset into the immediate field of the MOV instruction, or patch it
+        @ to a MVN instruction if the offset is negative. In this case, we
+        @ need to inspect the first halfword of the opcode, to check whether
+        @ it is MOVW or MOV/MVN, and to perform the MOV to MVN patching if
+        @ needed. The encoding of the immediate is rather complex for values
+        @ of i:imm3 != 0b0000, but fortunately, we never need more than 8 lower
+        @ order bits, which can be patched into imm8 directly (and i:imm3
+        @ cleared)
+        @
+        @       15       11 10  9                   5    0    15  14  12 11 8 7    0
+        @      +-----------+---+---------------------++---+------+----+------+
+        @ MOV  | 1 1 1 1 0 | i | 0 0 0 1 0 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
+        @ MVN  | 1 1 1 1 0 | i | 0 0 0 1 1 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
+        @      +-----------+---+---------------------++---+------+----+------+
+        @
         moveq   r0, #0x200000           @ set bit 21, mov to mvn instruction
-        lsls    r6, #24
-        beq     .Lnext
-        clz     r7, r6
-        lsr     r6, #24
-        lsl     r6, r7
-        bic     r6, #0x0080
-        lsrs    r7, #1
-        orrcs   r6, #0x0080
-        orr     r6, r6, r7, lsl #12
-        orr     r6, #0x4000
         b       .Lnext
 .Lloop: add     r7, r4
-        adds    r4, #4
-        ldrh    ip, [r7, #2]
-ARM_BE8(rev16   ip, ip)
-        tst     ip, #0x4000
-        and     ip, #0x8f00
-        orrne   ip, r6                  @ mask in offset bits 31-24
-        orreq   ip, r0                  @ mask in offset bits 7-0
-ARM_BE8(rev16   ip, ip)
-        strh    ip, [r7, #2]
-        bne     .Lnext
+        adds    r4, #4                  @ clears Z flag
+#ifdef CONFIG_ARM_LPAE
         ldrh    ip, [r7]
 ARM_BE8(rev16   ip, ip)
-        bic     ip, #0x20
-        orr     ip, ip, r0, lsr #16
+        tst     ip, #0x200              @ MOVW has bit 9 set, MVN has it clear
+        bne     0f                      @ skip to MOVW handling (Z flag is clear)
+        bic     ip, #0x20               @ clear bit 5 (MVN -> MOV)
+        orr     ip, ip, r0, lsr #16     @ MOV -> MVN if offset < 0
 ARM_BE8(rev16   ip, ip)
         strh    ip, [r7]
+        @ Z flag is set
+0:
+#endif
+        ldrh    ip, [r7, #2]
+ARM_BE8(rev16   ip, ip)
+        and     ip, #0xf00              @ clear everything except Rd field
+        orreq   ip, r0                  @ Z flag set -> MOV/MVN -> patch in high bits
+        orrne   ip, r6                  @ Z flag clear -> MOVW -> patch in low bits
+ARM_BE8(rev16   ip, ip)
+        strh    ip, [r7, #2]
 #else
 #ifdef CONFIG_CPU_ENDIAN_BE8
-#define PV_BIT22        0x00004000
+@ in BE8, we load data in BE, but instructions still in LE
+#define PV_BIT24        0x00000001
 #define PV_IMM8_MASK    0xff000000
-#define PV_ROT_MASK     0x000f0000
 #else
-#define PV_BIT22        0x00400000
+#define PV_BIT24        0x01000000
 #define PV_IMM8_MASK    0x000000ff
-#define PV_ROT_MASK     0xf00
 #endif
 
+        @
+        @ The ARM versions of the patchable sequences are
+        @
+        @ phys-to-virt:                 sub     <VA>, <PA>, #offset<31:24>, lsl #24
+        @
+        @ virt-to-phys (non-LPAE):      add     <PA>, <VA>, #offset<31:24>, lsl #24
+        @
+        @ virt-to-phys (LPAE):          movw    <reg>, #offset<31:24>
+        @                               adds    <PAlo>, <VA>, <reg>, lsl #24
+        @                               mov     <PAhi>, #offset<39:32>
+        @                               adc     <PAhi>, <PAhi>, #0
+        @
+        @ In the non-LPAE case, all patchable instructions are ADD or SUB
+        @ instructions, where we need to patch in the offset into the
+        @ immediate field of the opcode, which is emitted with the correct
+        @ rotation value. (The effective value of the immediate is imm12<7:0>
+        @ rotated right by [2 * imm12<11:8>] bits)
+        @
+        @      31   28 27      23 22  20 19  16 15  12 11    0
+        @      +------+-----------------+------+------+-------+
+        @  ADD | cond | 0 0 1 0 1 0 0 0 | Rn   | Rd   | imm12 |
+        @  SUB | cond | 0 0 1 0 0 1 0 0 | Rn   | Rd   | imm12 |
+        @  MOV | cond | 0 0 1 1 1 0 1 0 | Rn   | Rd   | imm12 |
+        @  MVN | cond | 0 0 1 1 1 1 1 0 | Rn   | Rd   | imm12 |
+        @      +------+-----------------+------+------+-------+
+        @
+        @ In the LPAE case, we use a MOVW instruction to carry the low offset
+        @ word, and patch in the high word of the offset into the immediate
+        @ field of the subsequent MOV instruction, or patch it to a MVN
+        @ instruction if the offset is negative. We can distinguish MOVW
+        @ instructions based on bits 23:22 of the opcode, and ADD/SUB can be
+        @ distinguished from MOV/MVN (all using the encodings above) using
+        @ bit 24.
+        @
+        @      31   28 27      23 22  20 19  16 15  12 11    0
+        @      +------+-----------------+------+------+-------+
+        @ MOVW | cond | 0 0 1 1 0 0 0 0 | imm4 | Rd   | imm12 |
+        @      +------+-----------------+------+------+-------+
+        @
         moveq   r0, #0x400000           @ set bit 22, mov to mvn instruction
         b       .Lnext
 .Lloop: ldr     ip, [r7, r4]
+#ifdef CONFIG_ARM_LPAE
+        tst     ip, #PV_BIT24           @ ADD/SUB have bit 24 clear
+        beq     1f
+ARM_BE8(rev     ip, ip)
+        tst     ip, #0xc00000           @ MOVW has bits 23:22 clear
+        bic     ip, ip, #0x400000       @ clear bit 22
+        bfc     ip, #0, #12             @ clear imm12 field of MOV[W] instruction
+        orreq   ip, ip, r6              @ MOVW -> mask in offset bits 31-24
+        orrne   ip, ip, r0              @ MOV -> mask in offset bits 7-0 (or bit 22)
+ARM_BE8(rev     ip, ip)
+        b       2f
+1:
+#endif
         bic     ip, ip, #PV_IMM8_MASK
-        tst     ip, #PV_ROT_MASK                @ check the rotation field
-        orrne   ip, ip, r6 ARM_BE8(, lsl #24)   @ mask in offset bits 31-24
-        biceq   ip, ip, #PV_BIT22               @ clear bit 22
-        orreq   ip, ip, r0 ARM_BE8(, ror #8)    @ mask in offset bits 7-0 (or bit 22)
+        orr     ip, ip, r6 ARM_BE8(, lsl #24)   @ mask in offset bits 31-24
+2:
         str     ip, [r7, r4]
         add     r4, r4, #4
 #endif
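
To make the two immediate encodings described in the comments above concrete, here is a small self-contained C sketch (ours, mirroring the patch's comments rather than any kernel code). The first helper patches an 8-bit offset into the second halfword of a Thumb2 MOVW, keeping only the Rd field, as the new Thumb2 .Lloop does with 'and ip, #0xf00' followed by 'orr'; the second decodes an ARM-mode rotated imm12 as described for ADD/SUB:

#include <stdint.h>

/* Thumb2 MOVW imm16 is imm4:i:imm3:imm8; imm4 and i live in the first
 * halfword, imm3:imm8 in the second. An offset of at most 8 bits needs
 * only imm8, so patching the second halfword alone is sufficient. */
static uint16_t t2_movw_patch_hw2(uint16_t hw2, uint8_t imm8)
{
        return (hw2 & 0x0f00) | imm8;   /* keep Rd (bits 11:8), set imm8 */
}

/* ARM data-processing immediate: the effective value of imm12 is
 * imm12<7:0> rotated right by 2 * imm12<11:8> bit positions. */
static uint32_t arm_decode_imm12(uint32_t imm12)
{
        uint32_t imm8 = imm12 & 0xff;
        uint32_t rot = ((imm12 >> 8) & 0xf) * 2;

        return (imm8 >> rot) | (imm8 << ((32 - rot) & 31));
}

For example, arm_decode_imm12(0x4ff) yields 0xff000000, which is how the unpatched ARM sequence 'add <PA>, <VA>, #offset<31:24>, lsl #24' carries its whole displacement in a single ADD.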
