Skip to content

Commit

Permalink
RISC-V: add zbb support to string functions
Browse files Browse the repository at this point in the history
Add handling for ZBB extension and add support for using it as a
variant for optimized string functions.

Support for the Zbb-str-variants is limited to the GNU-assembler
for now, as LLVM has not yet acquired the functionality to
selectively change the arch option in assembler code.
This is still under review at
    https://reviews.llvm.org/D123515

Co-developed-by: Christoph Muellner <christoph.muellner@vrull.eu>
Signed-off-by: Christoph Muellner <christoph.muellner@vrull.eu>
Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20230113212301.3534711-3-heiko@sntech.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
  • Loading branch information
Heiko Stuebner authored and palmer-dabbelt committed Jan 31, 2023
1 parent 56e0790 commit b6fcdb1
Show file tree
Hide file tree
Showing 8 changed files with 334 additions and 1 deletion.
24 changes: 24 additions & 0 deletions arch/riscv/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,30 @@ config RISCV_ISA_SVPBMT

If you don't know what to do here, say Y.

# Promptless symbol: enabled when the toolchain is able to build Zbb code.
# Requires the compiler to accept a "_zbb" -march string for the selected
# word size, a minimum LLD or LD version, and the GNU assembler. The GNU
# assembler requirement exists because the Zbb string routines change the
# arch option inside assembler code, which LLVM's assembler could not yet
# do when this was added (see https://reviews.llvm.org/D123515).
config TOOLCHAIN_HAS_ZBB
bool
default y
depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zbb)
depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zbb)
depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
depends on AS_IS_GNU

# User-visible switch for Zbb support. The extension is detected at run time
# and the Zbb code paths are enabled through the alternatives framework,
# hence the RISCV_ALTERNATIVE select. Only offered when the toolchain can
# build Zbb code (TOOLCHAIN_HAS_ZBB).
config RISCV_ISA_ZBB
bool "Zbb extension support for bit manipulation instructions"
depends on TOOLCHAIN_HAS_ZBB
depends on !XIP_KERNEL && MMU
select RISCV_ALTERNATIVE
default y
help
Adds support to dynamically detect the presence of the ZBB
extension (basic bit manipulation) and enable its usage.

The Zbb extension provides instructions to accelerate a number
of bit-specific operations (count bit population, sign extending,
bitrotation, etc).

If you don't know what to do here, say Y.

config TOOLCHAIN_HAS_ZICBOM
bool
default y
Expand Down
3 changes: 2 additions & 1 deletion arch/riscv/include/asm/errata_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@

#define CPUFEATURE_SVPBMT 0
#define CPUFEATURE_ZICBOM 1
#define CPUFEATURE_NUMBER 2
#define CPUFEATURE_ZBB 2
#define CPUFEATURE_NUMBER 3

#ifdef __ASSEMBLY__

Expand Down
1 change: 1 addition & 0 deletions arch/riscv/include/asm/hwcap.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ enum riscv_isa_ext_id {
RISCV_ISA_EXT_SSTC,
RISCV_ISA_EXT_SVINVAL,
RISCV_ISA_EXT_SVPBMT,
RISCV_ISA_EXT_ZBB,
RISCV_ISA_EXT_ZICBOM,
RISCV_ISA_EXT_ZIHINTPAUSE,
RISCV_ISA_EXT_ID_MAX
Expand Down
1 change: 1 addition & 0 deletions arch/riscv/kernel/cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ arch_initcall(riscv_cpuinfo_init);
* New entries to this struct should follow the ordering rules described above.
*/
static struct riscv_isa_ext_data isa_ext_arr[] = {
__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
Expand Down
18 changes: 18 additions & 0 deletions arch/riscv/kernel/cpufeature.c
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ void __init riscv_fill_hwcap(void)
SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL);
SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB);
SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
}
Expand Down Expand Up @@ -302,6 +303,20 @@ static bool __init_or_module cpufeature_probe_zicbom(unsigned int stage)
return true;
}

/*
 * Decide whether the Zbb alternatives should be requested for the given
 * patching stage.
 *
 * Returns true only when all of the following hold, checked in this order:
 *  - Zbb support is compiled in (CONFIG_RISCV_ISA_ZBB),
 *  - @stage is not RISCV_ALTERNATIVES_EARLY_BOOT,
 *  - riscv_isa_extension_available() reports the ZBB extension.
 */
static bool __init_or_module cpufeature_probe_zbb(unsigned int stage)
{
	return IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
	       stage != RISCV_ALTERNATIVES_EARLY_BOOT &&
	       riscv_isa_extension_available(NULL, ZBB);
}

/*
* Probe presence of individual extensions.
*
Expand All @@ -320,6 +335,9 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage)
if (cpufeature_probe_zicbom(stage))
cpu_req_feature |= BIT(CPUFEATURE_ZICBOM);

if (cpufeature_probe_zbb(stage))
cpu_req_feature |= BIT(CPUFEATURE_ZBB);

return cpu_req_feature;
}

Expand Down
85 changes: 85 additions & 0 deletions arch/riscv/lib/strcmp.S
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,14 @@
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm-generic/export.h>
#include <asm/alternative-macros.h>
#include <asm/errata_list.h>

/* int strcmp(const char *cs, const char *ct) */
SYM_FUNC_START(strcmp)

ALTERNATIVE("nop", "j strcmp_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)

/*
* Returns
* a0 - comparison result, value like strcmp
Expand Down Expand Up @@ -33,4 +38,84 @@ SYM_FUNC_START(strcmp)
*/
sub a0, t0, t1
ret

/*
 * Variant of strcmp using the ZBB extension if available.
 *
 * Entered through the ALTERNATIVE() jump at the top of strcmp and only
 * assembled when CONFIG_RISCV_ISA_ZBB is set.
 */
#ifdef CONFIG_RISCV_ISA_ZBB
strcmp_zbb:

/* Enable Zbb instructions (orc.b, rev8) for this region only. */
.option push
.option arch,+zbb

/*
 * Returns
 *   a0 - comparison result, value like strcmp
 *
 * Parameters
 *   a0 - string1
 *   a1 - string2
 *
 * Clobbers
 *   t0, t1, t2, t3, t4
 */

/*
 * Use the word-wise loop only when both pointers are SZREG-aligned;
 * otherwise go straight to the byte loop at 3.
 * t4 is the all-ones reference value for the orc.b NUL check below.
 */
or t2, a0, a1
li t4, -1
and t2, t2, SZREG-1
bnez t2, 3f

/* Main loop for aligned string. */
.p2align 3
1:
REG_L t0, 0(a0)
REG_L t1, 0(a1)
/* t3 is all-ones only if the word from string1 contains no NUL byte. */
orc.b t3, t0
bne t3, t4, 2f
addi a0, a0, SZREG
addi a1, a1, SZREG
beq t0, t1, 1b

/*
 * Words don't match, and no null byte in the first
 * word. Get bytes in big-endian order and compare.
 */
#ifndef CONFIG_CPU_BIG_ENDIAN
rev8 t0, t0
rev8 t1, t1
#endif

/*
 * Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence:
 * sltu gives 1/0, neg turns that into -1/0, ori 1 yields -1/1.
 */
sltu a0, t0, t1
neg a0, a0
ori a0, a0, 1
ret

2:
/*
 * Found a null byte.
 * If words don't match, fall back to simple loop.
 * a0/a1 have not been advanced on this path (the branch here is taken
 * before the addi instructions), so the byte loop re-reads this word.
 */
bne t0, t1, 3f

/* Otherwise, strings are equal. */
li a0, 0
ret

/* Simple loop for misaligned strings. */
.p2align 3
3:
lbu t0, 0(a0)
lbu t1, 0(a1)
addi a0, a0, 1
addi a1, a1, 1
/* Stop at the first differing byte, or when both bytes are NUL. */
bne t0, t1, 4f
bnez t0, 3b

4:
/* Difference of the last bytes read (0 when both were NUL). */
sub a0, t0, t1
ret

.option pop
#endif
SYM_FUNC_END(strcmp)
105 changes: 105 additions & 0 deletions arch/riscv/lib/strlen.S
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,14 @@
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm-generic/export.h>
#include <asm/alternative-macros.h>
#include <asm/errata_list.h>

/* size_t strlen(const char *s) */
SYM_FUNC_START(strlen)

ALTERNATIVE("nop", "j strlen_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)

/*
* Returns
* a0 - string length
Expand All @@ -25,4 +30,104 @@ SYM_FUNC_START(strlen)
2:
sub a0, t1, a0
ret

/*
 * Variant of strlen using the ZBB extension if available.
 *
 * Entered through the ALTERNATIVE() jump at the top of strlen and only
 * assembled when CONFIG_RISCV_ISA_ZBB is set.
 */
#ifdef CONFIG_RISCV_ISA_ZBB
strlen_zbb:

/*
 * Endianness helpers, both working from the register end that holds the
 * lowest-addressed byte:
 *   CZ    - count zero bits starting at that end
 *   SHIFT - shift towards that end, dropping the lowest-addressed bytes
 */
#ifdef CONFIG_CPU_BIG_ENDIAN
# define CZ clz
# define SHIFT sll
#else
# define CZ ctz
# define SHIFT srl
#endif

/* Enable Zbb instructions (orc.b, clz/ctz) for this region only. */
.option push
.option arch,+zbb

/*
 * Returns
 *   a0 - string length
 *
 * Parameters
 *   a0 - String to measure
 *
 * Clobbers
 *   t0, t1, t2, t3
 */

/* Number of irrelevant bytes in the first word. */
andi t2, a0, SZREG-1

/* Align pointer. */
andi t0, a0, -SZREG

/*
 * t3 = number of string bytes in the first word,
 * t2 = number of irrelevant bits (bytes * 8), used as shift amount.
 */
li t3, SZREG
sub t3, t3, t2
slli t2, t2, 3

/* Get the first word (aligned load; may include bytes preceding a0). */
REG_L t1, 0(t0)

/*
 * Shift away the partial data we loaded to remove the irrelevant bytes
 * preceding the string with the effect of adding NUL bytes at the
 * end of the string's first word.
 */
SHIFT t1, t1, t2

/* Convert non-NUL into 0xff and NUL into 0x00. */
orc.b t1, t1

/* Convert non-NUL into 0x00 and NUL into 0xff. */
not t1, t1

/*
 * Search for the first set bit (corresponding to a NUL byte in the
 * original chunk).
 */
CZ t1, t1

/*
 * The first chunk is special: compare against the number
 * of valid bytes in this chunk.
 * a0 = bit index / 8 = byte index of the first NUL. If that index lies
 * within the valid bytes, it already is the string length; otherwise a0
 * is kept as the number of characters in the first word.
 */
srli a0, t1, 3
bgtu t3, a0, 3f

/*
 * Prepare for the word comparison loop.
 * t2 = address of the second word, t3 = all-ones pattern that orc.b
 * produces for a word without a NUL byte.
 */
addi t2, t0, SZREG
li t3, -1

/*
 * Our critical loop is 4 instructions and processes data in
 * 4 byte or 8 byte chunks.
 */
.p2align 3
1:
REG_L t1, SZREG(t0)
addi t0, t0, SZREG
orc.b t1, t1
beq t1, t3, 1b
2:
/* t0 now points at the word holding the NUL; find the NUL's bit index. */
not t1, t1
CZ t1, t1

/*
 * Get the number of bytes in the fully processed words, i.e. the
 * distance from the second word to the word holding the NUL.
 */
sub t2, t0, t2

/* Add number of characters in the first word. */
add a0, a0, t2
/* Convert the bit index of the NUL into a byte count. */
srli t1, t1, 3

/* Add number of characters in the last word. */
add a0, a0, t1
3:
ret

.option pop
#endif
SYM_FUNC_END(strlen)

0 comments on commit b6fcdb1

Please sign in to comment.