Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LoongArch] CSR/IOCSR reads are mis-optimized #63549

Closed
xen0n opened this issue Jun 27, 2023 · 0 comments
Closed

[LoongArch] CSR/IOCSR reads are mis-optimized #63549

xen0n opened this issue Jun 27, 2023 · 0 comments

Comments

@xen0n
Copy link
Contributor

xen0n commented Jun 27, 2023

Found during investigation of ClangBuiltLinux/boot-utils#108 (comment). It is due to the wrong cleanup of commit 2efdacf that removed side-effect markers from CSR/IOCSR read intrinsics.

The involved code:
// arch/loongarch/kernel/traps.c
static void init_restore_fp(void)
{
        if (!used_math()) {
                /* First time FP context user. */
                init_fpu();
        } else {
                /* This task has formerly used the FP context */
                if (!is_fpu_owner())
                        own_fpu_inatomic(1);
        }

        BUG_ON(!is_fp_enabled());
}

Faulty generated code:

9000000000223d40 <init_restore_fp>:
9000000000223d40: 63 c0 ff 02   addi.d  $sp, $sp, -16
9000000000223d44: 61 20 c0 29   st.d    $ra, $sp, 8
9000000000223d48: 76 00 c0 29   st.d    $fp, $sp, 0
9000000000223d4c: 44 00 c0 28   ld.d    $a0, $tp, 0
9000000000223d50: 85 54 00 2a   ld.bu   $a1, $a0, 21
9000000000223d54: a5 80 40 03   andi    $a1, $a1, 32
9000000000223d58: 16 08 00 04   csrrd   $fp, 2
9000000000223d5c: a0 4c 00 44   bnez    $a1, 76 <init_restore_fp+0x68>
9000000000223d60: 84 00 0c 24   ldptr.w $a0, $a0, 3072
9000000000223d64: c5 06 80 03   ori     $a1, $fp, 1
9000000000223d68: 25 08 00 04   csrwr   $a1, 2
9000000000223d6c: 45 20 c0 02   addi.d  $a1, $tp, 8
9000000000223d70: 06 00 84 03   ori     $a2, $zero, 256
9000000000223d74: a0 98 6c 38   amor_db.d       $zero, $a2, $a1
9000000000223d78: 45 00 c0 28   ld.d    $a1, $tp, 0
9000000000223d7c: a5 20 c0 28   ld.d    $a1, $a1, 8
9000000000223d80: a6 e8 3f 26   ldptr.d $a2, $a1, 16360
9000000000223d84: c6 04 80 03   ori     $a2, $a2, 1
9000000000223d88: a6 e8 3f 27   stptr.d $a2, $a1, 16360
9000000000223d8c: 00 14 59 54   bl      22804 <_init_fpu>
9000000000223d90: 44 00 c0 28   ld.d    $a0, $tp, 0
9000000000223d94: 45 00 00 14   lu12i.w $a1, 2
9000000000223d98: 86 50 80 28   ld.w    $a2, $a0, 20
9000000000223d9c: c5 14 15 00   or      $a1, $a2, $a1
9000000000223da0: 85 50 80 29   st.w    $a1, $a0, 20
9000000000223da4: 00 68 00 50   b       104 <init_restore_fp+0xcc>
9000000000223da8: 44 20 c0 28   ld.d    $a0, $tp, 8
9000000000223dac: 84 00 44 03   andi    $a0, $a0, 256
9000000000223db0: 80 5c 00 44   bnez    $a0, 92 <init_restore_fp+0xcc>
9000000000223db4: 44 db 05 1a   pcalau12i       $a0, 11994
9000000000223db8: 84 00 d4 02   addi.d  $a0, $a0, 1280
9000000000223dbc: 84 40 00 2a   ld.bu   $a0, $a0, 16
9000000000223dc0: 84 20 40 03   andi    $a0, $a0, 8
9000000000223dc4: 80 48 00 40   beqz    $a0, 72 <init_restore_fp+0xcc>
9000000000223dc8: 44 20 c0 28   ld.d    $a0, $tp, 8
9000000000223dcc: 84 00 44 03   andi    $a0, $a0, 256
9000000000223dd0: 80 3c 00 44   bnez    $a0, 60 <init_restore_fp+0xcc>
9000000000223dd4: c4 06 80 03   ori     $a0, $fp, 1
9000000000223dd8: 24 08 00 04   csrwr   $a0, 2
9000000000223ddc: 44 20 c0 02   addi.d  $a0, $tp, 8
9000000000223de0: 05 00 84 03   ori     $a1, $zero, 256
9000000000223de4: 80 94 6c 38   amor_db.d       $zero, $a1, $a0
9000000000223de8: 44 00 c0 28   ld.d    $a0, $tp, 0
9000000000223dec: 84 20 c0 28   ld.d    $a0, $a0, 8
9000000000223df0: 85 e8 3f 26   ldptr.d $a1, $a0, 16360
9000000000223df4: a5 04 80 03   ori     $a1, $a1, 1
9000000000223df8: 85 e8 3f 27   stptr.d $a1, $a0, 16360
9000000000223dfc: 44 00 c0 28   ld.d    $a0, $tp, 0
9000000000223e00: 84 fc df 02   addi.d  $a0, $a0, 2047
9000000000223e04: 84 04 d0 02   addi.d  $a0, $a0, 1025
9000000000223e08: 00 c0 57 54   bl      22464 <_restore_fp>
9000000000223e0c: c4 06 40 03   andi    $a0, $fp, 1
9000000000223e10: 80 14 00 40   beqz    $a0, 20 <init_restore_fp+0xe4>
9000000000223e14: 76 00 c0 28   ld.d    $fp, $sp, 0
9000000000223e18: 61 20 c0 28   ld.d    $ra, $sp, 8
9000000000223e1c: 63 40 c0 02   addi.d  $sp, $sp, 16
9000000000223e20: 20 00 00 4c   ret
9000000000223e24: 01 00 2a 00   break   1

Note there's only one csrrd which means the BUG_ON must be checking the previous value of CSR.EUEN.

Adding the context:

// arch/loongarch/include/asm/loongarch.h
#define read_csr_euen() csr_read32(LOONGARCH_CSR_EUEN)
#define write_csr_euen(val) csr_write32(val, LOONGARCH_CSR_EUEN)

// expanded from __BUILD_CSR_OP(euen)
static inline unsigned long
set_csr_euen(unsigned long set)
{
        unsigned long res, new;
        res = read_csr_euen();
        new = res | set;
        write_csr_euen(new);
        return res;
}

// arch/loongarch/include/asm/fpu.h
#define enable_fpu()            set_csr_euen(CSR_EUEN_FPEN)

static inline int is_fp_enabled(void)
{
        return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_FPEN) ?
                1 : 0;
}

static inline void __own_fpu(void)
{
        enable_fpu();
        set_thread_flag(TIF_USEDFPU);
        KSTK_EUEN(current) |= CSR_EUEN_FPEN;
}

static inline void init_fpu(void)
{
        unsigned int fcsr = current->thread.fpu.fcsr;

        __own_fpu();
        _init_fpu(fcsr);
        set_used_math();
}

We can then arrive at the minimal reproducer below.

Minimal reproducer:

void bug(void);

void foo(int flag)
{
        if (flag)
                (void) __builtin_loongarch_csrwr_w(__builtin_loongarch_csrrd_w(0x2) | 0x1, 0x2);

        if (!(__builtin_loongarch_csrrd_w(0x2) & 0x1))
                bug();
}

Correct codegen:

foo:                                    # @foo
# %bb.0:                                # %entry
        beqz    $a0, .LBB0_2
# %bb.1:                                # %if.then
        csrrd   $a0, 2
        ori     $a0, $a0, 1
        csrwr   $a0, 2
.LBB0_2:                                # %if.end
        csrrd   $a0, 2
        andi    $a0, $a0, 1
        bnez    $a0, .LBB0_4
# %bb.3:                                # %if.then2
        b       %plt(bug)
.LBB0_4:                                # %if.end3
        ret

Wrong codegen:

foo:                                    # @foo
# %bb.0:
        csrrd   $a1, 2
        beqz    $a0, .LBB0_2
# %bb.1:
        ori     $a0, $a1, 1
        csrwr   $a0, 2
.LBB0_2:
        andi    $a0, $a1, 1
        bnez    $a0, .LBB0_4
# %bb.3:
        b       %plt(bug)
.LBB0_4:
        ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants