@@ -3300,10 +3300,140 @@ class MacroAssemblerRISCV64 : public AbstractMacroAssembler<Assembler> {
33003300 m_assembler.fenceInsn ({ RISCV64Assembler::MemoryOperation::R }, { RISCV64Assembler::MemoryOperation::RW });
33013301 }
33023302
3303- MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD_WITH_RETURN (branchAtomicWeakCAS8, JumpList);
3304- MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD_WITH_RETURN (branchAtomicWeakCAS16, JumpList);
3305- MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD_WITH_RETURN (branchAtomicWeakCAS32, JumpList);
3306- MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD_WITH_RETURN (branchAtomicWeakCAS64, JumpList);
// Emulates an 8-bit or 16-bit weak compare-and-swap on top of the 32-bit LR/SC pair,
// since RISC-V provides no sub-word load-reserved/store-conditional instructions.
// Clobbers the expected-value register. Returns the branch list selected by `cond`:
// for Success, jumps taken when the CAS succeeded; for Failure, when it did not
// (including a spurious SC failure, hence "weak").
template <unsigned bitSize>
JumpList branchAtomicWeakCASImpl(StatusCondition cond, RegisterID expectedAndClobbered, RegisterID newValue, BaseIndex address)
{
    static_assert(bitSize == 8 || bitSize == 16);
    // There's no 8-bit or 16-bit load-reserved and store-conditional instructions in RISC-V,
    // so we have to implement the operations through the 32-bit versions, with a limited amount
    // of usable registers.

    auto temp = temps<Data, Memory>();
    JumpList failure;

    // We clobber the expected-value register with the XOR difference between the expected
    // and the new value, also clipping the result to the desired number of bits.
    m_assembler.xorInsn(expectedAndClobbered, expectedAndClobbered, newValue);
    m_assembler.zeroExtend<bitSize>(expectedAndClobbered);

    // The BaseIndex address is resolved into the memory temp. The address is aligned to the 4-byte
    // boundary, and the remainder (in bytes, scaled by 8 into a bit count) is used to calculate the
    // shift amount for the exact position in the 32-bit word where the target bit pattern is located.
    auto resolution = resolveAddress(address, temp.memory());
    m_assembler.addiInsn(temp.memory(), resolution.base, Imm::I(resolution.offset));
    m_assembler.andiInsn(temp.data(), temp.memory(), Imm::I<0b11>());
    m_assembler.andiInsn(temp.memory(), temp.memory(), Imm::I<~0b11>());
    m_assembler.slliInsn<3>(temp.data(), temp.data());
    // Bias the shift amount by 32 so that shifting by it positions the pattern in the upper
    // half of a 64-bit register (shift instructions only consume the low 6 bits of the amount).
    m_assembler.addiInsn(temp.data(), temp.data(), Imm::I<32>());

    // The XOR value in the expected-value register is shifted into the appropriate position in
    // the upper half of the register. The shift value is OR-ed into the lower half.
    m_assembler.sllInsn(expectedAndClobbered, expectedAndClobbered, temp.data());
    m_assembler.orInsn(expectedAndClobbered, expectedAndClobbered, temp.data());

    // The 32-bit value is loaded through the load-reserve instruction, and then shifted into the
    // upper 32 bits of the register. XOR against the expected-value register will, in the upper
    // 32 bits of the register, produce the 32-bit word with the expected value replaced by the new one.
    m_assembler.lrwInsn(temp.data(), temp.memory(), { RISCV64Assembler::MemoryAccess::Acquire });
    m_assembler.slliInsn<32>(temp.data(), temp.data());
    m_assembler.xorInsn(expectedAndClobbered, temp.data(), expectedAndClobbered);

    // We still have to validate that the expected value, after XOR, matches the new one. The upper
    // 32 bits of the expected-value register are shifted by the pre-prepared shift amount stored
    // in the lower half of that same register. This works because the shift amount is read only from
    // the bottom 6 bits of the shift-amount register. XOR-ing against the new-value register and shifting
    // back left should leave us with a zero value, in which case the expected-value bit pattern matched
    // the one that was loaded from memory. If non-zero, the failure branch is taken.
    m_assembler.srlInsn(temp.data(), expectedAndClobbered, expectedAndClobbered);
    m_assembler.xorInsn(temp.data(), temp.data(), newValue);
    m_assembler.slliInsn<64 - bitSize>(temp.data(), temp.data());
    failure.append(makeBranch(NotEqual, temp.data(), RISCV64Registers::zero));

    // The corresponding store-conditional remains. The 32-bit word, containing the new value after
    // the XOR, is located in the upper 32 bits of the expected-value register. That can be shifted
    // down and then used in the store-conditional instruction.
    m_assembler.srliInsn<32>(expectedAndClobbered, expectedAndClobbered);
    m_assembler.scwInsn(temp.data(), temp.memory(), expectedAndClobbered, { RISCV64Assembler::MemoryAccess::AcquireRelease });

    // On successful store, the temp register will have a zero value, and a non-zero value otherwise.
    // Branches are produced accordingly.
    switch (cond) {
    case Success: {
        Jump success = makeBranch(Equal, temp.data(), RISCV64Registers::zero);
        failure.link(this);
        return JumpList(success);
    }
    case Failure:
        failure.append(makeBranch(NotEqual, temp.data(), RISCV64Registers::zero));
        break;
    }

    return failure;
}
3373+
3374+ JumpList branchAtomicWeakCAS8 (StatusCondition cond, RegisterID expectedAndClobbered, RegisterID newValue, BaseIndex address)
3375+ {
3376+ return branchAtomicWeakCASImpl<8 >(cond, expectedAndClobbered, newValue, address);
3377+ }
3378+
3379+ JumpList branchAtomicWeakCAS16 (StatusCondition cond, RegisterID expectedAndClobbered, RegisterID newValue, BaseIndex address)
3380+ {
3381+ return branchAtomicWeakCASImpl<16 >(cond, expectedAndClobbered, newValue, address);
3382+ }
3383+
3384+ JumpList branchAtomicWeakCAS32 (StatusCondition cond, RegisterID expectedAndClobbered, RegisterID newValue, BaseIndex address)
3385+ {
3386+ auto temp = temps<Data, Memory>();
3387+ JumpList failure;
3388+
3389+ auto resolution = resolveAddress (address, temp.memory ());
3390+ m_assembler.addiInsn (temp.memory (), resolution.base , Imm::I (resolution.offset ));
3391+ m_assembler.zeroExtend <32 >(expectedAndClobbered, expectedAndClobbered);
3392+
3393+ m_assembler.lrwInsn (temp.data (), temp.memory (), { RISCV64Assembler::MemoryAccess::Acquire });
3394+ m_assembler.xorInsn (temp.data (), temp.data (), expectedAndClobbered);
3395+ failure.append (makeBranch (NotEqual, temp.data (), RISCV64Registers::zero));
3396+ m_assembler.scwInsn (temp.data (), temp.memory (), newValue, { RISCV64Assembler::MemoryAccess::AcquireRelease });
3397+
3398+ switch (cond) {
3399+ case Success: {
3400+ Jump success = makeBranch (Equal, temp.data (), RISCV64Registers::zero);
3401+ failure.link (this );
3402+ return JumpList (success);
3403+ }
3404+ case Failure:
3405+ failure.append (makeBranch (NotEqual, temp.data (), RISCV64Registers::zero));
3406+ break ;
3407+ }
3408+
3409+ return failure;
3410+ }
3411+
3412+ JumpList branchAtomicWeakCAS64 (StatusCondition cond, RegisterID expectedAndClobbered, RegisterID newValue, BaseIndex address)
3413+ {
3414+ auto temp = temps<Data, Memory>();
3415+ JumpList failure;
3416+
3417+ auto resolution = resolveAddress (address, temp.memory ());
3418+ m_assembler.addiInsn (temp.memory (), resolution.base , Imm::I (resolution.offset ));
3419+
3420+ m_assembler.lrdInsn (temp.data (), temp.memory (), { RISCV64Assembler::MemoryAccess::Acquire });
3421+ failure.append (makeBranch (NotEqual, temp.data (), expectedAndClobbered));
3422+ m_assembler.scdInsn (temp.data (), temp.memory (), newValue, { RISCV64Assembler::MemoryAccess::AcquireRelease });
3423+
3424+ switch (cond) {
3425+ case Success: {
3426+ Jump success = makeBranch (Equal, temp.data (), RISCV64Registers::zero);
3427+ failure.link (this );
3428+ return JumpList (success);
3429+ }
3430+ case Failure:
3431+ failure.append (makeBranch (NotEqual, temp.data (), RISCV64Registers::zero));
3432+ break ;
3433+ }
3434+
3435+ return failure;
3436+ }
33073437
33083438 MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD (moveConditionally32);
33093439 MACRO_ASSEMBLER_RISCV64_TEMPLATED_NOOP_METHOD (moveConditionally64);
0 commit comments