| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| // RUN: %clang_dfsan %s -o %t && %run %t | ||
| // XFAIL: * | ||
|
|
||
| #include <assert.h> | ||
| #include <stdio.h> | ||
|
|
||
| int main(int argc, char *argv[]) { | ||
|   char buf[256] = "10000000000-100000000000 rw-p 00000000 00:00 0"; | ||
|   long rss = 0; | ||
|   // Regression test for a bug in DFSan's sscanf that makes | ||
|   // release_shadow_space.c flaky (see | ||
|   // https://github.com/llvm/llvm-project/issues/91287). | ||
|   // The input does not begin with the literal text of the format string, so | ||
|   // reporting one converted item is the error case. | ||
|   const int converted = | ||
|       sscanf(buf, "Garbage text before, %ld, Garbage text after", &rss); | ||
|   if (converted != 1) | ||
|     return 0; | ||
|

||
|   printf("Error: matched %ld\n", rss); | ||
|   return 1; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| //==- RegAllocFast.h ----------- fast register allocator ----------*-C++-*-==// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_CODEGEN_REGALLOCFAST_H | ||
| #define LLVM_CODEGEN_REGALLOCFAST_H | ||
|
|
||
| #include "llvm/CodeGen/MachinePassManager.h" | ||
| #include "llvm/CodeGen/RegAllocCommon.h" | ||
| #include <utility> | ||
|
|
||
| namespace llvm { | ||
|
|
||
| /// Options accepted by RegAllocFastPass. | ||
| struct RegAllocFastPassOptions { | ||
| /// Register-class filter; defaults to allocateAllRegClasses. | ||
| RegClassFilterFunc Filter = allocateAllRegClasses; | ||
| /// Name that accompanies Filter; defaults to "all". | ||
| StringRef FilterName = "all"; | ||
| /// When true, RegAllocFastPass::getSetProperties() reports NoVRegs. | ||
| bool ClearVRegs = true; | ||
| }; | ||
|
|
||
| /// New-pass-manager entry point for the fast register allocator. | ||
| /// | ||
| /// The three property getters describe which MachineFunctionProperties the | ||
| /// pass requires, sets and clears; none of them modifies the pass, so they | ||
| /// are const-qualified. | ||
| class RegAllocFastPass : public PassInfoMixin<RegAllocFastPass> { | ||
|   RegAllocFastPassOptions Opts; | ||
|

||
| public: | ||
|   /// \param Opts allocator options. Taken by value and moved into the pass so | ||
|   /// the filter callable it carries is not copied a second time. | ||
|   RegAllocFastPass(RegAllocFastPassOptions Opts = RegAllocFastPassOptions()) | ||
|       : Opts(std::move(Opts)) {} | ||
|

||
|   /// Properties the pass requires: NoPHIs. | ||
|   MachineFunctionProperties getRequiredProperties() const { | ||
|     return MachineFunctionProperties().set( | ||
|         MachineFunctionProperties::Property::NoPHIs); | ||
|   } | ||
|

||
|   /// Properties the pass sets: NoVRegs when Opts.ClearVRegs is true, | ||
|   /// otherwise none. | ||
|   MachineFunctionProperties getSetProperties() const { | ||
|     if (!Opts.ClearVRegs) | ||
|       return MachineFunctionProperties(); | ||
|

||
|     return MachineFunctionProperties().set( | ||
|         MachineFunctionProperties::Property::NoVRegs); | ||
|   } | ||
|

||
|   /// Properties the pass clears: IsSSA. | ||
|   MachineFunctionProperties getClearedProperties() const { | ||
|     return MachineFunctionProperties().set( | ||
|         MachineFunctionProperties::Property::IsSSA); | ||
|   } | ||
|

||
|   /// Runs the pass on \p MF; defined out of line. | ||
|   PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &); | ||
|

||
|   /// Prints the pass's pipeline text to \p OS; defined out of line. | ||
|   void printPipeline(raw_ostream &OS, | ||
|                      function_ref<StringRef(StringRef)> MapClassName2PassName); | ||
| }; | ||
|
|
||
| } // namespace llvm | ||
|
|
||
| #endif // LLVM_CODEGEN_REGALLOCFAST_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,108 @@ | ||
| //=== LoongArchDeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg ===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===---------------------------------------------------------------------===// | ||
| // | ||
| // This pass rewrites Rd to r0 for instrs whose return values are unused. | ||
| // | ||
| //===---------------------------------------------------------------------===// | ||
|
|
||
| #include "LoongArch.h" | ||
| #include "LoongArchInstrInfo.h" | ||
| #include "LoongArchSubtarget.h" | ||
| #include "llvm/ADT/Statistic.h" | ||
| #include "llvm/CodeGen/LiveDebugVariables.h" | ||
| #include "llvm/CodeGen/LiveIntervals.h" | ||
| #include "llvm/CodeGen/LiveStacks.h" | ||
| #include "llvm/CodeGen/MachineFunctionPass.h" | ||
| #include "llvm/CodeGen/MachineRegisterInfo.h" | ||
|
|
||
| using namespace llvm; | ||
| #define DEBUG_TYPE "loongarch-dead-defs" | ||
| #define LoongArch_DEAD_REG_DEF_NAME "LoongArch Dead register definitions" | ||
|
|
||
| STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced"); | ||
|
|
||
| namespace { | ||
| /// Machine function pass whose runOnMachineFunction() rewrites dead register | ||
| /// definitions to the zero register. | ||
| class LoongArchDeadRegisterDefinitions : public MachineFunctionPass { | ||
| public: | ||
|   /// Pass identification; passed to the MachineFunctionPass constructor. | ||
|   static char ID; | ||
|

||
|   LoongArchDeadRegisterDefinitions() : MachineFunctionPass(ID) {} | ||
|   bool runOnMachineFunction(MachineFunction &MF) override; | ||
|   /// Declares the analyses this pass needs and the ones it leaves valid. | ||
|   void getAnalysisUsage(AnalysisUsage &AU) const override { | ||
|     AU.setPreservesCFG(); | ||
|     // LiveIntervals is required (requested once) and declared preserved. | ||
|     AU.addRequired<LiveIntervals>(); | ||
|     AU.addPreserved<LiveIntervals>(); | ||
|     AU.addPreserved<SlotIndexes>(); | ||
|     AU.addPreserved<LiveDebugVariables>(); | ||
|     AU.addPreserved<LiveStacks>(); | ||
|     MachineFunctionPass::getAnalysisUsage(AU); | ||
|   } | ||
|

||
|   StringRef getPassName() const override { return LoongArch_DEAD_REG_DEF_NAME; } | ||
| }; | ||
| } // end anonymous namespace | ||
|
|
||
| // Storage for the pass ID that the constructor hands to MachineFunctionPass. | ||
| char LoongArchDeadRegisterDefinitions::ID = 0; | ||
| // Registers the pass using DEBUG_TYPE ("loongarch-dead-defs") and the | ||
| // human-readable LoongArch_DEAD_REG_DEF_NAME string. | ||
| INITIALIZE_PASS(LoongArchDeadRegisterDefinitions, DEBUG_TYPE, | ||
| LoongArch_DEAD_REG_DEF_NAME, false, false) | ||
|
|
||
| /// Returns a newly allocated LoongArchDeadRegisterDefinitions pass. | ||
| FunctionPass *llvm::createLoongArchDeadRegisterDefinitionsPass() { | ||
| return new LoongArchDeadRegisterDefinitions(); | ||
| } | ||
|
|
||
| /// Replaces dead virtual-register definitions with the zero register. | ||
| /// | ||
| /// Only instructions that may load, may store, or have unmodeled side effects | ||
| /// are visited. Among the first Desc.getNumDefs() operands of such an | ||
| /// instruction, every dead, non-early-clobber, untied virtual-register def | ||
| /// whose operand register class contains LoongArch::R0 has its live interval | ||
| /// removed and is rewritten to LoongArch::R0. | ||
| /// | ||
| /// \returns true if at least one operand was rewritten. | ||
| bool LoongArchDeadRegisterDefinitions::runOnMachineFunction( | ||
|     MachineFunction &MF) { | ||
|   if (skipFunction(MF.getFunction())) | ||
|     return false; | ||
|

||
|   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); | ||
|   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); | ||
|   LiveIntervals &LIS = getAnalysis<LiveIntervals>(); | ||
|   LLVM_DEBUG(dbgs() << "***** LoongArchDeadRegisterDefinitions *****\n"); | ||
|

||
|   bool Changed = false; | ||
|   for (MachineBasicBlock &Block : MF) { | ||
|     for (MachineInstr &Inst : Block) { | ||
|       // Only non-computational instructions (loads, stores, and anything with | ||
|       // unmodeled side effects) are candidates. | ||
|       const MCInstrDesc &Desc = Inst.getDesc(); | ||
|       const bool IsCandidate = Desc.mayLoad() || Desc.mayStore() || | ||
|                                Desc.hasUnmodeledSideEffects(); | ||
|       if (!IsCandidate) | ||
|         continue; | ||
|

||
|       const int NumDefs = Desc.getNumDefs(); | ||
|       for (int Idx = 0; Idx != NumDefs; ++Idx) { | ||
|         MachineOperand &Def = Inst.getOperand(Idx); | ||
|         const bool IsPlainDef = | ||
|             Def.isReg() && Def.isDef() && !Def.isEarlyClobber(); | ||
|         if (!IsPlainDef) | ||
|           continue; | ||
|

||
|         // A def tied to a use operand must keep its register. | ||
|         if (Inst.isRegTiedToUseOperand(Idx)) { | ||
|           LLVM_DEBUG(dbgs() << " Ignoring, def is tied operand.\n"); | ||
|           continue; | ||
|         } | ||
|

||
|         Register VReg = Def.getReg(); | ||
|         if (!(VReg.isVirtual() && Def.isDead())) | ||
|           continue; | ||
|

||
|         LLVM_DEBUG(dbgs() << " Dead def operand #" << Idx << " in:\n "; | ||
|                    Inst.print(dbgs())); | ||
|         // The zero register is only a valid replacement when the operand's | ||
|         // register class contains it. | ||
|         const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF); | ||
|         if (!RC || !RC->contains(LoongArch::R0)) { | ||
|           LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n"); | ||
|           continue; | ||
|         } | ||
|

||
|         // Drop the virtual register's interval before it disappears from the | ||
|         // instruction. | ||
|         assert(LIS.hasInterval(VReg)); | ||
|         LIS.removeInterval(VReg); | ||
|         Def.setReg(LoongArch::R0); | ||
|         LLVM_DEBUG(dbgs() << " Replacing with zero register. New:\n "; | ||
|                    Inst.print(dbgs())); | ||
|         ++NumDeadDefsReplaced; | ||
|         Changed = true; | ||
|       } | ||
|     } | ||
|   } | ||
|

||
|   return Changed; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,107 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: llc < %s -mtriple=r600-- -mcpu=redwood | FileCheck %s --check-prefixes=R600 | ||
|
|
||
| ; Stores the constant vector <i32 5, i32 6> to %out. The checks expect each | ||
| ; lane to be moved in from a literal and written with one T0.XY store. | ||
| define amdgpu_kernel void @build_vector2 (ptr addrspace(1) %out) { | ||
| ; R600-LABEL: build_vector2: | ||
| ; R600: ; %bb.0: ; %entry | ||
| ; R600-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: MOV * T0.Y, literal.x, | ||
| ; R600-NEXT: 6(8.407791e-45), 0(0.000000e+00) | ||
| ; R600-NEXT: MOV T0.X, literal.x, | ||
| ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, | ||
| ; R600-NEXT: 5(7.006492e-45), 2(2.802597e-45) | ||
| entry: | ||
| store <2 x i32> <i32 5, i32 6>, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| ; Stores the constant vector <i32 5, i32 6, i32 7, i32 8> to %out. The checks | ||
| ; expect one literal MOV per lane and a single T0.XYZW store. | ||
| define amdgpu_kernel void @build_vector4 (ptr addrspace(1) %out) { | ||
| ; R600-LABEL: build_vector4: | ||
| ; R600: ; %bb.0: ; %entry | ||
| ; R600-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: MOV * T0.W, literal.x, | ||
| ; R600-NEXT: 8(1.121039e-44), 0(0.000000e+00) | ||
| ; R600-NEXT: MOV * T0.Z, literal.x, | ||
| ; R600-NEXT: 7(9.809089e-45), 0(0.000000e+00) | ||
| ; R600-NEXT: MOV * T0.Y, literal.x, | ||
| ; R600-NEXT: 6(8.407791e-45), 0(0.000000e+00) | ||
| ; R600-NEXT: MOV T0.X, literal.x, | ||
| ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, | ||
| ; R600-NEXT: 5(7.006492e-45), 2(2.802597e-45) | ||
| entry: | ||
| store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| ; Stores the constant <2 x i16> <5, 6> to %out. The checks expect both halves | ||
| ; packed into the single 32-bit literal 393221 (6 << 16 | 5). | ||
| define amdgpu_kernel void @build_vector_v2i16 (ptr addrspace(1) %out) { | ||
| ; R600-LABEL: build_vector_v2i16: | ||
| ; R600: ; %bb.0: ; %entry | ||
| ; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.X, T5.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: MOV T4.X, literal.x, | ||
| ; R600-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, | ||
| ; R600-NEXT: 393221(5.510200e-40), 2(2.802597e-45) | ||
| entry: | ||
| store <2 x i16> <i16 5, i16 6>, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| ; Builds a <2 x i16> whose lane 0 is the high half of %a (lshr 16 + trunc) and | ||
| ; whose lane 1 is the constant 5. The checks expect an LSHR by 16 followed by | ||
| ; an OR_INT with 327680 (5 << 16). | ||
| define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32 %a) { | ||
| ; R600-LABEL: build_vector_v2i16_trunc: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.X, T5.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: LSHR * T0.W, KC0[2].Z, literal.x, | ||
| ; R600-NEXT: 16(2.242078e-44), 0(0.000000e+00) | ||
| ; R600-NEXT: OR_INT T4.X, PV.W, literal.x, | ||
| ; R600-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, | ||
| ; R600-NEXT: 327680(4.591775e-40), 2(2.802597e-45) | ||
| %srl = lshr i32 %a, 16 | ||
| %trunc = trunc i32 %srl to i16 | ||
| %ins.0 = insertelement <2 x i16> undef, i16 %trunc, i32 0 | ||
| %ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1 | ||
| store <2 x i16> %ins.1, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| ; Selects lanes 0 and 2 of %in, zero-extends them to i32 and shifts each left | ||
| ; by 16. The checks expect two VTX_READ_16 fetches followed by LSHL by 16. | ||
| define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out, <4 x i16> %in) { | ||
| ; R600-LABEL: build_v2i32_from_v4i16_shuffle: | ||
| ; R600: ; %bb.0: ; %entry | ||
| ; R600-NEXT: ALU 0, @10, KC0[], KC1[] | ||
| ; R600-NEXT: TEX 1 @6 | ||
| ; R600-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: Fetch clause starting at 6: | ||
| ; R600-NEXT: VTX_READ_16 T1.X, T0.X, 48, #3 | ||
| ; R600-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3 | ||
| ; R600-NEXT: ALU clause starting at 10: | ||
| ; R600-NEXT: MOV * T0.X, 0.0, | ||
| ; R600-NEXT: ALU clause starting at 11: | ||
| ; R600-NEXT: LSHL * T0.Y, T1.X, literal.x, | ||
| ; R600-NEXT: 16(2.242078e-44), 0(0.000000e+00) | ||
| ; R600-NEXT: LSHL T0.X, T0.X, literal.x, | ||
| ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, | ||
| ; R600-NEXT: 16(2.242078e-44), 2(2.802597e-45) | ||
| entry: | ||
| %shuf = shufflevector <4 x i16> %in, <4 x i16> zeroinitializer, <2 x i32> <i32 0, i32 2> | ||
| %zextended = zext <2 x i16> %shuf to <2 x i32> | ||
| %shifted = shl <2 x i32> %zextended, <i32 16, i32 16> | ||
| store <2 x i32> %shifted, ptr addrspace(1) %out | ||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,159 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s | ||
|
|
||
|
|
||
| ; DAGCombiner will transform: | ||
| ; (fabsf (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF)) | ||
| ; unless isFabsFree returns true | ||
| ; This case calls the @fabsf library function on a bitcast i32 argument. The | ||
| ; checks expect the absolute value as a |PV.W| source modifier on a MOV. | ||
| define amdgpu_kernel void @s_fabsf_fn_free(ptr addrspace(1) %out, i32 %in) { | ||
| ; R600-LABEL: s_fabsf_fn_free: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: MOV * T0.W, KC0[2].Z, | ||
| ; R600-NEXT: MOV T0.X, |PV.W|, | ||
| ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %bc= bitcast i32 %in to float | ||
| %fabs = call float @fabsf(float %bc) | ||
| store float %fabs, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| ; Applies the llvm.fabs.f32 intrinsic to a bitcast i32 argument. The checks | ||
| ; expect the absolute value as a |PV.W| source modifier on a MOV. | ||
| define amdgpu_kernel void @s_fabsf_free(ptr addrspace(1) %out, i32 %in) { | ||
| ; R600-LABEL: s_fabsf_free: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: MOV * T0.W, KC0[2].Z, | ||
| ; R600-NEXT: MOV T0.X, |PV.W|, | ||
| ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %bc= bitcast i32 %in to float | ||
| %fabs = call float @llvm.fabs.f32(float %bc) | ||
| store float %fabs, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| ; Applies llvm.fabs.f32 directly to a float kernel argument. The checks expect | ||
| ; the absolute value as a |PV.W| source modifier on a MOV. | ||
| define amdgpu_kernel void @s_fabsf_f32(ptr addrspace(1) %out, float %in) { | ||
| ; R600-LABEL: s_fabsf_f32: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: MOV * T0.W, KC0[2].Z, | ||
| ; R600-NEXT: MOV T0.X, |PV.W|, | ||
| ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %fabs = call float @llvm.fabs.f32(float %in) | ||
| store float %fabs, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| ; Applies llvm.fabs.v2f32 to a <2 x float> argument. The checks expect one | ||
| ; abs-modified MOV per lane. | ||
| define amdgpu_kernel void @fabs_v2f32(ptr addrspace(1) %out, <2 x float> %in) { | ||
| ; R600-LABEL: fabs_v2f32: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: MOV * T0.W, KC0[3].X, | ||
| ; R600-NEXT: MOV T0.Y, |PV.W|, | ||
| ; R600-NEXT: MOV * T0.W, KC0[2].W, | ||
| ; R600-NEXT: MOV T0.X, |PV.W|, | ||
| ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in) | ||
| store <2 x float> %fabs, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| ; Applies llvm.fabs.v4f32 to a <4 x float> argument. The checks expect one | ||
| ; abs-modified MOV per lane and a single T0.XYZW store. | ||
| define amdgpu_kernel void @fabsf_v4f32(ptr addrspace(1) %out, <4 x float> %in) { | ||
| ; R600-LABEL: fabsf_v4f32: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: MOV T0.W, KC0[4].X, | ||
| ; R600-NEXT: MOV * T1.W, KC0[3].W, | ||
| ; R600-NEXT: MOV * T0.W, |PV.W|, | ||
| ; R600-NEXT: MOV T0.Z, |T1.W|, | ||
| ; R600-NEXT: MOV * T1.W, KC0[3].Z, | ||
| ; R600-NEXT: MOV T0.Y, |PV.W|, | ||
| ; R600-NEXT: MOV * T1.W, KC0[3].Y, | ||
| ; R600-NEXT: MOV T0.X, |PV.W|, | ||
| ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in) | ||
| store <4 x float> %fabs, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| ; Feeds the result of the @fabsf library call into an fmul. The checks expect | ||
| ; the abs folded into the MUL_IEEE operand as |KC0[2].Z|. | ||
| define amdgpu_kernel void @fabsf_fn_fold(ptr addrspace(1) %out, float %in0, float %in1) { | ||
| ; R600-LABEL: fabsf_fn_fold: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: MUL_IEEE * T1.X, |KC0[2].Z|, KC0[2].W, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %fabs = call float @fabsf(float %in0) | ||
| %fmul = fmul float %fabs, %in1 | ||
| store float %fmul, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| ; Feeds the result of llvm.fabs.f32 into an fmul. The checks expect the abs | ||
| ; folded into the MUL_IEEE operand as |KC0[2].Z|. | ||
| define amdgpu_kernel void @fabs_fold(ptr addrspace(1) %out, float %in0, float %in1) { | ||
| ; R600-LABEL: fabs_fold: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: MUL_IEEE * T1.X, |KC0[2].Z|, KC0[2].W, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %fabs = call float @llvm.fabs.f32(float %in0) | ||
| %fmul = fmul float %fabs, %in1 | ||
| store float %fmul, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| ; Clears the sign bit with an integer 'and' of 2147483647 (0x7FFFFFFF) between | ||
| ; two bitcasts, then adds 1.0. The checks expect the mask folded into the ADD | ||
| ; operand as |KC0[2].Z|. | ||
| define amdgpu_kernel void @bitpreserve_fabsf_f32(ptr addrspace(1) %out, float %in) { | ||
| ; R600-LABEL: bitpreserve_fabsf_f32: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: ADD * T1.X, |KC0[2].Z|, 1.0, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %in.bc = bitcast float %in to i32 | ||
| %int.abs = and i32 %in.bc, 2147483647 | ||
| %bc = bitcast i32 %int.abs to float | ||
| %fadd = fadd float %bc, 1.0 | ||
| store float %fadd, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| declare float @fabsf(float) readnone | ||
| declare float @llvm.fabs.f32(float) readnone | ||
| declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone | ||
| declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,180 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 %s | ||
|
|
||
| ; Negates fabs(%x) via an fsub from -0.0 and adds the result to %y. The checks | ||
| ; expect the combined -|KC0[2].Z| modifier folded into the ADD operand. | ||
| define amdgpu_kernel void @fneg_fabsf_fadd_f32(ptr addrspace(1) %out, float %x, float %y) { | ||
| ; R600-LABEL: fneg_fabsf_fadd_f32: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: ADD * T1.X, KC0[2].W, -|KC0[2].Z|, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %fabs = call float @llvm.fabs.f32(float %x) | ||
| %fsub = fsub float -0.000000e+00, %fabs | ||
| %fadd = fadd float %y, %fsub | ||
| store float %fadd, ptr addrspace(1) %out, align 4 | ||
| ret void | ||
| } | ||
|
|
||
| ; Negates fabs(%x) via an fsub from -0.0 and multiplies the result by %y. The | ||
| ; checks expect the -|KC0[2].Z| modifier folded into the MUL_IEEE operand. | ||
| define amdgpu_kernel void @fneg_fabsf_fmul_f32(ptr addrspace(1) %out, float %x, float %y) { | ||
| ; R600-LABEL: fneg_fabsf_fmul_f32: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: MUL_IEEE * T1.X, KC0[2].W, -|KC0[2].Z|, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %fabs = call float @llvm.fabs.f32(float %x) | ||
| %fsub = fsub float -0.000000e+00, %fabs | ||
| %fmul = fmul float %y, %fsub | ||
| store float %fmul, ptr addrspace(1) %out, align 4 | ||
| ret void | ||
| } | ||
|
|
||
| ; Computes fneg(fabs(x)) with llvm.fabs.f32, where x is a bitcast i32 | ||
| ; argument. The checks expect a |PV.W| MOV followed by a -PV.W MOV. | ||
| define amdgpu_kernel void @fneg_fabsf_free_f32(ptr addrspace(1) %out, i32 %in) { | ||
| ; R600-LABEL: fneg_fabsf_free_f32: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: MOV * T0.W, KC0[2].Z, | ||
| ; R600-NEXT: MOV * T0.W, |PV.W|, | ||
| ; R600-NEXT: MOV T0.X, -PV.W, | ||
| ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %bc = bitcast i32 %in to float | ||
| %fabs = call float @llvm.fabs.f32(float %bc) | ||
| %fsub = fsub float -0.000000e+00, %fabs | ||
| store float %fsub, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| ; Computes fneg(fabs(x)) with the @fabsf library call, where x is a bitcast | ||
| ; i32 argument. The checks expect a |PV.W| MOV followed by a -PV.W MOV. | ||
| define amdgpu_kernel void @fneg_fabsf_fn_free_f32(ptr addrspace(1) %out, i32 %in) { | ||
| ; R600-LABEL: fneg_fabsf_fn_free_f32: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: MOV * T0.W, KC0[2].Z, | ||
| ; R600-NEXT: MOV * T0.W, |PV.W|, | ||
| ; R600-NEXT: MOV T0.X, -PV.W, | ||
| ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %bc = bitcast i32 %in to float | ||
| %fabs = call float @fabsf(float %bc) | ||
| %fsub = fsub float -0.000000e+00, %fabs | ||
| store float %fsub, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| ; Computes fneg(fabs(%in)) on a float kernel argument and stores it. The | ||
| ; checks expect a |PV.W| MOV followed by a -PV.W MOV. | ||
| define amdgpu_kernel void @fneg_fabsf_f32(ptr addrspace(1) %out, float %in) { | ||
| ; R600-LABEL: fneg_fabsf_f32: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: MOV * T0.W, KC0[2].Z, | ||
| ; R600-NEXT: MOV * T0.W, |PV.W|, | ||
| ; R600-NEXT: MOV T0.X, -PV.W, | ||
| ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %fabs = call float @llvm.fabs.f32(float %in) | ||
| %fsub = fsub float -0.000000e+00, %fabs | ||
| store float %fsub, ptr addrspace(1) %out, align 4 | ||
| ret void | ||
| } | ||
|
|
||
| ; Computes fneg(fabs(x)) where x is loaded from the global pointer %in. The | ||
| ; checks expect a VTX_READ_32 fetch, then a |T0.X| MOV and a -PV.W MOV. | ||
| define amdgpu_kernel void @v_fneg_fabsf_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) { | ||
| ; R600-LABEL: v_fneg_fabsf_f32: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: TEX 0 @6 | ||
| ; R600-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: Fetch clause starting at 6: | ||
| ; R600-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 | ||
| ; R600-NEXT: ALU clause starting at 8: | ||
| ; R600-NEXT: MOV * T0.X, KC0[2].Z, | ||
| ; R600-NEXT: ALU clause starting at 9: | ||
| ; R600-NEXT: MOV * T0.W, |T0.X|, | ||
| ; R600-NEXT: MOV T0.X, -PV.W, | ||
| ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %val = load float, ptr addrspace(1) %in, align 4 | ||
| %fabs = call float @llvm.fabs.f32(float %val) | ||
| %fsub = fsub float -0.000000e+00, %fabs | ||
| store float %fsub, ptr addrspace(1) %out, align 4 | ||
| ret void | ||
| } | ||
|
|
||
| ; Computes fneg(fabs(%in)) on a <2 x float> argument. The checks expect an | ||
| ; abs-modified MOV and a negating MOV for each lane. | ||
| define amdgpu_kernel void @fneg_fabsf_v2f32(ptr addrspace(1) %out, <2 x float> %in) { | ||
| ; R600-LABEL: fneg_fabsf_v2f32: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: MOV T0.W, KC0[3].X, | ||
| ; R600-NEXT: MOV * T1.W, KC0[2].W, | ||
| ; R600-NEXT: MOV * T0.W, |PV.W|, | ||
| ; R600-NEXT: MOV T0.Y, -PV.W, | ||
| ; R600-NEXT: MOV * T0.W, |T1.W|, | ||
| ; R600-NEXT: MOV T0.X, -PV.W, | ||
| ; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in) | ||
| %fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs | ||
| store <2 x float> %fsub, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| ; Computes fneg(fabs(%in)) on a <4 x float> argument. The checks expect an | ||
| ; abs-modified MOV and a negating MOV for each lane, then a T2.XYZW store. | ||
| define amdgpu_kernel void @fneg_fabsf_v4f32(ptr addrspace(1) %out, <4 x float> %in) { | ||
| ; R600-LABEL: fneg_fabsf_v4f32: | ||
| ; R600: ; %bb.0: | ||
| ; R600-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[] | ||
| ; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1 | ||
| ; R600-NEXT: CF_END | ||
| ; R600-NEXT: PAD | ||
| ; R600-NEXT: ALU clause starting at 4: | ||
| ; R600-NEXT: MOV * T0.W, KC0[4].X, | ||
| ; R600-NEXT: MOV T0.W, |PV.W|, | ||
| ; R600-NEXT: MOV * T1.W, KC0[3].W, | ||
| ; R600-NEXT: MOV T0.Z, KC0[3].Z, | ||
| ; R600-NEXT: MOV T1.W, |PS|, | ||
| ; R600-NEXT: MOV * T2.W, -PV.W, | ||
| ; R600-NEXT: MOV T2.Z, -PV.W, | ||
| ; R600-NEXT: MOV T0.W, KC0[3].Y, | ||
| ; R600-NEXT: MOV * T1.W, |PV.Z|, | ||
| ; R600-NEXT: MOV T2.Y, -PS, | ||
| ; R600-NEXT: MOV * T0.W, |PV.W|, | ||
| ; R600-NEXT: MOV T2.X, -PV.W, | ||
| ; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, | ||
| ; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) | ||
| %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in) | ||
| %fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs | ||
| store <4 x float> %fsub, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| declare float @fabsf(float) readnone | ||
| declare float @llvm.fabs.f32(float) readnone | ||
| declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone | ||
| declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone | ||
|
|
||
| !llvm.module.flags = !{!0} | ||
| !0 = !{i32 1, !"amdhsa_code_object_version", i32 500} |