Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] New ttracedata intrinsics #70235

Merged
merged 3 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1697,6 +1697,13 @@ def int_amdgcn_s_setprio :
DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
IntrHasSideEffects]>;

// Emits a 32-bit token into ttracedata. The i32 operand is not an immediate
// (no ImmArg); the selected S_TTRACEDATA instruction reads it from M0.
// This is not interchangeable with int_amdgcn_s_ttracedata_imm, which emits
// only an 8-bit token (the low 8 bits of its imm16 operand), so the backend
// must not fold a constant operand of this intrinsic into the imm form.
def int_amdgcn_s_ttracedata :
DefaultAttrsIntrinsic<[], [llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects]>;
def int_amdgcn_s_ttracedata_imm :
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need the imm form, or can the backend just fold the register version with appropriate constant inputs to the imm instruction?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We really need it. They do different things. s_ttracedata emits a 32-bit "token" into ttracedata. s_ttracedata_imm emits an 8-bit "token" (the low 8 bits of the imm16 operand).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is in no way obvious from the manual descriptions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think you would have to read the instruction descriptions in conjunction with the separate threadtrace documentation, and most or all of that is not public anyway.

DefaultAttrsIntrinsic<[], [llvm_i16_ty],
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;

// This is IntrHasSideEffects so it can be used to read cycle counters.
def int_amdgcn_s_getreg :
ClangBuiltin<"__builtin_amdgcn_s_getreg">,
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3065,6 +3065,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(B, MI, 2);
return;
}
case Intrinsic::amdgcn_s_ttracedata:
constrainOpWithReadfirstlane(B, MI, 1); // M0
return;
case Intrinsic::amdgcn_raw_buffer_load_lds:
case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
applyDefaultMapping(OpdMapper);
Expand Down Expand Up @@ -4661,6 +4664,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
break;
}
case Intrinsic::amdgcn_s_ttracedata: {
// This must be an SGPR, but accept a VGPR.
unsigned Bank =
getRegBankID(MI.getOperand(1).getReg(), MRI, AMDGPU::SGPRRegBankID);
OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
break;
}
case Intrinsic::amdgcn_end_cf: {
unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
Expand Down
9 changes: 7 additions & 2 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1501,7 +1501,10 @@ def S_INCPERFLEVEL : SOPP_Pseudo <"s_incperflevel", (ins i32imm:$simm16), "$simm
def S_DECPERFLEVEL : SOPP_Pseudo <"s_decperflevel", (ins i32imm:$simm16), "$simm16",
[(int_amdgcn_s_decperflevel timm:$simm16)]> {
}
def S_TTRACEDATA : SOPP_Pseudo <"s_ttracedata", (ins)> {

// s_ttracedata has no explicit operands (empty ins list and asm string).
// The selection pattern matches int_amdgcn_s_ttracedata with its value
// already in M0, and Uses = [M0] records that implicit register read.
// simm16 is pinned to 0.
// NOTE(review): fixed_imm presumably marks the immediate field as a fixed
// encoding value rather than an operand -- confirm against SOPP_Pseudo.
let Uses = [M0] in
def S_TTRACEDATA : SOPP_Pseudo <"s_ttracedata", (ins), "",
[(int_amdgcn_s_ttracedata M0)]> {
let simm16 = 0;
let fixed_imm = 1;
}
Expand Down Expand Up @@ -1545,8 +1548,10 @@ let SubtargetPredicate = isGFX10Plus in {
[(SIdenorm_mode (i32 timm:$simm16))]>;
}

// Immediate form of s_ttracedata: the operand of
// int_amdgcn_s_ttracedata_imm is matched as a target immediate (timm) and
// encoded in simm16. No M0 setup is involved. hasSideEffects is set
// explicitly on the instruction.
let hasSideEffects = 1 in
def S_TTRACEDATA_IMM :
SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16">;
SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16",
[(int_amdgcn_s_ttracedata_imm timm:$simm16)]>;
} // End SubtargetPredicate = isGFX10Plus

let SubtargetPredicate = isGFX11Plus in {
Expand Down
53 changes: 53 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.ttracedata.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s

declare void @llvm.amdgcn.s.ttracedata(i32)
declare void @llvm.amdgcn.s.ttracedata.imm(i16)

; Constant operand: 1000000 (0xf4240) is materialized into m0 with s_mov_b32
; before s_ttracedata. It is not folded into the s_ttracedata_imm form.
define amdgpu_cs void @ttracedata_c() {
; GFX11-LABEL: ttracedata_c:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_mov_b32 m0, 0xf4240
; GFX11-NEXT: s_ttracedata
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.s.ttracedata(i32 1000000)
ret void
}

; SGPR operand: the inreg argument arrives in s0 and is copied to m0 with a
; plain s_mov_b32. Both selectors produce the same code.
define amdgpu_cs void @ttracedata_s(i32 inreg %val) {
; GFX11-LABEL: ttracedata_s:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_mov_b32 m0, s0
; GFX11-NEXT: s_ttracedata
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.s.ttracedata(i32 %val)
ret void
}

; VGPR operand: the argument arrives in v0 and must reach m0 through
; v_readfirstlane_b32. SelectionDAG reads it into s0 and then copies to m0;
; GlobalISel writes the readfirstlane result directly to m0.
define amdgpu_cs void @ttracedata_v(i32 %val) {
; GFX11-SDAG-LABEL: ttracedata_v:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: s_mov_b32 m0, s0
; GFX11-SDAG-NEXT: s_ttracedata
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: ttracedata_v:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: v_readfirstlane_b32 m0, v0
; GFX11-GISEL-NEXT: s_ttracedata
; GFX11-GISEL-NEXT: s_endpgm
call void @llvm.amdgcn.s.ttracedata(i32 %val)
ret void
}

; Immediate form: the i16 constant 1000 is encoded directly in the
; instruction as 0x3e8, with no m0 setup.
define amdgpu_cs void @ttracedata_imm() {
; GFX11-LABEL: ttracedata_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_ttracedata_imm 0x3e8
; GFX11-NEXT: s_endpgm
call void @llvm.amdgcn.s.ttracedata.imm(i16 1000)
ret void
}