Skip to content

Commit b46e3b6

Browse files
committed
Add inline asm support for amdgpu
Add support for inline assembly for the amdgpu backend (the amdgcn-amd-amdhsa target). Add register classes for `vgpr` (vector general purpose register) and `sgpr` (scalar general purpose register). The LLVM backend supports two more classes. The third, `reg`, is either a VGPR or an SGPR, with the choice left to the compiler. As instructions often rely on a register being specifically a VGPR or an SGPR for the assembly to be valid, `reg` doesn’t seem that useful (I struggled to write correct tests for it), so I didn’t end up adding it. The fourth register class is AGPRs, which only exist on some hardware versions (not the consumer ones) and can only be written to and read from in restricted ways, which makes it hard to write a Rust variable into them. They could be used inside assembly blocks, but I didn’t add them as a Rust register class. There is one change affecting general inline assembly code: `InlineAsmReg::name()` now returns a `Cow` instead of a `&'static str`. Because amdgpu has many registers (256 VGPRs plus combinations of 2 or 4 VGPRs) and I didn’t want to list hundreds of static strings, the amdgpu reg stores the register number(s) and a non-static `String` is generated at runtime for the register name.
1 parent 83e49b7 commit b46e3b6

File tree

6 files changed

+523
-18
lines changed

6 files changed

+523
-18
lines changed

compiler/rustc_codegen_gcc/src/asm.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -665,6 +665,8 @@ fn reg_class_to_gcc(reg_class: InlineAsmRegClass) -> &'static str {
665665
InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::preg) => {
666666
unreachable!("clobber-only")
667667
}
668+
InlineAsmRegClass::Amdgpu(AmdgpuInlineAsmRegClass::vgpr) => "v",
669+
InlineAsmRegClass::Amdgpu(AmdgpuInlineAsmRegClass::sgpr) => "Sg",
668670
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::reg) => "r",
669671
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg)
670672
| InlineAsmRegClass::Arm(ArmInlineAsmRegClass::dreg_low16)
@@ -761,6 +763,7 @@ fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegCl
761763
InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::preg) => {
762764
unreachable!("clobber-only")
763765
}
766+
InlineAsmRegClass::Amdgpu(_) => cx.type_i32(),
764767
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::reg) => cx.type_i32(),
765768
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg)
766769
| InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg_low16) => cx.type_f32(),
@@ -946,6 +949,7 @@ fn modifier_to_gcc(
946949
InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::preg) => {
947950
unreachable!("clobber-only")
948951
}
952+
InlineAsmRegClass::Amdgpu(_) => None,
949953
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::reg) => None,
950954
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg)
951955
| InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg_low16) => None,

compiler/rustc_codegen_llvm/src/asm.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ impl<'ll, 'tcx> AsmBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
229229
InlineAsmArch::AArch64 | InlineAsmArch::Arm64EC | InlineAsmArch::Arm => {
230230
constraints.push("~{cc}".to_string());
231231
}
232+
InlineAsmArch::Amdgpu => {}
232233
InlineAsmArch::X86 | InlineAsmArch::X86_64 => {
233234
constraints.extend_from_slice(&[
234235
"~{dirflag}".to_string(),
@@ -645,6 +646,7 @@ fn reg_to_llvm(reg: InlineAsmRegOrRegClass, layout: Option<&TyAndLayout<'_>>) ->
645646
| Arm(ArmInlineAsmRegClass::dreg_low8)
646647
| Arm(ArmInlineAsmRegClass::qreg_low4) => "x",
647648
Arm(ArmInlineAsmRegClass::dreg) | Arm(ArmInlineAsmRegClass::qreg) => "w",
649+
Amdgpu(class) => class.prefix(),
648650
Hexagon(HexagonInlineAsmRegClass::reg) => "r",
649651
Hexagon(HexagonInlineAsmRegClass::preg) => unreachable!("clobber-only"),
650652
LoongArch(LoongArchInlineAsmRegClass::reg) => "r",
@@ -745,6 +747,7 @@ fn modifier_to_llvm(
745747
modifier
746748
}
747749
}
750+
Amdgpu(_) => None,
748751
Hexagon(_) => None,
749752
LoongArch(_) => None,
750753
Mips(_) => None,
@@ -825,6 +828,7 @@ fn dummy_output_type<'ll>(cx: &CodegenCx<'ll, '_>, reg: InlineAsmRegClass) -> &'
825828
Arm(ArmInlineAsmRegClass::qreg)
826829
| Arm(ArmInlineAsmRegClass::qreg_low8)
827830
| Arm(ArmInlineAsmRegClass::qreg_low4) => cx.type_vector(cx.type_i64(), 2),
831+
Amdgpu(_) => cx.type_i32(),
828832
Hexagon(HexagonInlineAsmRegClass::reg) => cx.type_i32(),
829833
Hexagon(HexagonInlineAsmRegClass::preg) => unreachable!("clobber-only"),
830834
LoongArch(LoongArchInlineAsmRegClass::reg) => cx.type_i32(),

compiler/rustc_span/src/symbol.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2028,6 +2028,7 @@ symbols! {
20282028
self_struct_ctor,
20292029
semiopaque,
20302030
semitransparent,
2031+
sgpr,
20312032
sha2,
20322033
sha3,
20332034
sha512_sm_x86,
@@ -2448,6 +2449,7 @@ symbols! {
24482449
verbatim,
24492450
version,
24502451
vfp2,
2452+
vgpr,
24512453
vis,
24522454
visible_private_types,
24532455
volatile,
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
use std::fmt;
2+
3+
use rustc_span::Symbol;
4+
5+
use super::{InlineAsmArch, InlineAsmType, ModifierInfo};
6+
7+
// Declares the register classes available for amdgpu inline assembly operands:
// `sgpr` (scalar general purpose registers) and `vgpr` (vector general purpose
// registers).
// NOTE(review): `def_reg_class!` is defined outside this file; presumably it also
// generates the `regclass_map()` helper used by `fill_reg_map` below — confirm.
def_reg_class! {
8+
Amdgpu AmdgpuInlineAsmRegClass {
9+
sgpr,
10+
vgpr,
11+
}
12+
}
13+
14+
// See https://llvm.org/docs/AMDGPUOperandSyntax.html
15+
impl AmdgpuInlineAsmRegClass {
16+
pub fn valid_modifiers(self, _arch: InlineAsmArch) -> &'static [char] {
17+
&[]
18+
}
19+
20+
pub fn suggest_class(self, _arch: InlineAsmArch, _ty: InlineAsmType) -> Option<Self> {
21+
None
22+
}
23+
24+
pub fn suggest_modifier(
25+
self,
26+
_arch: InlineAsmArch,
27+
_ty: InlineAsmType,
28+
) -> Option<ModifierInfo> {
29+
None
30+
}
31+
32+
pub fn default_modifier(self, _arch: InlineAsmArch) -> Option<ModifierInfo> {
33+
None
34+
}
35+
36+
pub fn supported_types(
37+
self,
38+
_arch: InlineAsmArch,
39+
) -> &'static [(InlineAsmType, Option<Symbol>)] {
40+
types! { _: I16, F16, I32, F32, I64, F64, I128; }
41+
}
42+
43+
/// The number of supported registers in this class.
44+
/// The returned number is the length, so supported register
45+
/// indices are 0 to max_num()-1.
46+
fn max_num(self) -> u32 {
47+
match self {
48+
Self::sgpr => 106,
49+
Self::vgpr => 256,
50+
}
51+
}
52+
53+
/// Prefix when printed and register constraint in LLVM.
54+
pub fn prefix(self) -> &'static str {
55+
match self {
56+
Self::sgpr => "s",
57+
Self::vgpr => "v",
58+
}
59+
}
60+
61+
/// Get register class from prefix.
62+
fn parse_prefix(prefix: char) -> Result<Self, &'static str> {
63+
match prefix {
64+
's' => Ok(Self::sgpr),
65+
'v' => Ok(Self::vgpr),
66+
_ => Err("unknown register prefix"),
67+
}
68+
}
69+
}
70+
71+
/// Which part of the register file an `AmdgpuInlineAsmReg` addresses:
/// one 16-bit half of a single register, or an inclusive run of whole registers.
/// All numbers are register indices within the register's class.
#[derive(
72+
Copy,
73+
Clone,
74+
rustc_macros::Encodable,
75+
rustc_macros::Decodable,
76+
Debug,
77+
Eq,
78+
PartialEq,
79+
PartialOrd,
80+
Hash,
81+
rustc_macros::HashStable_Generic
82+
)]
83+
enum AmdgpuRegRange {
84+
/// The low 16 bits of the register with the given index (spelled `<prefix><n>.l`).
85+
Low(u32),
86+
/// The high 16 bits of the register with the given index (spelled `<prefix><n>.h`).
87+
High(u32),
88+
/// One or more 32-bit registers; `start` and `end` are both inclusive, and
/// `start == end` denotes a single register.
89+
Range { start: u32, end: u32 },
90+
}
91+
92+
#[derive(
93+
Copy,
94+
Clone,
95+
rustc_macros::Encodable,
96+
rustc_macros::Decodable,
97+
Debug,
98+
Eq,
99+
PartialEq,
100+
PartialOrd,
101+
Hash,
102+
rustc_macros::HashStable_Generic
103+
)]
104+
#[allow(non_camel_case_types)]
105+
pub struct AmdgpuInlineAsmReg {
106+
class: AmdgpuInlineAsmRegClass,
107+
range: AmdgpuRegRange,
108+
}
109+
110+
impl AmdgpuInlineAsmReg {
111+
pub fn name(self) -> String {
112+
let c = self.class.prefix();
113+
match self.range {
114+
AmdgpuRegRange::Low(n) => format!("{c}{n}.l"),
115+
AmdgpuRegRange::High(n) => format!("{c}{n}.h"),
116+
AmdgpuRegRange::Range { start, end } if start == end => format!("{c}{start}"),
117+
AmdgpuRegRange::Range { start, end } => format!("{c}[{start}:{end}]"),
118+
}
119+
}
120+
121+
pub fn reg_class(self) -> AmdgpuInlineAsmRegClass {
122+
self.class
123+
}
124+
125+
pub fn parse(name: &str) -> Result<Self, &'static str> {
126+
if name.is_empty() {
127+
return Err("invalid empty register");
128+
}
129+
let class = AmdgpuInlineAsmRegClass::parse_prefix(name.chars().next().unwrap())?;
130+
// Form with range, e.g. s[2:3]
131+
let res;
132+
if name[1..].starts_with('[') {
133+
if !name.ends_with(']') {
134+
return Err("invalid register, missing closing bracket");
135+
}
136+
if let Some((start, end)) = name[2..name.len() - 1].split_once(':') {
137+
let Ok(start) = start.parse() else {
138+
return Err("invalid register range start");
139+
};
140+
let Ok(end) = end.parse() else {
141+
return Err("invalid register range end");
142+
};
143+
144+
// Check range
145+
if start > end {
146+
return Err("invalid reversed register range");
147+
}
148+
149+
if end >= class.max_num() {
150+
return Err("too large register for this class");
151+
}
152+
res = Self { class, range: AmdgpuRegRange::Range { start, end } };
153+
} else {
154+
return Err("invalid register range");
155+
}
156+
} else {
157+
let parse_num = |core: &str| {
158+
let Ok(start) = core.parse() else {
159+
return Err("invalid register number");
160+
};
161+
162+
if start >= class.max_num() {
163+
return Err("too large register for this class");
164+
}
165+
166+
Ok(start)
167+
};
168+
169+
let name = &name[1..];
170+
let range = if let Some(name) = name.strip_suffix(".l") {
171+
AmdgpuRegRange::Low(parse_num(name)?)
172+
} else if let Some(name) = name.strip_suffix(".h") {
173+
AmdgpuRegRange::High(parse_num(name)?)
174+
} else {
175+
let start = parse_num(name)?;
176+
AmdgpuRegRange::Range { start, end: start }
177+
};
178+
res = Self { class, range };
179+
}
180+
Ok(res)
181+
}
182+
183+
pub fn validate(
184+
self,
185+
_arch: super::InlineAsmArch,
186+
_reloc_model: crate::spec::RelocModel,
187+
_target_features: &rustc_data_structures::fx::FxIndexSet<Symbol>,
188+
_target: &crate::spec::Target,
189+
_is_clobber: bool,
190+
) -> Result<(), &'static str> {
191+
Ok(())
192+
}
193+
}
194+
195+
pub(super) fn fill_reg_map(
196+
_arch: super::InlineAsmArch,
197+
_reloc_model: crate::spec::RelocModel,
198+
_target_features: &rustc_data_structures::fx::FxIndexSet<Symbol>,
199+
_target: &crate::spec::Target,
200+
map: &mut rustc_data_structures::fx::FxHashMap<
201+
super::InlineAsmRegClass,
202+
rustc_data_structures::fx::FxIndexSet<super::InlineAsmReg>,
203+
>,
204+
) {
205+
use super::{InlineAsmReg, InlineAsmRegClass};
206+
207+
// Add single registers of each class (no register ranges)
208+
#[allow(rustc::potential_query_instability)]
209+
for class in regclass_map().keys() {
210+
let InlineAsmRegClass::Amdgpu(class) = *class else { unreachable!("Must be amdgpu class") };
211+
if let Some(set) = map.get_mut(&InlineAsmRegClass::Amdgpu(class)) {
212+
for i in 0..class.max_num() {
213+
set.insert(InlineAsmReg::Amdgpu(AmdgpuInlineAsmReg {
214+
class,
215+
range: AmdgpuRegRange::Range { start: i, end: i },
216+
}));
217+
}
218+
}
219+
}
220+
}
221+
222+
impl AmdgpuInlineAsmReg {
223+
pub fn emit(
224+
self,
225+
out: &mut dyn fmt::Write,
226+
_arch: InlineAsmArch,
227+
_modifier: Option<char>,
228+
) -> fmt::Result {
229+
out.write_str(&self.name())
230+
}
231+
232+
// There are too many conflicts to list
233+
pub fn overlapping_regs(self, mut _cb: impl FnMut(AmdgpuInlineAsmReg)) {}
234+
}

0 commit comments

Comments
 (0)