Skip to content

Commit

Permalink
Unrolled build for rust-lang#125980
Browse files Browse the repository at this point in the history
Rollup merge of rust-lang#125980 - kjetilkjeka:nvptx_remove_direct_passmode, r=davidtwco

Nvptx remove direct passmode

This PR does what should have been done in rust-lang#117671. That is fully avoid using the `PassMode::Direct` for `extern "C" fn` for `nvptx64-nvidia-cuda` and enable the compatibility test. `@RalfJung` [pointed me in the right direction](rust-lang#117480 (comment)) for solving this issue.

There are still some ABI bugs after this PR is merged. These ABI tests are created based on what is actually correct, and since they continue passing with even more of them enabled things are improving. I don't have the time to tackle all the remaining issues right now, but I think getting these improvements merged is very valuable in themselves and plan to tackle more of them long term.

This also doesn't remove the use of `PassMode::Direct` for `extern "ptx-kernel" fn`. This was also not trivial to make work. And since the ABI is hidden behind an unstable feature it's less urgent.

I don't know if it's correct to request `@RalfJung` as a reviewer (due to team structures), but he helped me a lot to figure out this stuff. If that's not appropriate then `@davidtwco` would be a good candidate since he know about this topic from rust-lang#117671

r​? `@RalfJung`
  • Loading branch information
rust-timer committed Jun 12, 2024
2 parents 02c7a59 + a49fe0a commit cd8fcad
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 16 deletions.
47 changes: 39 additions & 8 deletions compiler/rustc_target/src/abi/call/nvptx64.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,54 @@
use crate::abi::call::{ArgAbi, FnAbi, PassMode, Reg, Size, Uniform};
use crate::abi::{HasDataLayout, TyAbiInterface};

use super::{ArgAttribute, ArgAttributes, ArgExtension, CastTarget};

fn classify_ret<Ty>(ret: &mut ArgAbi<'_, Ty>) {
if ret.layout.is_aggregate() && ret.layout.size.bits() > 64 {
ret.make_indirect();
} else {
// FIXME: this is wrong! Need to decide which ABI we really want here.
ret.make_direct_deprecated();
if ret.layout.is_aggregate() && ret.layout.is_sized() {
classify_aggregate(ret)
} else if ret.layout.size.bits() < 32 && ret.layout.is_sized() {
ret.extend_integer_width_to(32);
}
}

fn classify_arg<Ty>(arg: &mut ArgAbi<'_, Ty>) {
if arg.layout.is_aggregate() {
arg.make_indirect_byval(None);
} else if arg.layout.size.bits() < 32 {
if arg.layout.is_aggregate() && arg.layout.is_sized() {
classify_aggregate(arg)
} else if arg.layout.size.bits() < 32 && arg.layout.is_sized() {
arg.extend_integer_width_to(32);
}
}

/// the pass mode used for aggregates in arg and ret position
fn classify_aggregate<Ty>(arg: &mut ArgAbi<'_, Ty>) {
let align_bytes = arg.layout.align.abi.bytes();
let size = arg.layout.size;

let reg = match align_bytes {
1 => Reg::i8(),
2 => Reg::i16(),
4 => Reg::i32(),
8 => Reg::i64(),
16 => Reg::i128(),
_ => unreachable!("Align is given as power of 2 no larger than 16 bytes"),
};

if align_bytes == size.bytes() {
arg.cast_to(CastTarget {
prefix: [Some(reg), None, None, None, None, None, None, None],
rest: Uniform::new(Reg::i8(), Size::from_bytes(0)),
attrs: ArgAttributes {
regular: ArgAttribute::default(),
arg_ext: ArgExtension::None,
pointee_size: Size::ZERO,
pointee_align: None,
},
});
} else {
arg.cast_to(Uniform::new(reg, size));
}
}

fn classify_arg_kernel<'a, Ty, C>(_cx: &C, arg: &mut ArgAbi<'a, Ty>)
where
Ty: TyAbiInterface<'a, C> + Copy,
Expand Down
10 changes: 10 additions & 0 deletions tests/assembly/nvptx-c-abi-arg-v7.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ pub struct TripleU16 {
h: u16,
}
#[repr(C)]
pub struct DoubleI32 {
f: i32,
g: i32,
}
#[repr(C)]
pub struct TripleU32 {
f: u32,
g: u32,
Expand Down Expand Up @@ -175,6 +180,11 @@ pub unsafe extern "C" fn f_triple_u16_arg(_a: TripleU16) {}
#[no_mangle]
pub unsafe extern "C" fn f_triple_u32_arg(_a: TripleU32) {}

// CHECK: .visible .func f_double_i32_arg(
// CHECK: .param .align 4 .b8 f_double_i32_arg_param_0[8]
#[no_mangle]
pub unsafe extern "C" fn f_double_i32_arg(_a: DoubleI32) {}

// CHECK: .visible .func f_triple_u64_arg(
// CHECK: .param .align 8 .b8 f_triple_u64_arg_param_0[24]
#[no_mangle]
Expand Down
12 changes: 11 additions & 1 deletion tests/assembly/nvptx-c-abi-ret-v7.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
//@ assembly-output: ptx-linker
//@ compile-flags: --crate-type cdylib -C target-cpu=sm_86 -Z unstable-options -Clinker-flavor=llbc
//@ only-nvptx64
//@ ignore-nvptx64

// The PTX ABI stability is tied to major versions of the PTX ISA
// These tests assume major version 7
Expand Down Expand Up @@ -41,6 +40,11 @@ pub struct TripleU16 {
h: u16,
}
#[repr(C)]
pub struct DoubleI32 {
f: i32,
g: i32,
}
#[repr(C)]
pub struct TripleU32 {
f: u32,
g: u32,
Expand Down Expand Up @@ -187,6 +191,12 @@ pub unsafe extern "C" fn f_triple_u16_ret() -> TripleU16 {
TripleU16 { f: 18, g: 19, h: 20 }
}

// CHECK: .visible .func (.param .align 4 .b8 func_retval0[8]) f_double_i32_ret(
#[no_mangle]
pub unsafe extern "C" fn f_double_i32_ret() -> DoubleI32 {
DoubleI32 { f: 1, g: 2 }
}

// CHECK: .visible .func (.param .align 4 .b8 func_retval0[12]) f_triple_u32_ret(
#[no_mangle]
pub unsafe extern "C" fn f_triple_u32_ret() -> TripleU32 {
Expand Down
10 changes: 10 additions & 0 deletions tests/assembly/nvptx-kernel-abi/nvptx-kernel-args-abi-v7.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ pub struct TripleU16 {
h: u16,
}
#[repr(C)]
pub struct DoubleI32 {
f: i32,
g: i32,
}
#[repr(C)]
pub struct TripleU32 {
f: u32,
g: u32,
Expand Down Expand Up @@ -180,6 +185,11 @@ pub unsafe extern "ptx-kernel" fn f_triple_u8_arg(_a: TripleU8) {}
#[no_mangle]
pub unsafe extern "ptx-kernel" fn f_triple_u16_arg(_a: TripleU16) {}

// CHECK: .visible .entry f_double_i32_arg(
// CHECK: .param .align 4 .b8 f_double_i32_arg_param_0[8]
#[no_mangle]
pub unsafe extern "ptx-kernel" fn f_double_i32_arg(_a: DoubleI32) {}

// CHECK: .visible .entry f_triple_u32_arg(
// CHECK: .param .align 4 .b8 f_triple_u32_arg_param_0[12]
#[no_mangle]
Expand Down
10 changes: 3 additions & 7 deletions tests/ui/abi/compatibility.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,9 @@
//@ revisions: csky
//@[csky] compile-flags: --target csky-unknown-linux-gnuabiv2
//@[csky] needs-llvm-components: csky

// FIXME: disabled on nvptx64 since the target ABI fails the sanity check
// see https://github.com/rust-lang/rust/issues/117480
/* revisions: nvptx64
[nvptx64] compile-flags: --target nvptx64-nvidia-cuda
[nvptx64] needs-llvm-components: nvptx
*/
//@ revisions: nvptx64
//@[nvptx64] compile-flags: --target nvptx64-nvidia-cuda
//@[nvptx64] needs-llvm-components: nvptx
#![feature(rustc_attrs, unsized_fn_params, transparent_unions)]
#![cfg_attr(not(host), feature(no_core, lang_items), no_std, no_core)]
#![allow(unused, improper_ctypes_definitions, internal_features)]
Expand Down

0 comments on commit cd8fcad

Please sign in to comment.