From fa11cbdb5ed164563be8a2c0224bfd381c2010dc Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Mon, 8 Jul 2019 16:13:13 +0200 Subject: [PATCH 01/17] Detail layout of repr(C) unions --- reference/src/layout/unions.md | 51 +++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index baf736cf..2c504e71 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -18,16 +18,6 @@ is already entirely determined by their types, and since we intend to allow creating references to fields (`&u.f1`), unions do not have any wiggle-room there. -### C-compatible layout ("repr C") - -For unions tagged `#[repr(C)]`, the compiler will apply the C layout scheme. Per -sections [6.5.8.5] and [6.7.2.1.16] of the C11 specification, this means that -the offset of every field is 0. Unsafe code can cast a pointer to the union to -a field type to obtain a pointer to any field, and vice versa. - -[6.5.8.5]: http://port70.net/~nsz/c/c11/n1570.html#6.5.8p5 -[6.7.2.1.16]: http://port70.net/~nsz/c/c11/n1570.html#6.7.2.1p16 - ### Default layout ("repr rust") **The default layout of unions is not specified.** As of this writing, we want @@ -38,3 +28,44 @@ contents are. Even if the offsets happen to be all 0, there might still be differences in the function call ABI. If you need to pass unions by-value across an FFI boundary, you have to use `#[repr(C)]`. + +### Layout of `repr(C)` unions + +The layout of `repr(C)` unions follows the C layout scheme. Per sections +[6.5.8.5] and [6.7.2.1.16] of the C11 specification, this means that the offset +of every field is 0. Unsafe code can cast a pointer to the union to a field type +to obtain a pointer to any field, and vice versa. 
+ +[6.5.8.5]: http://port70.net/~nsz/c/c11/n1570.html#6.5.8p5 +[6.7.2.1.16]: http://port70.net/~nsz/c/c11/n1570.html#6.7.2.1p16 + +Since all fields are at offset 0, this implies that `repr(C)` unions do not have +padding before or in-between their fields. They can, however, have trailing +padding (see next example). + +Union fields of zero-size participate in the layout computation of the union. For example: + +```rust +# use std::mem::{size_of, align_of}; +#[repr(C)] +union U { + x: u8, + y: [u16; 0], +} +# fn main() { +// The zero-sized type [u16; 0] raises the alignment requirement to 2 +assert_eq!(align_of::<U>(), 2); +// This introduces trailing padding, raising the union size to 2 +assert_eq!(size_of::<U>(), 2); +# } +``` + +> **NOTE**: U is larger than its largest field, and has therefore 1 byte of +> trailing padding. + +This handling of zero-sized types is equivalent to the handling of zero-sized +types in struct fields, and matches the behavior of GCC and Clang for unions in +C when zero-sized types are allowed via their language extensions. + +The bit `i` of a `repr(C)` union is a padding bit if the bit `i` of each of its +fields is a padding bit or trailing padding. From ccd7fb37fd21a476f9e167ba0a8aba1cfa9e3cc7 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Fri, 26 Jul 2019 12:06:30 +0200 Subject: [PATCH 02/17] Correct sub-section title --- reference/src/layout/unions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index 2c504e71..9048ca08 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -29,7 +29,7 @@ Even if the offsets happen to be all 0, there might still be differences in the function call ABI. If you need to pass unions by-value across an FFI boundary, you have to use `#[repr(C)]`. -### Layout of `repr(C)` unions +### Layout of "repr C" unions The layout of `repr(C)` unions follows the C layout scheme. 
Per sections [6.5.8.5] and [6.7.2.1.16] of the C11 specification, this means that the offset From bc72ad6f00ece2088aa1f17a854cbe027055902f Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Fri, 26 Jul 2019 14:15:02 +0200 Subject: [PATCH 03/17] Re-word the repr C union section into sub-sections --- reference/src/layout/unions.md | 59 ++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index 9048ca08..849a420e 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -39,11 +39,56 @@ to obtain a pointer to any field, and vice versa. [6.5.8.5]: http://port70.net/~nsz/c/c11/n1570.html#6.5.8p5 [6.7.2.1.16]: http://port70.net/~nsz/c/c11/n1570.html#6.7.2.1p16 -Since all fields are at offset 0, this implies that `repr(C)` unions do not have -padding before or in-between their fields. They can, however, have trailing -padding (see next example). +#### Padding -Union fields of zero-size participate in the layout computation of the union. For example: +Since all fields are at offset 0, `repr(C)` unions do not have padding before +their fields. They can, however, have trailing padding, to make sure the size is +a multiple of the alignment: + +```rust +# use std::mem::{size_of, align_of}; +#[repr(C, align(2))] +union U { x: u8 } +# fn main() { +// The repr(align) attribute raises the alignment requirement of U to 2 +assert_eq!(align_of::<U>(), 2); +// This introduces trailing padding, raising the union size to 2 +assert_eq!(size_of::<U>(), 2); +# } +``** + +**Note**: there is no room between fields for padding, so `repr(C)` unions can +only have trailing padding. + +The bit `i` of a `repr(C)` union is a padding bit if the bit `i` of each of its +fields is a padding bit or trailing padding. That is: + +```rust +#[repr(C)] +union U { x: (u8, u16) } +``` + +The byte at offset 1 of `U` is a padding byte. 
+ +#### Zero-sized fields + +If a `#[repr(C)]` union contains a field of zero-size, that field does not +occupy space in the union. For example: + +```rust +# use std::mem::{size_of, align_of}; +#[repr(C)] +union U { + x: u8, + y: (), +} +# fn main() { +assert_eq!(size_of::<U>(), 1); +# } +``` + +The field does, however, participate in the layout computation of the union. For +example: ```rust # use std::mem::{size_of, align_of}; @@ -60,12 +105,6 @@ assert_eq!(size_of::<U>(), 2); # } ``` -> **NOTE**: U is larger than its largest field, and has therefore 1 byte of -> trailing padding. - This handling of zero-sized types is equivalent to the handling of zero-sized types in struct fields, and matches the behavior of GCC and Clang for unions in C when zero-sized types are allowed via their language extensions. - -The bit `i` of a `repr(C)` union is a padding bit if the bit `i` of each of its -fields is a padding bit or trailing padding. From fa925d761b5059b1f3d5af8698b228cee1f841ec Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Fri, 26 Jul 2019 14:30:21 +0200 Subject: [PATCH 04/17] Document C++ compatibility hazard --- reference/src/layout/unions.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index 849a420e..7e56813d 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -87,8 +87,7 @@ assert_eq!(size_of::<U>(), 1); # } ``` -The field does, however, participate in the layout computation of the union. For -example: +The field does, however, participate in the layout computation of the union, and can raise its alignment requirement, which in turn can introduce trailing padding. 
For example: ```rust # use std::mem::{size_of, align_of}; @@ -103,8 +102,12 @@ assert_eq!(align_of::<U>(), 2); // This introduces trailing padding, raising the union size to 2 assert_eq!(size_of::<U>(), 2); # } -``` +``** This handling of zero-sized types is equivalent to the handling of zero-sized types in struct fields, and matches the behavior of GCC and Clang for unions in C when zero-sized types are allowed via their language extensions. + +**C++ compatibility hazard**: C++ does, in general, give a size of 1 to empty +structs. If an empty struct in C++ is used as an union field, a "naive" +translation of that code into Rust will not produce a compatible result. From 8f9de1c724f823925a7698b8da02f00a8da0e366 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Fri, 26 Jul 2019 14:31:46 +0200 Subject: [PATCH 05/17] Fix typo --- reference/src/layout/unions.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index 7e56813d..3affcaeb 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -87,7 +87,9 @@ assert_eq!(size_of::<U>(), 1); # } ``` -The field does, however, participate in the layout computation of the union, and can raise its alignment requirement, which in turn can introduce trailing padding. For example: +The field does, however, participate in the layout computation of the union, and +can raise its alignment requirement, which in turn can introduce trailing +padding. 
For example: ```rust # use std::mem::{size_of, align_of}; @@ -102,7 +104,7 @@ assert_eq!(align_of::<U>(), 2); // This introduces trailing padding, raising the union size to 2 assert_eq!(size_of::<U>(), 2); # } -``** +``` This handling of zero-sized types is equivalent to the handling of zero-sized types in struct fields, and matches the behavior of GCC and Clang for unions in From 8b6663b42911f9bb098b1b9f285b3c056988b054 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Fri, 26 Jul 2019 14:38:02 +0200 Subject: [PATCH 06/17] Update reference/src/layout/unions.md Co-Authored-By: Ralf Jung --- reference/src/layout/unions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index 3affcaeb..e32fe04a 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -57,7 +57,7 @@ assert_eq!(size_of::<U>(), 2); # } ``** -**Note**: there is no room between fields for padding, so `repr(C)` unions can +**Note**: Fields are overlapped instead of laid out sequentially, so unlike structs there is no "between the fields" that could be filled with padding. only have trailing padding. The bit `i` of a `repr(C)` union is a padding bit if the bit `i` of each of its From bce77758fdfe4614ced745d3f4bd7267f213256c Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Fri, 26 Jul 2019 14:47:52 +0200 Subject: [PATCH 07/17] Fix spurious trailing sentence --- reference/src/layout/unions.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index e32fe04a..577aa197 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -57,8 +57,9 @@ assert_eq!(size_of::<U>(), 2); # } ``** -**Note**: Fields are overlapped instead of laid out sequentially, so unlike structs there is no "between the fields" that could be filled with padding. -only have trailing padding. 
+> **Note**: Fields are overlapped instead of laid out sequentially, so +> unlike structs there is no "between the fields" that could be filled +> with padding. The bit `i` of a `repr(C)` union is a padding bit if the bit `i` of each of its fields is a padding bit or trailing padding. That is: From 29b0a7bab846c6265c625a15d1dffeceeef931d6 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Fri, 26 Jul 2019 15:20:34 +0200 Subject: [PATCH 08/17] Remove value representation of repr(C) unions --- reference/src/layout/unions.md | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index 577aa197..6f404bf8 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -61,16 +61,6 @@ assert_eq!(size_of::(), 2); > unlike structs there is no "between the fields" that could be filled > with padding. -The bit `i` of a `repr(C)` union is a padding bit if the bit `i` of each of its -fields is a padding bit or trailing padding. That is: - -```rust -#[repr(C)] -union U { x: (u8, u16) } -``` - -The byte at offset 1 of `U` is a padding byte. 
- #### Zero-sized fields If a `#[repr(C)]` union contains a field of zero-size, that field does not From b3851798d7c968369d576e70411288fd98462ca1 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Fri, 26 Jul 2019 15:25:10 +0200 Subject: [PATCH 09/17] Fix typo --- reference/src/layout/unions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index 6f404bf8..3847c126 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -55,7 +55,7 @@ assert_eq!(align_of::<U>(), 2); // This introduces trailing padding, raising the union size to 2 assert_eq!(size_of::<U>(), 2); # } -``** +``` > **Note**: Fields are overlapped instead of laid out sequentially, so > unlike structs there is no "between the fields" that could be filled > with padding. From 6efcf76edaf0a7aa5a56987613facd2f0a9c1666 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sun, 11 Aug 2019 15:13:37 +0200 Subject: [PATCH 10/17] Update reference/src/layout/unions.md Co-Authored-By: Ralf Jung --- reference/src/layout/unions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index 3847c126..d1db7e9a 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -29,7 +29,7 @@ Even if the offsets happen to be all 0, there might still be differences in the function call ABI. If you need to pass unions by-value across an FFI boundary, you have to use `#[repr(C)]`. -### Layout of "repr C" unions +### C-compatible layout ("repr C") The layout of `repr(C)` unions follows the C layout scheme. 
Per sections [6.5.8.5] and [6.7.2.1.16] of the C11 specification, this means that the offset From 0f03acdee01fe418e89cb6991709fba414a2b3d4 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Sun, 11 Aug 2019 15:33:23 +0200 Subject: [PATCH 11/17] clarify that the tautological assumption that a zero-sized field has zero-size is not that common --- reference/src/layout/unions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index 3847c126..6b98efa1 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -64,7 +64,7 @@ assert_eq!(size_of::(), 2); #### Zero-sized fields If a `#[repr(C)]` union contains a field of zero-size, that field does not -occupy space in the union. For example: +occupy space in Rust unions (as opposed to, e.g., in C++). For example: ```rust # use std::mem::{size_of, align_of}; From 34592df5f8d56e46a9d1b7af16fce257649f43e3 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 14 Aug 2019 11:52:20 +0200 Subject: [PATCH 12/17] Make C++ wording correct --- reference/src/layout/unions.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index 7e1606ea..33d0ae5e 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -64,7 +64,8 @@ assert_eq!(size_of::(), 2); #### Zero-sized fields If a `#[repr(C)]` union contains a field of zero-size, that field does not -occupy space in Rust unions (as opposed to, e.g., in C++). For example: +occupy space in Rust unions, e.g., as opposed to _empty_ types in C++ unions, +which might occupy space depending on how they are used. 
For example: ```rust # use std::mem::{size_of, align_of}; From e5af2807a37d8f29b5529e0959c0281e4b6026b3 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 14 Aug 2019 12:05:10 +0200 Subject: [PATCH 13/17] Remove duplicate note --- reference/src/layout/unions.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index 33d0ae5e..d5dcbe5a 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -64,8 +64,7 @@ assert_eq!(size_of::(), 2); #### Zero-sized fields If a `#[repr(C)]` union contains a field of zero-size, that field does not -occupy space in Rust unions, e.g., as opposed to _empty_ types in C++ unions, -which might occupy space depending on how they are used. For example: +occupy space in Rust unions. For example: ```rust # use std::mem::{size_of, align_of}; From e88dc3db3dbd5790298864325cd2dc4849b3ace5 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 14 Aug 2019 12:13:36 +0200 Subject: [PATCH 14/17] Simplify zero-sized fields of unions --- reference/src/layout/unions.md | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index d5dcbe5a..2debfe31 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -63,24 +63,12 @@ assert_eq!(size_of::(), 2); #### Zero-sized fields -If a `#[repr(C)]` union contains a field of zero-size, that field does not -occupy space in Rust unions. For example: +`repr(C)` union fields of zero-size are handled in the same way as in struct +fields, matching the behavior of GCC and Clang for unions in C when zero-sized +types are allowed via their language extensions. 
-```rust -# use std::mem::{size_of, align_of}; -#[repr(C)] -union U { - x: u8, - y: (), -} -# fn main() { -assert_eq!(size_of::<U>(), 1); -# } -``` - -The field does, however, participate in the layout computation of the union, and -can raise its alignment requirement, which in turn can introduce trailing -padding. For example: +That is, these fields occupy zero-size and participate in the layout computation +of the union as usual: ```rust # use std::mem::{size_of, align_of}; @@ -97,10 +85,6 @@ assert_eq!(size_of::<U>(), 2); # } ``` -This handling of zero-sized types is equivalent to the handling of zero-sized -types in struct fields, and matches the behavior of GCC and Clang for unions in -C when zero-sized types are allowed via their language extensions. - **C++ compatibility hazard**: C++ does, in general, give a size of 1 to empty structs. If an empty struct in C++ is used as an union field, a "naive" translation of that code into Rust will not produce a compatible result. From f438c09bef0b543d9c42c12ef010cd451e3e572f Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 15 Aug 2019 06:39:01 +0200 Subject: [PATCH 15/17] Update reference/src/layout/unions.md Co-Authored-By: Ralf Jung --- reference/src/layout/unions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index 2debfe31..1404c6bc 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -80,7 +80,7 @@ union U { # fn main() { // The zero-sized type [u16; 0] raises the alignment requirement to 2 assert_eq!(align_of::<U>(), 2); -// This introduces trailing padding, raising the union size to 2 +// This in turn introduces trailing padding, raising the union size to 2 assert_eq!(size_of::<U>(), 2); # } ``` From a057fa2bd2bbe34596766c77aa4c64885aeb85f4 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 15 Aug 2019 06:44:08 +0200 Subject: [PATCH 16/17] Link to struct chapter --- reference/src/layout/unions.md | 8 +++++--- 
1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index 2debfe31..019c0a7f 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -85,6 +85,8 @@ assert_eq!(size_of::<U>(), 2); # } ``` -**C++ compatibility hazard**: C++ does, in general, give a size of 1 to empty -structs. If an empty struct in C++ is used as an union field, a "naive" -translation of that code into Rust will not produce a compatible result. +**C++ compatibility hazard**: C++ does, in general, give a size of 1 to types +with no fields. When such types are used as a union field in C++, a "naive" +translation of that code into Rust will not produce a compatible result. Refer +to the [struct chapter](structs-and-tuples.md#c-compatible-layout-repr-c) for +further details. From 85c89aa385c09d626eec560b68a57752256b23a9 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 15 Aug 2019 11:50:06 +0200 Subject: [PATCH 17/17] mention size-filling padding --- reference/src/layout/unions.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/reference/src/layout/unions.md b/reference/src/layout/unions.md index 1404c6bc..9660b417 100644 --- a/reference/src/layout/unions.md +++ b/reference/src/layout/unions.md @@ -13,7 +13,7 @@ like union U { f1: T1, f2: T2 } ``` -is to determine the offset of the fields. The layout of these fields themselves +is to determine the offset of the fields. The layout of these fields themselves is already entirely determined by their types, and since we intend to allow creating references to fields (`&u.f1`), unions do not have any wiggle-room there. @@ -42,8 +42,11 @@ to obtain a pointer to any field, and vice versa. #### Padding Since all fields are at offset 0, `repr(C)` unions do not have padding before -their fields. They can, however, have trailing padding, to make sure the size is -a multiple of the alignment: +their fields. 
They can, however, have padding in each union variant *after* the +field, to make all variants have the same size. + +Moreover, the entire union can have trailing padding, to make sure the size is a +multiple of the alignment: ```rust # use std::mem::{size_of, align_of};