[clang] Lower _BitInt(129+) to a different type in LLVM IR #91364

Open · wants to merge 6 commits into main

Conversation

Fznamznon (Contributor)

Currently, for i128:128 targets, a correct implementation is possible either for __int128 or for _BitInt(129+) lowered to iN, but not both. Since a correct implementation of __int128 is now in place after a21abc7, this patch attempts to fix the codegen issues by lowering _BitInt(129+) types to an array of i8 for "memory", similarly to how it is done for bools now.

Fixes #85139
Fixes #83419
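For illustration, a sketch mirroring the tests added below (x86_64): only the in-memory type changes, and only for widths above 128 bits.

  struct S1 {
    _BitInt(17) A;  // in-memory type stays i17
    _BitInt(129) B; // in-memory type becomes [24 x i8] with this patch
  };
  // Resulting IR (from clang/test/CodeGen/ext-int.c):
  //   %struct.S1 = type { i17, [4 x i8], [24 x i8] }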

@llvmbot added the clang (Clang issues not falling into any other category) and clang:codegen labels · May 7, 2024
llvmbot (Collaborator) commented May 7, 2024

@llvm/pr-subscribers-hlsl
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-clang-codegen

Author: Mariya Podchishchaeva (Fznamznon)

Full diff: https://github.com/llvm/llvm-project/pull/91364.diff

6 Files Affected:

  • (modified) clang/lib/CodeGen/CGExpr.cpp (+8)
  • (modified) clang/lib/CodeGen/CGExprConstant.cpp (+12)
  • (modified) clang/lib/CodeGen/CGExprScalar.cpp (+7)
  • (modified) clang/lib/CodeGen/CodeGenTypes.cpp (+6)
  • (modified) clang/test/CodeGen/ext-int-cc.c (+1-1)
  • (modified) clang/test/CodeGen/ext-int.c (+93-4)
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index d96c7bb1e568..7e631e469a88 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1989,6 +1989,14 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
     return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal();
   }
 
+  if (const auto *BIT = Ty->getAs<BitIntType>()) {
+    if (BIT->getNumBits() > 128) {
+      // Long _BitInt has array of bytes as in-memory type.
+      llvm::Type *NewTy = ConvertType(Ty);
+      Addr = Addr.withElementType(NewTy);
+    }
+  }
+
   llvm::LoadInst *Load = Builder.CreateLoad(Addr, Volatile);
   if (isNontemporal) {
     llvm::MDNode *Node = llvm::MDNode::get(
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 94962091116a..98ab1e23d128 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -1774,6 +1774,18 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM,
     return Res;
   }
 
+  if (const auto *BIT = destType->getAs<BitIntType>()) {
+    if (BIT->getNumBits() > 128) {
+      // Long _BitInt has array of bytes as in-memory type.
+      ConstantAggregateBuilder Builder(CGM);
+      llvm::Type *DesiredTy = CGM.getTypes().ConvertTypeForMem(destType);
+      auto *CI = cast<llvm::ConstantInt>(C);
+      llvm::APInt Value = CI->getValue();
+      Builder.addBits(Value, /*OffsetInBits=*/0, /*AllowOverwrite=*/false);
+      return Builder.build(DesiredTy, /*AllowOversized*/ false);
+    }
+  }
+
   return C;
 }
 
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index d84531959b50..717d47d20dea 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -5348,6 +5348,13 @@ Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) {
     return llvm::UndefValue::get(ArgTy);
   }
 
+  if (const auto *BIT = Ty->getAs<BitIntType>()) {
+    if (BIT->getNumBits() > 128) {
+      // Long _BitInt has array of bytes as in-memory type.
+      ArgPtr = ArgPtr.withElementType(ArgTy);
+    }
+  }
+
   // FIXME Volatility.
   llvm::Value *Val = Builder.CreateLoad(ArgPtr);
 
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index e8d75eda029e..55c618677ddb 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -114,6 +114,12 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) {
     return llvm::IntegerType::get(getLLVMContext(),
                                   (unsigned)Context.getTypeSize(T));
 
+  if (const auto *BIT = T->getAs<BitIntType>()) {
+    if (BIT->getNumBits() > 128)
+      R = llvm::ArrayType::get(CGM.Int8Ty,
+                               (unsigned)Context.getTypeSize(T) / 8);
+  }
+
   // Else, don't map it.
   return R;
 }
diff --git a/clang/test/CodeGen/ext-int-cc.c b/clang/test/CodeGen/ext-int-cc.c
index 001e866d34b4..83f20dcb0667 100644
--- a/clang/test/CodeGen/ext-int-cc.c
+++ b/clang/test/CodeGen/ext-int-cc.c
@@ -131,7 +131,7 @@ void ParamPassing3(_BitInt(15) a, _BitInt(31) b) {}
 // are negated. This will give an error when a target does support larger
 // _BitInt widths to alert us to enable the test.
 void ParamPassing4(_BitInt(129) a) {}
-// LIN64: define{{.*}} void @ParamPassing4(ptr byval(i129) align 8 %{{.+}})
+// LIN64: define{{.*}} void @ParamPassing4(ptr byval([24 x i8]) align 8 %{{.+}})
 // WIN64: define dso_local void @ParamPassing4(ptr %{{.+}})
 // LIN32: define{{.*}} void @ParamPassing4(ptr %{{.+}})
 // WIN32: define dso_local void @ParamPassing4(ptr %{{.+}})
diff --git a/clang/test/CodeGen/ext-int.c b/clang/test/CodeGen/ext-int.c
index 4cb399d108f2..a6a632bd985d 100644
--- a/clang/test/CodeGen/ext-int.c
+++ b/clang/test/CodeGen/ext-int.c
@@ -1,12 +1,19 @@
-// RUN: %clang_cc1 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK64
-// RUN: %clang_cc1 -triple x86_64-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK64
-// RUN: %clang_cc1 -triple i386-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LIN32
-// RUN: %clang_cc1 -triple i386-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WIN32
+// RUN: %clang_cc1 -std=c23 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK64
+// RUN: %clang_cc1 -std=c23 -triple x86_64-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK64
+// RUN: %clang_cc1 -std=c23 -triple i386-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LIN32
+// RUN: %clang_cc1 -std=c23 -triple i386-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WIN32
+
+// CHECK64: %struct.S1 = type { i17, [4 x i8], [24 x i8] }
+// CHECK64: %struct.S2 = type { [40 x i8], i32, [4 x i8] }
 
 //GH62207
 unsigned _BitInt(1) GlobSize1 = 0;
 // CHECK: @GlobSize1 = {{.*}}global i1 false
 
+// CHECK64: @__const.foo.A = private unnamed_addr constant { i17, [4 x i8], <{ i8, [23 x i8] }> } { i17 1, [4 x i8] undef, <{ i8, [23 x i8] }> <{ i8 -86, [23 x i8] zeroinitializer }> }, align 8
+// CHECK64: @BigGlob = {{.*}}global <{ i8, i8, [38 x i8] }> <{ i8 -68, i8 2, [38 x i8] zeroinitializer }>, align 8
+// CHECK64: @f.p = internal global <{ i8, i8, [22 x i8] }> <{ i8 16, i8 39, [22 x i8] zeroinitializer }>, align 8
+
 void GenericTest(_BitInt(3) a, unsigned _BitInt(3) b, _BitInt(4) c) {
   // CHECK: define {{.*}}void @GenericTest
   int which = _Generic(a, _BitInt(3): 1, unsigned _BitInt(3) : 2, _BitInt(4) : 3);
@@ -62,3 +69,85 @@ void Size1ExtIntParam(unsigned _BitInt(1) A) {
   // CHECK: store i1 %[[PARAM_LOAD]], ptr %[[IDX]]
   B[2] = A;
 }
+
+#if __BITINT_MAXWIDTH__ > 128
+struct S1 {
+  _BitInt(17) A;
+  _BitInt(129) B;
+};
+
+int foo(int a) {
+  // CHECK64: %A1 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 0
+  // CHECK64: store i17 1, ptr %A1, align 8
+  // CHECK64: %B2 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 2
+  // CHECK64: %0 = load i32, ptr %a.addr, align 4
+  // CHECK64: %conv = sext i32 %0 to i129
+  // CHECK64: store i129 %conv, ptr %B2, align 8
+  // CHECK64: %B3 = getelementptr inbounds %struct.S1, ptr %A, i32 0, i32 2
+  // CHECK64: %1 = load i129, ptr %B3, align 8
+  // CHECK64: %conv4 = trunc i129 %1 to i32
+  // CHECK64: %B5 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 2
+  // CHECK64: %2 = load i129, ptr %B5, align 8
+  struct S1 A = {1, 170};
+  struct S1 B = {1, a};
+  return (int)A.B + (int)B.B;
+}
+
+struct S2 {
+  _BitInt(257) A;
+  int B;
+};
+
+_BitInt(257) bar() {
+  // CHECK64: define {{.*}}void @bar(ptr {{.*}} sret([40 x i8]) align 8 %[[RET:.+]])
+  // CHECK64: %A = alloca %struct.S2, align 8
+  // CHECK64: %0 = getelementptr inbounds { <{ i8, [39 x i8] }>, i32, [4 x i8] }, ptr %A, i32 0, i32 0
+  // CHECK64: %1 = getelementptr inbounds <{ i8, [39 x i8] }>, ptr %0, i32 0, i32 0
+  // CHECK64: store i8 1, ptr %1, align 8
+  // CHECK64: %2 = getelementptr inbounds { <{ i8, [39 x i8] }>, i32, [4 x i8] }, ptr %A, i32 0, i32 1
+  // CHECK64: store i32 10000, ptr %2, align 8
+  // CHECK64: %A1 = getelementptr inbounds %struct.S2, ptr %A, i32 0, i32 0
+  // CHECK64: %3 = load i257, ptr %A1, align 8
+  // CHECK64: store i257 %3, ptr %[[RET]], align 8
+  struct S2 A = {1, 10000};
+  return A.A;
+}
+
+void TakesVarargs(int i, ...) {
+  // CHECK64: define{{.*}} void @TakesVarargs(i32
+__builtin_va_list args;
+__builtin_va_start(args, i);
+
+_BitInt(160) A = __builtin_va_arg(args, _BitInt(160));
+  // CHECK64: %[[ARG:.+]] = load i160
+  // CHECK64: store i160 %[[ARG]], ptr %A, align 8
+}
+
+_BitInt(129) *f1(_BitInt(129) *p) {
+  // CHECK64: getelementptr inbounds [24 x i8], {{.*}} i64 1
+  return p + 1;
+}
+
+char *f2(char *p) {
+  // CHECK64: getelementptr inbounds i8, {{.*}} i64 24
+  return p + sizeof(_BitInt(129));
+}
+
+auto BigGlob = (_BitInt(257))700;
+// CHECK64: define {{.*}}void @foobar(ptr {{.*}} sret([40 x i8]) align 8 %[[RET1:.+]])
+_BitInt(257) foobar() {
+  // CHECK64: %A = alloca [40 x i8], align 8
+  // CHECK64: %0 = load i257, ptr @BigGlob, align 8
+  // CHECK64: %add = add nsw i257 %0, 1
+  // CHECK64: store i257 %add, ptr %A, align 8
+  // CHECK64: %1 = load i257, ptr %A, align 8
+  // CHECK64: store i257 %1, ptr %[[RET1]], align 8
+  _BitInt(257) A = BigGlob + 1;
+  return A;
+}
+
+void f() {
+  static _BitInt(130) p = {10000};
+}
+
+#endif

@Fznamznon Fznamznon requested a review from hvdijk May 7, 2024 17:46
@erichkeane left a comment (Collaborator)

This is unfortunate, and will likely result in the FPGAs needing to generate extra bits here, so this is somewhat harmful in that regard.

It seems to me this is a case where we're trying to work around an LLVM bug? Should we just be fixing that instead?

@efriedma-quic left a comment (Collaborator)

Maybe add a helper somewhere to check "is this type a bitint wider than 128 bits"?
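A minimal sketch of such a helper, assuming a free function in the relevant CodeGen file (the name is hypothetical, not from the patch):

  // Hypothetical helper: is this a _BitInt wider than 128 bits?
  static bool isBigBitInt(QualType Ty) {
    if (const auto *BIT = Ty->getAs<BitIntType>())
      return BIT->getNumBits() > 128;
    return false;
  }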

// Long _BitInt has array of bytes as in-memory type.
ConstantAggregateBuilder Builder(CGM);
llvm::Type *DesiredTy = CGM.getTypes().ConvertTypeForMem(destType);
auto *CI = cast<llvm::ConstantInt>(C);
Collaborator:

I'm not sure this cast is guaranteed to succeed? At least in some cases, we emit constant expressions involving a ptrtoint. Maybe at the widths in question, that can't happen, but this deserves a comment explaining what's going on.
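One defensive shape for this, as a sketch (whether the ptrtoint case is actually reachable at these widths is exactly the open question):

  // Only split into bytes when the constant really is an integer; other
  // constant expressions (e.g. involving ptrtoint) would fall through.
  if (auto *CI = dyn_cast<llvm::ConstantInt>(C)) {
    Builder.addBits(CI->getValue(), /*OffsetInBits=*/0,
                    /*AllowOverwrite=*/false);
    return Builder.build(DesiredTy, /*AllowOversized=*/false);
  }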

Contributor Author:

I've added a comment. I'm not able to get a ptrtoint in a constant expression involving a big _BitInt.

@@ -5348,6 +5348,13 @@ Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) {
return llvm::UndefValue::get(ArgTy);
}

if (const auto *BIT = Ty->getAs<BitIntType>()) {
if (BIT->getNumBits() > 128) {
Collaborator:

This seems a little fragile; specifically for _BitInt, we assume the pointer is wrong and the return type is right, but in other cases, we assume the pointer is right and we need to do some sort of conversion.

Can we fix the interface of EmitVAArg so we can just EmitLoadOfScalar() here?

@Fznamznon (Contributor Author) commented May 29, 2024:

Called EmitLoadOfScalar here.

@efriedma-quic (Collaborator)

> It seems to me this is a case where we're trying to work around an LLVM bug? Should we just be fixing that instead?

You mean, revert https://reviews.llvm.org/D86310? Making any changes in LLVM here is painful; I'd rather not revisit that. CC @hvdijk @rnk

@erichkeane (Collaborator)

> > It seems to me this is a case where we're trying to work around an LLVM bug? Should we just be fixing that instead?
>
> You mean, revert https://reviews.llvm.org/D86310? Making any changes in LLVM here is painful; I'd rather not revisit that. CC @hvdijk @rnk

I didn't, no, but I hadn't seen all that conversation.

Aaron has explained a bit more of the context here, and I'm finding myself pretty confused/out of the loop. As this is effectively all codegen, I suspect you, plus your CCs, are the best ones to review this. I don't see a problem with this except for the FPGA folks, though given that:

1. FPGA folks rarely, if ever, use large types like this if they can help it,
2. the FPGA group has been spun off from Intel, meaning the original stakeholders are all gone,
3. and I am no longer at Intel,

I don't think I have strong feelings here.

@efriedma-quic (Collaborator)

I don't think FPGA folks will run into any practical issue with this; this only impacts the in-memory types, and backends shouldn't really be using in-memory types for anything anyways.

@@ -1989,6 +1989,14 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal();
}

if (const auto *BIT = Ty->getAs<BitIntType>()) {
if (BIT->getNumBits() > 128) {
Contributor:

For a number of bits >64, <=128, LLVM's iN type will have identical representation to Clang _BitInt(N) but different alignment. I think this is fine, I think nothing needs their alignment to match Clang's, but could you double-check to make sure you agree?

Contributor Author:

These types remain unchanged.

@hvdijk (Contributor)

hvdijk commented May 7, 2024

Thanks for doing this, it's unfortunate that Clang is in a rather broken state with these types right now and it will be good to see improvement. I think the approach you're taking here is the only approach that will work.

@rnk (Collaborator)

rnk commented May 7, 2024

I played with the idea of using LLVM packed structs (<{ i129 }>) to represent something like this, but they don't work the way I expected them to: https://godbolt.org/z/M6hMYYhax

LLVM DataLayout's idea of sizeof(i129) is still rounded up from 17 bytes to 32 bytes.

Using byte arrays for the in-memory type should work, so it's probably the best path forward.
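rnk's DataLayout observation can be checked directly; a standalone sketch, assuming the x86_64 layout string below is the one in use (with the post-D86310 i128:128 entry):

  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Type.h"
  #include <cassert>

  int main() {
    llvm::LLVMContext Ctx;
    llvm::DataLayout DL("e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-"
                        "i128:128-f80:128-n8:16:32:64-S128"); // assumed x86_64
    llvm::Type *I129 = llvm::Type::getIntNTy(Ctx, 129);
    assert(DL.getTypeStoreSize(I129) == 17); // bytes a load/store touches
    assert(DL.getTypeAllocSize(I129) == 32); // sizeof: rounded up to align 16
    return 0;
  }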

@rjmccall (Contributor)

rjmccall commented May 7, 2024

Hmm. I think this is actually pretty different from the bool pattern. Suppose we're talking about _BitInt(N). Let BYTES := sizeof(_BitInt(N)), and let BITS := BYTES * 8.

The problem being presented here is this:

  1. For at least some values of N, we cannot use LLVM's iN for the type of struct elements, array elements, allocas, global variables, and so on, because the LLVM layout for that type does not match the high-level layout of _BitInt(N). The only available type that does match the memory layout appears to be [BYTES x i8].

However, it doesn't follow from the need to use [BYTES x i8] for memory layout that we have to use [BYTES x i8] for loads and stores. IIUC, loads and stores of both iN and iBITS are in fact required to only touch BYTES bytes and so should be valid. It is near-certain that loads and stores of either of those types would both (1) produce far better code from the backends and (2) be far more optimizable by IR passes than loads and stores of [BYTES x i8].

bool does run into (1) because of targets like PPC where sizeof(bool) == 4. However, we still use i8 as the in-memory type for bool on other targets. Partly, this is to discourage portability bugs where people write IR-gen code that doesn't handle the PPC pattern. But IIRC the main reason is actually to solve this other problem:

  2. LLVM doesn't guarantee any particular extension behavior for integer types that aren't a multiple of 8, but ABIs do generally require objects of type bool to have all bits valid.

I expect that problem (2) also applies to _BitInt.

The upshot is that code like _BitInt(129) x = v; needs to be emitted something like this:

  %x = alloca [24 x i8]            ; assuming for the sake of argument that sizeof(_BitInt(129)) == 24
  %storedv = sext i129 %v to i192  ; or zext depending on signedness
  store i192 %storedv, ptr %x

Edit: I originally defined BYTES as ceil(N/8), but it clearly has to be sizeof(_BitInt(N)), and I expect the ABI expects extension out to that size as well.

@rjmccall (Contributor)

rjmccall commented May 7, 2024

If you want to do things that way, you will need to (1) generalize CodeGenTypes with a new API that will return this load/store type when applicable and (2) look at all the places we call ConvertTypeForMem, EmitToMemory, and EmitFromMemory to make sure they do the right things.

You definitely should not be hard-coding 128 in a bunch of places. The load/store type should always be iBITS, and the memory type should either be iBITS or [BYTES x i8] depending on whether the former has the right layout characteristics in the LLVM data layout.
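In the shape the patch later adopts (the implementation is quoted further down in this conversation), that API could look roughly like this sketch:

  // Load/store type: iBITS whenever the natural IR type's layout does not
  // match the AST layout; otherwise just the natural type.
  llvm::Type *CodeGenTypes::convertTypeForLoadStore(QualType T) {
    llvm::Type *R = ConvertType(T);
    if (!LLVMTypeLayoutMatchesAST(T, R))
      return llvm::Type::getIntNTy(
          getLLVMContext(), Context.getTypeSizeInChars(T).getQuantity() * 8);
    return R;
  }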

@efriedma-quic (Collaborator)

You're suggesting we should fork ConvertTypeForMem into two functions? So there's actually three types: the "register" type, the "load/store" type, and the "in-memory" type. I guess that makes sense from a theoretical perspective, but... as a practical matter, I'm not sure how many places need to call the proposed "ConvertTypeForLoadStore".

In EmitLoadOfScalar(), instead of checking for BitInt, you just unconditionally do Addr = Addr.withElementType(ConvertTypeForLoadStore(Ty));. Logical cleanup, I guess. In EmitStoreOfScalar, you don't really need the interface because you can assume the result of EmitToMemory() has the load/store type. And then... what else calls it?

@rjmccall (Contributor)

rjmccall commented May 7, 2024

My experience is that compiler writers are really good at hacking in special cases to make their test cases work and really bad at recognizing that their case isn't as special as they think. There are three types already called out for special treatment in ConvertTypeForMem, of which two are handled in EmitFromMemory and only one is handled in EmitToMemory. I want to set up a pattern that the next person with this sort of problem can follow. It doesn't have to be exactly what I suggested above, but it should be a real pattern.

@Fznamznon (Contributor Author)

Thank you everyone for the feedback. I'm working on applying it.

if (const auto *BIT = Ty->getAs<BitIntType>()) {
if (BIT->getNumBits() > 128) {
// Long _BitInt has array of bytes as in-memory type.
llvm::Type *NewTy = ConvertType(Ty);
Collaborator:

Shouldn't we be calling ConvertTypeForMem here?

@Fznamznon (Contributor Author) commented May 29, 2024:

The idea was to load iN, not the array, so ConvertType here was intentional. However, I'm updating this patch soon; it will use a special load/store type, the idea of which is described in #91364 (comment).

Collaborator:

Oh, I see. It looks close to what we are trying to do with #93495, which is:

  • create in-memory representations according to the target ABI
  • improve efficiency of loads/stores, e.g. load/store of i18 in LLVM must touch just 3 bytes, so a compiler would emit one 16-bit load and one 8-bit load, but if i18 comes from _BitInt(18) then a single 32-bit load would work better.

Contributor Author:

This patch was mostly intended to fix codegen issues for big _BitInt types (>128 bits for 64-bit targets); however, I'm adding the new load/store type idea, so that seems close.

@llvmbot added the HLSL (HLSL Language Support) and clang:openmp (OpenMP related changes to Clang) labels · May 29, 2024
github-actions bot commented May 29, 2024

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 9a282724a29899e84adc91bdeaf639010408a80d 87bed2e55c4fabb15a1a11dd6fdea420e1cae8b8 -- clang/lib/CodeGen/CGExpr.cpp clang/lib/CodeGen/CGExprConstant.cpp clang/lib/CodeGen/CGExprScalar.cpp clang/lib/CodeGen/CGStmt.cpp clang/lib/CodeGen/CodeGenFunction.cpp clang/lib/CodeGen/CodeGenFunction.h clang/lib/CodeGen/CodeGenTypes.cpp clang/lib/CodeGen/CodeGenTypes.h clang/test/CodeGen/arm-abi-vector.c clang/test/CodeGen/ext-int-cc.c clang/test/CodeGen/ext-int.c clang/test/CodeGenCXX/ext-int.cpp clang/test/Frontend/fixed_point_comparisons.c clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp clang/test/OpenMP/teams_distribute_simd_codegen.cpp
View the diff from clang-format here.
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index b0bb7734f3..c7e430d144 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -609,9 +609,9 @@ bool ConstStructBuilder::AppendBytes(CharUnits FieldOffsetInChars,
   return Builder.add(InitCst, StartOffset + FieldOffsetInChars, AllowOverwrite);
 }
 
-bool ConstStructBuilder::AppendBitField(
-    const FieldDecl *Field, uint64_t FieldOffset, llvm::Constant *C,
-    bool AllowOverwrite) {
+bool ConstStructBuilder::AppendBitField(const FieldDecl *Field,
+                                        uint64_t FieldOffset, llvm::Constant *C,
+                                        bool AllowOverwrite) {
 
   llvm::ConstantInt *CI = nullptr;
   if (!isa<llvm::ConstantInt>(C)) {

@@ -194,7 +194,7 @@ double varargs_vec_3s(int fixed, ...) {
// APCS-GNU: [[VAR:%.*]] = alloca <3 x i16>, align 8
// APCS-GNU: [[AP:%.*]] = load ptr,
// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP]], i32 8
// APCS-GNU: [[VEC:%.*]] = load <3 x i16>, ptr [[AP]], align 4
// APCS-GNU: [[VEC:%.*]] = load <4 x i16>, ptr [[AP]], align 4
Contributor Author:

Switching to EmitLoadOfScalar in VisitVAArgExpr caused this.

@Fznamznon (Contributor Author)

Fznamznon commented May 29, 2024

Well, I hope I got the idea right. I updated the PR so that it now introduces a convertTypeForLoadStore API returning iBITS (let BYTES := sizeof(_BitInt(N)), and let BITS := BYTES * 8).
I've also tried to inspect as many places as I could where ConvertTypeForMem, EmitToMemory, and EmitFromMemory are used, and actually found a couple of places that the original patch broke. However, I may well have missed something, since the code is mostly new to me.
There is no longer a check for a 128-bit length. I discovered that _BitInt(64+) is also broken for 32-bit targets in the same way (https://godbolt.org/z/vMYaYTzhe), so now I'm just comparing layouts to determine whether a _BitInt(N) needs a different in-memory type.
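A plausible shape for that layout comparison, as a sketch (the declaration appears later in this review; the body here is illustrative, not quoted from the patch):

  bool CodeGenTypes::LLVMTypeLayoutMatchesAST(QualType ASTTy,
                                              llvm::Type *LLVMTy) {
    // Compare the AST size/alignment of the type with the DataLayout's
    // view of the LLVM type it converts to.
    const llvm::DataLayout &DL = getDataLayout();
    return Context.getTypeSizeInChars(ASTTy).getQuantity() ==
               DL.getTypeAllocSize(LLVMTy) &&
           Context.getTypeAlignInChars(ASTTy).getQuantity() ==
               DL.getABITypeAlign(LLVMTy).value();
  }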


return Val;
return CGF.EmitLoadOfScalar(ArgPtr, Ty.isVolatileQualified(), Ty,
VE->getExprLoc());
Collaborator:

For reference, this is basically deleting the ArgTy != Val->getType() branch without any replacement. But the Mips case where that was relevant got fixed a different way in 7f416cc, so I think it's dead code anyway.

Maybe worth splitting this into a separate pull request, though, in case it causes an issue for some other target.

Contributor:

The new code pattern is just casting the address to the load/store type of the expression type, right? That seems like it'd be wrong on big-endian targets if the variadic argument was promoted. EmitVAArg, in its wisdom, is giving us the address of something in the va_list and then not telling what type it is other than as the element type of the returned address.

Are _BitInts ever promoted as variadic arguments? I guess that would be up to the target, right? It really feels like EmitVAArg needs to be giving us an actual type for the l-value it's returning, or (better yet?) just returning an RValue instead of forcing the caller to handle demotion.

I agree it would be better to do that in a separate patch.

Collaborator:

> That seems like it'd be wrong on big-endian targets if the variadic argument was promoted.

I don't think we actually depend on this behavior anymore; we either adjust the pointer to point to the correct bytes, or store the value in a temporary.

Contributor:

Should we just make EmitVAArg return an RValue? Sema appears to unconditionally consider it a pr-value, so while CodeGen has an l-value path for it, that's probably dead.

Collaborator:

> Should we just make EmitVAArg return an RValue?

You mean, using the same signature as EmitAnyExpr? That should work, sure.

> while CodeGen has an l-value path for it, that's probably dead.

I think we end up using EmitVAArgExprLValue in C, for something like va_arg(x, struct X).x. But that's not a real lvalue; it's just a temporary.

Contributor Author:

Do you mean I should apply RValue::get() in CodeGenFunction::EmitVAArg? And all the callers of EmitVAArg therefore should do getScalarVal() or similar to get an llvm::Value?
Or is it still better to do that in another PR?
If I revert the current change with EmitLoadOfScalar, there would need to be a manipulation for _BitInt like in the first version of the patch, which received this comment: #91364 (comment).

Collaborator:

> And all the callers of EmitVAArg therefore should do getScalarVal() or similar to get an llvm::Value

Yes, that's the idea. Should significantly simplify the code for some targets.

This should be a dependent PR; we merge that first, then this PR doesn't need to touch the varargs code.
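Sketched from a caller's point of view (the exact signature is an assumption here; it gets settled in the dependent PR):

  // Before: EmitVAArg returned an Address and each caller loaded from it.
  // After: it returns an RValue and scalar callers just unwrap it.
  RValue ArgVal = CGF.EmitVAArg(VE, VAListAddr); // assumed new signature
  llvm::Value *Val = ArgVal.getScalarVal();      // replaces manual load logic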

Contributor Author:

I posted #94635.

I somehow had to load the value anyway, since the inner EmitVAArg, which seems to be target-dependent, returns an Address as well.

case TEK_Scalar: {
llvm::Value *Ret = EmitScalarExpr(RV);
// EmitStoreOfScalar could be used here, but it extends bool which for
// some targets is returned as i1 zeroext.
Collaborator:

For the sake of consistency, I'd prefer to use EmitStoreOfScalar here, even if that means we have to change more code elsewhere (presumably to use EmitLoadOfScalar).

Contributor:

Well, we don't normally have to expose the address of ReturnValue with scalar results, so we could avoid unnecessary promotions/truncations here in common cases by just making ReturnValue be an alloca of the scalar type instead of the for-memory type. However, that would add complexity for the one case where ReturnValue is exposed, which is when the ABI requires the function to return indirectly; in that case, it would be important that we promote the return value before initializing the return value slot. So there's a trade-off here: we can unconditionally treat ReturnValue as having the for-memory type (at the cost of emitting some extra promotions/truncations), or we can track whether we were passed it vs. just allocated internally to this function (at the cost of compiler complexity around most uses of ReturnValue, which is a relatively small amount of code).

@@ -761,6 +761,10 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
if (Field->hasAttr<NoUniqueAddressAttr>())
AllowOverwrite = true;
} else {
llvm::Type *LoadType = CGM.getTypes().convertTypeForLoadStore(
Collaborator:

I have no idea what this is supposed to do; why don't we want to use the "memory" type here?

@Fznamznon (Contributor Author) commented May 31, 2024:

In ConstantEmitter::emitForMemory we now split the constant into individual bytes if the target type is a long _BitInt. However, AppendBitField expects an integer constant after this, so this just folds the individual bytes back into an integer.
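Illustratively, the folding could look like this sketch (assumes a little-endian target and a ConstantDataArray input; the helper name is not from the patch):

  // Reassemble an integer constant from the [N x i8] that
  // ConstantEmitter::emitForMemory produced for a long _BitInt.
  static llvm::ConstantInt *foldBytesToInt(llvm::ConstantDataArray *Bytes,
                                           llvm::IntegerType *IntTy) {
    llvm::APInt Result(IntTy->getBitWidth(), 0);
    for (unsigned I = 0, E = Bytes->getNumElements(); I != E; ++I)
      Result.insertBits(Bytes->getElementAsAPInt(I), I * 8); // little-endian
    return llvm::ConstantInt::get(IntTy->getContext(), Result);
  }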

Collaborator:

Oh, I see... please move the duplicated code into the implementation of AppendBitField, and add a comment explaining it.

Contributor Author:

That would change AppendBitField to have an llvm::Value parameter instead of llvm::ConstantInt; is that still OK?

Collaborator:

I think you can use llvm::Constant? But that's fine, sure.

Contributor Author:

Ok, done.

@@ -128,6 +128,15 @@ class CodeGenTypes {
/// memory representation is usually i8 or i32, depending on the target.
llvm::Type *ConvertTypeForMem(QualType T, bool ForBitField = false);

/// Check that size and abi alignment of given LLVM type matches size and
/// alignment of given AST type.
bool LLVMTypeLayoutMatchesAST(QualType ASTTy, llvm::Type *LLVMTy);
Collaborator:

I'm not sure we should be exposing LLVMTypeLayoutMatchesAST as a public API? If any code cares, it's probably not calling ToMemory/FromMemory like it should.

Contributor Author:

ConstantEmitter::emitForMemory is calling LLVMTypeLayoutMatchesAST to determine if a big _BitInt constant needs to be split into individual bytes.

Collaborator:

In that case, please rename this to something that more accurately corresponds to that usage.

@@ -2567,16 +2567,16 @@ int main() {
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8
Collaborator:

Please regenerate the OpenMP tests in a separate pull request, to reduce the number of irrelevant changes.

Contributor Author:

They failed due to the change that generalized handling of bool and _BitInt in EmitToMemory/EmitFromMemory. I will need to revert that change then.

Collaborator:

It looked like some of the changes weren't related... if they're all in fact related, it's fine, I guess.

assert(Value->getType()->isIntegerTy(getContext().getTypeSize(Ty)) &&
"wrong value rep of bool");
if (hasBooleanRepresentation(Ty) ||
(Ty->isBitIntType() && Value->getType()->isIntegerTy())) {
Contributor:

Sorry, what's the situation in which a scalar _BitInt value doesn't have integer IR type?

Contributor Author:

A _BitInt that now has an array of bytes as its memory type is returned from a function via an sret(memory type) parameter. Additionally, in EmitFunctionEpilog a load and a store of the return value with the memory type are emitted (demo of this load and store: https://godbolt.org/z/K37j4f1jn).

Same for bool, though its memory type is not an array. I suppose there can be more cases; see the comment that used to be on lines 2017-2018:

    // This should really always be an i1, but sometimes it's already
    // an i8, and it's awkward to track those cases down.

if (hasBooleanRepresentation(Ty) || Ty->isBitIntType()) {
llvm::Type *ResTy = ConvertType(Ty);
bool Signed = Ty->isSignedIntegerOrEnumerationType();
return Builder.CreateIntCast(Value, ResTy, Signed, "loadedv");
Contributor:

This is always a trunc, right? Or do we need to use CreateIntCast because it might be a trivial truncation?

Contributor Author:

It is always a trunc. Changed it to CreateTrunc.
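That is, the width only ever shrinks when converting from the memory representation, so the cast can be spelled directly; a sketch:

  // Memory width (e.g. i192) -> semantic width (e.g. i129): always a trunc.
  return Builder.CreateTrunc(Value, ResTy, "loadedv");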

auto *CI = cast<llvm::ConstantInt>(C);
llvm::APInt Value = CI->getValue();
Builder.addBits(Value, /*OffsetInBits=*/0, /*AllowOverwrite=*/false);
return Builder.build(DesiredTy, /*AllowOversized*/ false);
Contributor:

We need to actually extend out to the full width of DesiredTy, right? Won't this otherwise produce undef padding?

Contributor Author:

I'm not seeing undefs in resulting constants. zeroinitializer for all unused bytes is emitted instead. The test CodeGen/ext-int.c illustrates this behavior.



llvm::Type *R = ConvertType(T);
if (!LLVMTypeLayoutMatchesAST(T, R))
return llvm::Type::getIntNTy(
getLLVMContext(), Context.getTypeSizeInChars(T).getQuantity() * 8);
Member:

> return llvm::Type::getIntNTy(getLLVMContext(), Context.getTypeSizeInChars(T).getQuantity() * 8);

Could you please explain the logic behind using this implementation? Do you support all types > 128 and also types < 128, or is it specific to types > 128 only?

@Fznamznon (Contributor Author) commented Jun 5, 2024:

We intend to support all types. In general, _BitInt(N) is lowered to the iN type in LLVM IR. However, that is currently not possible for certain sizes on some targets, due to the different alignments defined for iN and _BitInt(N). This now happens for _BitInt(>=129) on x86_64 targets. LLVMTypeLayoutMatchesAST checks for this mismatch. If there is a mismatch, say NBytes = sizeof(_BitInt(N)), we use [NBytes x i8] as the in-memory type. It is better to load and store the value using an i(NBytes * 8) type instead of an array type, since that is better for optimization and guarantees that all bits of the loaded/stored value are valid. For more info, please refer to #91364 (comment) as well.
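Concrete numbers behind this for the x86_64 case (both asserts are assumptions about the x86_64 clang ABI, checkable with a local build):

  static_assert(sizeof(_BitInt(129)) == 24, "assumed x86_64 clang ABI");
  static_assert(alignof(_BitInt(129)) == 8, "assumed x86_64 clang ABI");
  // LLVM's i129 under the i128:128 data layout has alloc size 32 and ABI
  // alignment 16, so the layouts mismatch; the in-memory type becomes
  // [24 x i8] and the load/store type i192 (= 24 * 8).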

Collaborator:

I don't understand why the choice of type is conditional. No matter whether the "layout" matches or not, the stores have to produce a valid in-memory representation (according to the target ABI), which in the general case means writing all the bits of the in-memory representation.

@Fznamznon (Contributor Author) commented Jun 5, 2024:

The choice is conditional to keep the changes less invasive.

Would making it unconditional require having a different in-memory type for all _BitInt types? In the case when the "layout" matches, we could use iNBytes for both the memory type and the load/store type. Though that makes me wonder what the whole point of small _BitInt would be then...

Collaborator:

cf. #91364 (comment)

I'm fairly certain that using a load/store type of iBITS is the correct thing to do, unconditionally.
I'm not quite sure about the choice between iBITS and [BYTES x i8]: if we're not talking about loads/stores, how could the array type possibly be less efficient, such that we shouldn't just default to it unconditionally?

Contributor:

We could literally use [N x i8] as the allocation type of every single type and save ourselves a lot of grief. It's nicer to generate more semantic types when we can, though; it produces IR that's easier to both read and test.
