Skip to content

Commit

Permalink
Variable auto-init: also auto-init alloca
Browse files Browse the repository at this point in the history
Summary:
alloca isn’t auto-init’d right now because it takes a different path in clang than
all the other stuff we support (it’s a builtin, not an expression).
Interestingly, alloca doesn’t have a type (as opposed to even VLA) so we can
really only initialize it with memset.

<rdar://problem/49794007>

Subscribers: jkorous, dexonsmith, cfe-commits, rjmccall, glider, kees, kcc, pcc

Tags: #clang

Differential Revision: https://reviews.llvm.org/D60548

llvm-svn: 358243
  • Loading branch information
jfbastien committed Apr 12, 2019
1 parent ef03518 commit ef202c3
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 84 deletions.
22 changes: 22 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Expand Up @@ -17,6 +17,7 @@
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
#include "PatternInit.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
Expand Down Expand Up @@ -44,6 +45,25 @@ int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
return std::min(High, std::max(Low, Value));
}

/// Initialize memory returned by an alloca-like builtin according to the
/// -ftrivial-auto-var-init mode. Emits nothing when auto-init is disabled.
/// Since an alloca has no associated source type, the only option is a
/// byte-wide memset of the whole allocation.
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
                             unsigned AlignmentInBytes) {
  ConstantInt *Byte;
  switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
  case LangOptions::TrivialAutoVarInitKind::Uninitialized:
    // Nothing to initialize.
    return;
  case LangOptions::TrivialAutoVarInitKind::Zero:
    Byte = CGF.Builder.getInt8(0x00);
    break;
  case LangOptions::TrivialAutoVarInitKind::Pattern: {
    llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
    // The pattern for an i8 is always a ConstantInt; use cast<> (which
    // asserts) rather than an unchecked dyn_cast<> that could leave Byte
    // null and crash inside CreateMemSet.
    Byte = llvm::cast<llvm::ConstantInt>(
        initializationPatternFor(CGF.CGM, Int8));
    break;
  }
  }
  CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
}

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
Expand Down Expand Up @@ -2259,6 +2279,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
.getQuantity();
AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
AI->setAlignment(SuitableAlignmentInBytes);
initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
return RValue::get(AI);
}

Expand All @@ -2271,6 +2292,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
AI->setAlignment(AlignmentInBytes);
initializeAlloca(*this, AI, Size, AlignmentInBytes);
return RValue::get(AI);
}

Expand Down
90 changes: 6 additions & 84 deletions clang/lib/CodeGen/CGDecl.cpp
Expand Up @@ -19,6 +19,7 @@
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
#include "PatternInit.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/CharUnits.h"
Expand Down Expand Up @@ -984,92 +985,13 @@ static bool shouldSplitConstantStore(CodeGenModule &CGM,
return false;
}

/// Build a constant of type \p Ty holding the "pattern" auto-init fill value:
/// repeated 0xAA bytes for integers and 64-bit pointers, a low unmappable
/// address for 32-bit pointers, negative quiet NaNs for floating point, and
/// element-wise recursion for arrays and structs.
static llvm::Constant *patternFor(CodeGenModule &CGM, llvm::Type *Ty) {
// The following value is a guaranteed unmappable pointer value and has a
// repeated byte-pattern which makes it easier to synthesize. We use it for
// pointers as well as integers so that aggregates are likely to be
// initialized with this repeated value.
constexpr uint64_t LargeValue = 0xAAAAAAAAAAAAAAAAull;
// For 32-bit platforms it's a bit trickier because, across systems, only the
// zero page can reasonably be expected to be unmapped, and even then we need
// a very low address. We use a smaller value, and that value sadly doesn't
// have a repeated byte-pattern. We don't use it for integers.
constexpr uint32_t SmallValue = 0x000000AA;
// Floating-point values are initialized as NaNs because they propagate. Using
// a repeated byte pattern means that it will be easier to initialize
// all-floating-point aggregates and arrays with memset. Further, aggregates
// which mix integral and a few floats might also initialize with memset
// followed by a handful of stores for the floats. Using fairly unique NaNs
// also means they'll be easier to distinguish in a crash.
constexpr bool NegativeNaN = true;
constexpr uint64_t NaNPayload = 0xFFFFFFFFFFFFFFFFull;
if (Ty->isIntOrIntVectorTy()) {
unsigned BitWidth = cast<llvm::IntegerType>(
Ty->isVectorTy() ? Ty->getVectorElementType() : Ty)
->getBitWidth();
// Integers wider than 64 bits need the 0xAA byte splatted across the whole
// width rather than a single truncated 64-bit value.
if (BitWidth <= 64)
return llvm::ConstantInt::get(Ty, LargeValue);
return llvm::ConstantInt::get(
Ty, llvm::APInt::getSplat(BitWidth, llvm::APInt(64, LargeValue)));
}
if (Ty->isPtrOrPtrVectorTy()) {
auto *PtrTy = cast<llvm::PointerType>(
Ty->isVectorTy() ? Ty->getVectorElementType() : Ty);
unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth(
PtrTy->getAddressSpace());
llvm::Type *IntTy = llvm::IntegerType::get(CGM.getLLVMContext(), PtrWidth);
uint64_t IntValue;
switch (PtrWidth) {
default:
llvm_unreachable("pattern initialization of unsupported pointer width");
case 64:
IntValue = LargeValue;
break;
case 32:
IntValue = SmallValue;
break;
}
auto *Int = llvm::ConstantInt::get(IntTy, IntValue);
return llvm::ConstantExpr::getIntToPtr(Int, PtrTy);
}
if (Ty->isFPOrFPVectorTy()) {
unsigned BitWidth = llvm::APFloat::semanticsSizeInBits(
(Ty->isVectorTy() ? Ty->getVectorElementType() : Ty)
->getFltSemantics());
llvm::APInt Payload(64, NaNPayload);
if (BitWidth >= 64)
Payload = llvm::APInt::getSplat(BitWidth, Payload);
return llvm::ConstantFP::getQNaN(Ty, NegativeNaN, &Payload);
}
if (Ty->isArrayTy()) {
// Note: this doesn't touch tail padding (at the end of an object, before
// the next array object). It is instead handled by replaceUndef.
auto *ArrTy = cast<llvm::ArrayType>(Ty);
llvm::SmallVector<llvm::Constant *, 8> Element(
ArrTy->getNumElements(), patternFor(CGM, ArrTy->getElementType()));
return llvm::ConstantArray::get(ArrTy, Element);
}

// Note: this doesn't touch struct padding. It will initialize as much union
// padding as is required for the largest type in the union. Padding is
// instead handled by replaceUndef. Stores to structs with volatile members
// don't have a volatile qualifier when initialized according to C++. This is
// fine because stack-based volatiles don't really have volatile semantics
// anyways, and the initialization shouldn't be observable.
auto *StructTy = cast<llvm::StructType>(Ty);
llvm::SmallVector<llvm::Constant *, 8> Struct(StructTy->getNumElements());
for (unsigned El = 0; El != Struct.size(); ++El)
Struct[El] = patternFor(CGM, StructTy->getElementType(El));
return llvm::ConstantStruct::get(StructTy, Struct);
}

enum class IsPattern { No, Yes };

/// Generate a constant filled with either a pattern or zeroes.
static llvm::Constant *patternOrZeroFor(CodeGenModule &CGM, IsPattern isPattern,
llvm::Type *Ty) {
if (isPattern == IsPattern::Yes)
return patternFor(CGM, Ty);
return initializationPatternFor(CGM, Ty);
else
return llvm::Constant::getNullValue(Ty);
}
Expand Down Expand Up @@ -1294,8 +1216,8 @@ static void emitStoresForPatternInit(CodeGenModule &CGM, const VarDecl &D,
Address Loc, bool isVolatile,
CGBuilderTy &Builder) {
llvm::Type *ElTy = Loc.getElementType();
llvm::Constant *constant =
constWithPadding(CGM, IsPattern::Yes, patternFor(CGM, ElTy));
llvm::Constant *constant = constWithPadding(
CGM, IsPattern::Yes, initializationPatternFor(CGM, ElTy));
assert(!isa<llvm::UndefValue>(constant));
emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant);
}
Expand Down Expand Up @@ -1818,8 +1740,8 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {

case LangOptions::TrivialAutoVarInitKind::Pattern: {
llvm::Type *ElTy = Loc.getElementType();
llvm::Constant *Constant =
constWithPadding(CGM, IsPattern::Yes, patternFor(CGM, ElTy));
llvm::Constant *Constant = constWithPadding(
CGM, IsPattern::Yes, initializationPatternFor(CGM, ElTy));
CharUnits ConstantAlign = getContext().getTypeAlignInChars(VlaSize.Type);
llvm::BasicBlock *SetupBB = createBasicBlock("vla-setup.loop");
llvm::BasicBlock *LoopBB = createBasicBlock("vla-init.loop");
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CMakeLists.txt
Expand Up @@ -88,6 +88,7 @@ add_clang_library(clangCodeGen
MicrosoftCXXABI.cpp
ModuleBuilder.cpp
ObjectFilePCHContainerOperations.cpp
PatternInit.cpp
SanitizerMetadata.cpp
SwiftCallingConv.cpp
TargetInfo.cpp
Expand Down
93 changes: 93 additions & 0 deletions clang/lib/CodeGen/PatternInit.cpp
@@ -0,0 +1,93 @@
//===--- PatternInit.cpp - Pattern Initialization -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "PatternInit.h"
#include "CodeGenModule.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Type.h"

/// Produce the constant used to fill pattern-initialized memory of type \p Ty.
/// Integers and 64-bit pointers get a repeated 0xAA byte pattern, 32-bit
/// pointers a low unmappable address, floats an easily-recognized quiet NaN,
/// and aggregates are built by recursing element by element.
llvm::Constant *clang::CodeGen::initializationPatternFor(CodeGenModule &CGM,
                                                         llvm::Type *Ty) {
  // 0xAA..AA is a guaranteed-unmappable pointer value whose repeated byte
  // pattern is cheap to synthesize. It doubles as the integer pattern so that
  // mixed aggregates are likely to be filled with one repeated value.
  constexpr uint64_t LargeValue = 0xAAAAAAAAAAAAAAAAull;
  // On 32-bit targets only the zero page can reasonably be assumed unmapped
  // across systems, and only at very low addresses. This smaller value sadly
  // has no repeated byte pattern, so it is used for pointers only, never for
  // integers.
  constexpr uint32_t SmallValue = 0x000000AA;
  // Floats become NaNs because NaNs propagate. A repeated payload byte keeps
  // all-floating-point aggregates and arrays memset-able, and lets mixed
  // aggregates use memset plus a few stores for the floats. The fairly unique
  // payload also makes these values easy to spot in a crash.
  constexpr bool NegativeNaN = true;
  constexpr uint64_t NaNPayload = 0xFFFFFFFFFFFFFFFFull;
  if (Ty->isIntOrIntVectorTy()) {
    auto *ScalarTy = cast<llvm::IntegerType>(
        Ty->isVectorTy() ? Ty->getVectorElementType() : Ty);
    unsigned Bits = ScalarTy->getBitWidth();
    // Wider-than-64-bit integers need the 64-bit pattern splatted across the
    // full width; narrower ones can take the value directly.
    if (Bits <= 64)
      return llvm::ConstantInt::get(Ty, LargeValue);
    return llvm::ConstantInt::get(
        Ty, llvm::APInt::getSplat(Bits, llvm::APInt(64, LargeValue)));
  }
  if (Ty->isPtrOrPtrVectorTy()) {
    auto *PtrTy = cast<llvm::PointerType>(
        Ty->isVectorTy() ? Ty->getVectorElementType() : Ty);
    unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth(
        PtrTy->getAddressSpace());
    uint64_t RawValue;
    if (PtrWidth == 64)
      RawValue = LargeValue;
    else if (PtrWidth == 32)
      RawValue = SmallValue;
    else
      llvm_unreachable("pattern initialization of unsupported pointer width");
    llvm::Type *IntTy = llvm::IntegerType::get(CGM.getLLVMContext(), PtrWidth);
    return llvm::ConstantExpr::getIntToPtr(
        llvm::ConstantInt::get(IntTy, RawValue), PtrTy);
  }
  if (Ty->isFPOrFPVectorTy()) {
    llvm::Type *ScalarTy = Ty->isVectorTy() ? Ty->getVectorElementType() : Ty;
    unsigned Bits =
        llvm::APFloat::semanticsSizeInBits(ScalarTy->getFltSemantics());
    llvm::APInt Payload(64, NaNPayload);
    if (Bits >= 64)
      Payload = llvm::APInt::getSplat(Bits, Payload);
    return llvm::ConstantFP::getQNaN(Ty, NegativeNaN, &Payload);
  }
  if (Ty->isArrayTy()) {
    // Tail padding (at the end of an object, before the next array object) is
    // deliberately left alone here; replaceUndef handles it.
    auto *ArrTy = cast<llvm::ArrayType>(Ty);
    llvm::SmallVector<llvm::Constant *, 8> Elems(
        ArrTy->getNumElements(),
        initializationPatternFor(CGM, ArrTy->getElementType()));
    return llvm::ConstantArray::get(ArrTy, Elems);
  }

  // Struct padding is likewise left to replaceUndef; union padding gets
  // covered only up to the size of the union's largest member. Stores to
  // structs with volatile members carry no volatile qualifier when initialized
  // per C++ — fine, since stack-based volatiles don't have real volatile
  // semantics and this initialization shouldn't be observable anyway.
  auto *StructTy = cast<llvm::StructType>(Ty);
  llvm::SmallVector<llvm::Constant *, 8> Members(StructTy->getNumElements());
  for (unsigned I = 0; I != Members.size(); ++I)
    Members[I] = initializationPatternFor(CGM, StructTy->getElementType(I));
  return llvm::ConstantStruct::get(StructTy, Members);
}
27 changes: 27 additions & 0 deletions clang/lib/CodeGen/PatternInit.h
@@ -0,0 +1,27 @@
//===- PatternInit - Pattern initialization ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LIB_CODEGEN_PATTERNINIT_H
#define LLVM_CLANG_LIB_CODEGEN_PATTERNINIT_H

namespace llvm {
class Constant;
class Type;
} // namespace llvm

namespace clang {
namespace CodeGen {

class CodeGenModule;

/// Return a constant of type \p Ty holding the "pattern" auto-variable-init
/// fill value. Pattern-initialized memory is meant to crash loudly when read:
/// integers and 64-bit pointers get repeated 0xAA bytes, 32-bit pointers a
/// low unmappable address, floats a negative quiet NaN; arrays and structs
/// recurse element-wise.
llvm::Constant *initializationPatternFor(CodeGenModule &, llvm::Type *);

} // end namespace CodeGen
} // end namespace clang

#endif
28 changes: 28 additions & 0 deletions clang/test/CodeGenCXX/trivial-auto-var-init.cpp
Expand Up @@ -173,6 +173,34 @@ void test_vla(int size) {
used(ptr);
}

// Check that __builtin_alloca memory is auto-initialized with a memset of the
// dynamic size: 0x00 bytes under -ftrivial-auto-var-init=zero, repeated 0xAA
// (i8 -86) bytes under =pattern, and no memset at all when uninitialized.
// UNINIT-LABEL: test_alloca(
// ZERO-LABEL: test_alloca(
// ZERO: %[[SIZE:[a-z0-9]+]] = sext i32 %{{.*}} to i64
// ZERO-NEXT: %[[ALLOCA:[a-z0-9]+]] = alloca i8, i64 %[[SIZE]], align [[ALIGN:[0-9]+]]
// ZERO-NEXT: call void @llvm.memset{{.*}}(i8* align [[ALIGN]] %[[ALLOCA]], i8 0, i64 %[[SIZE]], i1 false)
// PATTERN-LABEL: test_alloca(
// PATTERN: %[[SIZE:[a-z0-9]+]] = sext i32 %{{.*}} to i64
// PATTERN-NEXT: %[[ALLOCA:[a-z0-9]+]] = alloca i8, i64 %[[SIZE]], align [[ALIGN:[0-9]+]]
// PATTERN-NEXT: call void @llvm.memset{{.*}}(i8* align [[ALIGN]] %[[ALLOCA]], i8 -86, i64 %[[SIZE]], i1 false)
void test_alloca(int size) {
void *ptr = __builtin_alloca(size);
used(ptr);
}

// Same as test_alloca, but for __builtin_alloca_with_align: the memset must
// carry the requested alignment (1024 bits = 128 bytes).
// UNINIT-LABEL: test_alloca_with_align(
// ZERO-LABEL: test_alloca_with_align(
// ZERO: %[[SIZE:[a-z0-9]+]] = sext i32 %{{.*}} to i64
// ZERO-NEXT: %[[ALLOCA:[a-z0-9]+]] = alloca i8, i64 %[[SIZE]], align 128
// ZERO-NEXT: call void @llvm.memset{{.*}}(i8* align 128 %[[ALLOCA]], i8 0, i64 %[[SIZE]], i1 false)
// PATTERN-LABEL: test_alloca_with_align(
// PATTERN: %[[SIZE:[a-z0-9]+]] = sext i32 %{{.*}} to i64
// PATTERN-NEXT: %[[ALLOCA:[a-z0-9]+]] = alloca i8, i64 %[[SIZE]], align 128
// PATTERN-NEXT: call void @llvm.memset{{.*}}(i8* align 128 %[[ALLOCA]], i8 -86, i64 %[[SIZE]], i1 false)
void test_alloca_with_align(int size) {
void *ptr = __builtin_alloca_with_align(size, 1024);
used(ptr);
}

// UNINIT-LABEL: test_struct_vla(
// ZERO-LABEL: test_struct_vla(
// ZERO: %[[SIZE:[0-9]+]] = mul nuw i64 %{{.*}}, 16
Expand Down

0 comments on commit ef202c3

Please sign in to comment.