diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index e3fbbb720d069..26a54f631fcfc 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -35,6 +35,9 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__BPF_CPU_VERSION__", "0");
     return;
   }
+
+  Builder.defineMacro("__BPF_FEATURE_ARENA_CAST");
+
   if (CPU.empty() || CPU == "generic" || CPU == "v1") {
     Builder.defineMacro("__BPF_CPU_VERSION__", "1");
     return;
diff --git a/clang/test/Preprocessor/bpf-predefined-macros.c b/clang/test/Preprocessor/bpf-predefined-macros.c
index ff4d00ac3bcfc..fea24d1ea0ff7 100644
--- a/clang/test/Preprocessor/bpf-predefined-macros.c
+++ b/clang/test/Preprocessor/bpf-predefined-macros.c
@@ -61,6 +61,9 @@ int r;
 #ifdef __BPF_FEATURE_ST
 int s;
 #endif
+#ifdef __BPF_FEATURE_ARENA_CAST
+int t;
+#endif
 
 // CHECK: int b;
 // CHECK: int c;
@@ -90,6 +93,11 @@ int s;
 // CPU_V4: int r;
 // CPU_V4: int s;
 
+// CPU_V1: int t;
+// CPU_V2: int t;
+// CPU_V3: int t;
+// CPU_V4: int t;
+
 // CPU_GENERIC: int g;
 
 // CPU_PROBE: int f;
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 0d1eef60c3b55..3145bc3d19f5d 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -271,6 +271,7 @@ struct BPFOperand : public MCParsedAsmOperand {
         .Case("xchg32_32", true)
         .Case("cmpxchg_64", true)
         .Case("cmpxchg32_32", true)
+        .Case("addr_space_cast", true)
         .Default(false);
   }
 };
diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index 5c77d183e1ef3..bbdbdbbde5322 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -66,6 +66,14 @@ class BPFIRPeepholePass : public PassInfoMixin<BPFIRPeepholePass> {
   static bool isRequired() { return true; }
 };
 
+class BPFASpaceCastSimplifyPass
+    : public PassInfoMixin<BPFASpaceCastSimplifyPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  static bool isRequired() { return true; }
+};
+
 class BPFAdjustOptPass : public PassInfoMixin<BPFAdjustOptPass> {
 public:
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
diff --git a/llvm/lib/Target/BPF/BPFASpaceCastSimplifyPass.cpp b/llvm/lib/Target/BPF/BPFASpaceCastSimplifyPass.cpp
new file mode 100644
index 0000000000000..f87b299bbba65
--- /dev/null
+++ b/llvm/lib/Target/BPF/BPFASpaceCastSimplifyPass.cpp
@@ -0,0 +1,104 @@
+//===-- BPFASpaceCastSimplifyPass.cpp - BPF addrspacecast simplifications -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "llvm/ADT/STLExtras.h"
+#include <optional>
+
+#define DEBUG_TYPE "bpf-aspace-simplify"
+
+using namespace llvm;
+
+namespace {
+
+struct CastGEPCast {
+  AddrSpaceCastInst *OuterCast;
+
+  // Match chain of instructions:
+  //   %inner = addrspacecast N->M
+  //   %gep   = getelementptr %inner, ...
+  //   %outer = addrspacecast M->N %gep
+  // Where I is %outer.
+  static std::optional<CastGEPCast> match(Value *I) {
+    auto *OuterCast = dyn_cast<AddrSpaceCastInst>(I);
+    if (!OuterCast)
+      return std::nullopt;
+    auto *GEP = dyn_cast<GetElementPtrInst>(OuterCast->getPointerOperand());
+    if (!GEP)
+      return std::nullopt;
+    auto *InnerCast = dyn_cast<AddrSpaceCastInst>(GEP->getPointerOperand());
+    if (!InnerCast)
+      return std::nullopt;
+    if (InnerCast->getSrcAddressSpace() != OuterCast->getDestAddressSpace())
+      return std::nullopt;
+    if (InnerCast->getDestAddressSpace() != OuterCast->getSrcAddressSpace())
+      return std::nullopt;
+    return CastGEPCast{OuterCast};
+  }
+
+  static PointerType *changeAddressSpace(PointerType *Ty, unsigned AS) {
+    return PointerType::get(Ty->getContext(), AS);
+  }
+
+  // Assuming match(this->OuterCast) is true, convert:
+  //   (addrspacecast M->N (getelementptr (addrspacecast N->M ptr) ...))
+  // To:
+  //   (getelementptr ptr ...)
+  //
+  // The inner cast is erased once it becomes dead. It may itself be the
+  // outer cast of another chain that is still queued in Pending, so any
+  // such entry is dropped first to keep Pending free of dangling pointers.
+  GetElementPtrInst *rewrite(SmallVectorImpl<CastGEPCast> &Pending) {
+    auto *GEP = cast<GetElementPtrInst>(OuterCast->getPointerOperand());
+    auto *InnerCast = cast<AddrSpaceCastInst>(GEP->getPointerOperand());
+    unsigned AS = OuterCast->getDestAddressSpace();
+    auto *NewGEP = cast<GetElementPtrInst>(GEP->clone());
+    NewGEP->setName(GEP->getName());
+    NewGEP->insertAfter(OuterCast);
+    NewGEP->setOperand(0, InnerCast->getPointerOperand());
+    auto *GEPTy = cast<PointerType>(GEP->getType());
+    NewGEP->mutateType(changeAddressSpace(GEPTy, AS));
+    OuterCast->replaceAllUsesWith(NewGEP);
+    OuterCast->eraseFromParent();
+    if (GEP->use_empty())
+      GEP->eraseFromParent();
+    if (InnerCast->use_empty()) {
+      llvm::erase_if(Pending, [InnerCast](const CastGEPCast &C) {
+        return C.OuterCast == InnerCast;
+      });
+      InnerCast->eraseFromParent();
+    }
+    return NewGEP;
+  }
+};
+
+} // anonymous namespace
+
+// Rewrite every cast -> GEP -> cast chain in F that round-trips between two
+// address spaces into a single GEP on the original pointer. Users of each
+// new GEP are re-examined, since the rewrite can expose a further chain.
+PreservedAnalyses BPFASpaceCastSimplifyPass::run(Function &F,
+                                                 FunctionAnalysisManager &AM) {
+  SmallVector<CastGEPCast, 16> WorkList;
+  bool Changed = false;
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB)
+      if (auto It = CastGEPCast::match(&I))
+        WorkList.push_back(It.value());
+    Changed |= !WorkList.empty();
+
+    while (!WorkList.empty()) {
+      CastGEPCast InsnChain = WorkList.pop_back_val();
+      GetElementPtrInst *NewGEP = InsnChain.rewrite(WorkList);
+      for (User *U : NewGEP->users())
+        if (auto It = CastGEPCast::match(U))
+          WorkList.push_back(It.value());
+    }
+  }
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
index 81effc9b1db46..edd59aaa6d01d 100644
--- a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
+++ b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
@@ -14,6 +14,8 @@
 //     optimizations are done and those builtins can be removed.
 //   - remove llvm.bpf.getelementptr.and.load builtins.
 //   - remove llvm.bpf.getelementptr.and.store builtins.
+//   - for loads and stores with base addresses from non-zero address space
+//     cast base address to zero address space (support for BPF arenas).
// //===----------------------------------------------------------------------===// @@ -55,6 +57,7 @@ class BPFCheckAndAdjustIR final : public ModulePass { bool removeCompareBuiltin(Module &M); bool sinkMinMax(Module &M); bool removeGEPBuiltins(Module &M); + bool insertASpaceCasts(Module &M); }; } // End anonymous namespace @@ -416,11 +419,124 @@ bool BPFCheckAndAdjustIR::removeGEPBuiltins(Module &M) { return Changed; } +// Wrap ToWrap with cast to address space zero: +// - if ToWrap is a getelementptr, +// wrap it's base pointer instead and return a copy; +// - if ToWrap is Instruction, insert address space cast +// immediately after ToWrap; +// - if ToWrap is not an Instruction (function parameter +// or a global value), insert address space cast at the +// beginning of the Function F; +// - use Cache to avoid inserting too many casts; +static Value *aspaceWrapValue(DenseMap &Cache, Function *F, + Value *ToWrap) { + auto It = Cache.find(ToWrap); + if (It != Cache.end()) + return It->getSecond(); + + if (auto *GEP = dyn_cast(ToWrap)) { + Value *Ptr = GEP->getPointerOperand(); + Value *WrappedPtr = aspaceWrapValue(Cache, F, Ptr); + auto *GEPTy = cast(GEP->getType()); + auto *NewGEP = GEP->clone(); + NewGEP->insertAfter(GEP); + NewGEP->mutateType(GEPTy->getPointerTo(0)); + NewGEP->setOperand(GEP->getPointerOperandIndex(), WrappedPtr); + NewGEP->setName(GEP->getName()); + Cache[ToWrap] = NewGEP; + return NewGEP; + } + + IRBuilder IB(F->getContext()); + if (Instruction *InsnPtr = dyn_cast(ToWrap)) + IB.SetInsertPoint(*InsnPtr->getInsertionPointAfterDef()); + else + IB.SetInsertPoint(F->getEntryBlock().getFirstInsertionPt()); + auto *PtrTy = cast(ToWrap->getType()); + auto *ASZeroPtrTy = PtrTy->getPointerTo(0); + auto *ACast = IB.CreateAddrSpaceCast(ToWrap, ASZeroPtrTy, ToWrap->getName()); + Cache[ToWrap] = ACast; + return ACast; +} + +// Wrap a pointer operand OpNum of instruction I +// with cast to address space zero +static void aspaceWrapOperand(DenseMap &Cache, 
Instruction *I, + unsigned OpNum) { + Value *OldOp = I->getOperand(OpNum); + if (OldOp->getType()->getPointerAddressSpace() == 0) + return; + + Value *NewOp = aspaceWrapValue(Cache, I->getFunction(), OldOp); + I->setOperand(OpNum, NewOp); + // Check if there are any remaining users of old GEP, + // delete those w/o users + for (;;) { + auto *OldGEP = dyn_cast(OldOp); + if (!OldGEP) + break; + if (!OldGEP->use_empty()) + break; + OldOp = OldGEP->getPointerOperand(); + OldGEP->eraseFromParent(); + } +} + +// Support for BPF arenas: +// - for each function in the module M, update pointer operand of +// each memory access instruction (load/store/cmpxchg/atomicrmw) +// by casting it from non-zero address space to zero address space, e.g: +// +// (load (ptr addrspace (N) %p) ...) +// -> (load (addrspacecast ptr addrspace (N) %p to ptr)) +// +// - assign section with name .arena.N for globals defined in +// non-zero address space N +bool BPFCheckAndAdjustIR::insertASpaceCasts(Module &M) { + bool Changed = false; + for (Function &F : M) { + DenseMap CastsCache; + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + unsigned PtrOpNum; + + if (auto *LD = dyn_cast(&I)) + PtrOpNum = LD->getPointerOperandIndex(); + else if (auto *ST = dyn_cast(&I)) + PtrOpNum = ST->getPointerOperandIndex(); + else if (auto *CmpXchg = dyn_cast(&I)) + PtrOpNum = CmpXchg->getPointerOperandIndex(); + else if (auto *RMW = dyn_cast(&I)) + PtrOpNum = RMW->getPointerOperandIndex(); + else + continue; + + aspaceWrapOperand(CastsCache, &I, PtrOpNum); + } + } + Changed |= !CastsCache.empty(); + } + // Merge all globals within same address space into single + // .arena. section + for (GlobalVariable &G : M.globals()) { + if (G.getAddressSpace() == 0 || G.hasSection()) + continue; + SmallString<16> SecName; + raw_svector_ostream OS(SecName); + OS << ".arena." 
<< G.getAddressSpace(); + G.setSection(SecName); + // Prevent having separate section for constants + G.setConstant(false); + } + return Changed; +} + bool BPFCheckAndAdjustIR::adjustIR(Module &M) { bool Changed = removePassThroughBuiltin(M); Changed = removeCompareBuiltin(M) || Changed; Changed = sinkMinMax(M) || Changed; Changed = removeGEPBuiltins(M) || Changed; + Changed = insertASpaceCasts(M) || Changed; return Changed; } diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index 82d3470231066..7198e9499bc32 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -420,6 +420,35 @@ let Predicates = [BPFHasMovsx] in { } } +def ADDR_SPACE_CAST + : ALU_RR { + bits<64> dst_as; + bits<64> src_as; + + let Inst{47-32} = 1; + let Inst{31-16} = dst_as{15-0}; + let Inst{15-0} = src_as{15-0}; +} + +def SrcAddrSpace : SDNodeXFormgetTargetConstant( + cast(N)->getSrcAddressSpace(), + SDLoc(N), MVT::i64); +}]>; + +def DstAddrSpace : SDNodeXFormgetTargetConstant( + cast(N)->getDestAddressSpace(), + SDLoc(N), MVT::i64); +}]>; + +def : Pat<(addrspacecast:$this GPR:$src), + (ADDR_SPACE_CAST $src, (DstAddrSpace $this), (SrcAddrSpace $this))>; + def FI_ri : TYPE_LD_ST 42) +; a = magic1(); +; else +; a = magic2(); +; a[5] = 7; +; } +; +; Using the following command: +; +; clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c + +define void @test(i64 noundef %i) { +; CHECK: if.end: +; CHECK-NEXT: [[A_0:%.*]] = phi ptr addrspace(1) +; CHECK-NEXT: [[A_01:%.*]] = addrspacecast ptr addrspace(1) [[A_0]] to ptr +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A_01]], i64 5 +; CHECK-NEXT: store i32 7, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: ret void +; +entry: + %cmp = icmp sgt i64 %i, 42 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = tail call ptr addrspace(1) @magic1() + br label %if.end + +if.else: ; preds = %entry + %call1 = tail call ptr addrspace(1) @magic2() + 
br label %if.end + +if.end: ; preds = %if.else, %if.then + %a.0 = phi ptr addrspace(1) [ %call, %if.then ], [ %call1, %if.else ] + %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %a.0, i64 5 + store i32 7, ptr addrspace(1) %arrayidx, align 4 + ret void +} + +declare ptr addrspace(1) @magic1(...) +declare ptr addrspace(1) @magic2(...) diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-1.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-1.ll new file mode 100644 index 0000000000000..32d67284d1c1b --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-simplify-1.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s + +; Check that bpf-aspace-simplify pass removes unnecessary (for BPF) +; address space casts for cast M->N -> GEP -> cast N->M chain. + +define dso_local ptr addrspace(1) @test (ptr addrspace(1) %p) { +; CHECK-LABEL: define dso_local ptr addrspace(1) @test( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 8 +; CHECK-NEXT: ret ptr addrspace(1) [[B1]] +; + entry: + %a = addrspacecast ptr addrspace(1) %p to ptr + %b = getelementptr inbounds i8, ptr %a, i64 8 + %c = addrspacecast ptr %b to ptr addrspace(1) + ret ptr addrspace(1) %c +} diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-2.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-2.ll new file mode 100644 index 0000000000000..a2965554a9733 --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-simplify-2.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s + +; Check that bpf-aspace-simplify pass does not change +; chain 'cast M->N -> GEP -> cast N->K'. 
+ +define dso_local ptr addrspace(2) @test (ptr addrspace(1) %p) { +; CHECK-LABEL: define dso_local ptr addrspace(2) @test( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[C:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(2) +; CHECK-NEXT: ret ptr addrspace(2) [[C]] +; + entry: + %a = addrspacecast ptr addrspace(1) %p to ptr + %b = getelementptr inbounds i8, ptr %a, i64 8 + %c = addrspacecast ptr %b to ptr addrspace(2) + ret ptr addrspace(2) %c +} diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-3.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-3.ll new file mode 100644 index 0000000000000..a7736c462b44b --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-simplify-3.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s + +; Check that when bpf-aspace-simplify pass modifies chain +; 'cast M->N -> GEP -> cast N->M' it does not remove GEP, +; when that GEP is used by some other instruction. 
+ +define dso_local ptr addrspace(1) @test (ptr addrspace(1) %p) { +; CHECK-LABEL: define dso_local ptr addrspace(1) @test( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 8 +; CHECK-NEXT: call void @sink(ptr [[B]]) +; CHECK-NEXT: ret ptr addrspace(1) [[B1]] +; + entry: + %a = addrspacecast ptr addrspace(1) %p to ptr + %b = getelementptr inbounds i8, ptr %a, i64 8 + %c = addrspacecast ptr %b to ptr addrspace(1) + call void @sink(ptr %b) + ret ptr addrspace(1) %c +} + +declare dso_local void @sink(ptr) diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-4.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-4.ll new file mode 100644 index 0000000000000..b2c384bbb6abd --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-simplify-4.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s + +; Check that bpf-aspace-simplify pass simplifies chain +; 'cast K->M -> cast M->N -> GEP -> cast N->M -> cast M->K'. 
+ +define dso_local ptr addrspace(2) @test (ptr addrspace(2) %p) { +; CHECK-LABEL: define dso_local ptr addrspace(2) @test( +; CHECK-SAME: ptr addrspace(2) [[P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C12:%.*]] = getelementptr inbounds i8, ptr addrspace(2) [[P]], i64 8 +; CHECK-NEXT: ret ptr addrspace(2) [[C12]] +; + entry: + %a = addrspacecast ptr addrspace(2) %p to ptr addrspace(1) + %b = addrspacecast ptr addrspace(1) %a to ptr + %c = getelementptr inbounds i8, ptr %b, i64 8 + %d = addrspacecast ptr %c to ptr addrspace(1) + %e = addrspacecast ptr addrspace (1) %d to ptr addrspace(2) + ret ptr addrspace(2) %e +} diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-5.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-5.ll new file mode 100644 index 0000000000000..b62d25384d958 --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-simplify-5.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s + +; Check that bpf-aspace-simplify pass removes unnecessary (for BPF) +; address space casts for cast M->N -> GEP -> cast N->M chain, +; where chain is split between several BBs. 
+ +define dso_local ptr addrspace(1) @test (ptr addrspace(1) %p) { +; CHECK-LABEL: define dso_local ptr addrspace(1) @test( +; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 8 +; CHECK-NEXT: ret ptr addrspace(1) [[B1]] +; +entry: + %a = addrspacecast ptr addrspace(1) %p to ptr + %b = getelementptr inbounds i8, ptr %a, i64 8 + br label %exit + +exit: + %c = addrspacecast ptr %b to ptr addrspace(1) + ret ptr addrspace(1) %c +} diff --git a/llvm/test/CodeGen/BPF/assembler-disassembler.s b/llvm/test/CodeGen/BPF/assembler-disassembler.s index 2bc7421c2471c..991d6edc683a3 100644 --- a/llvm/test/CodeGen/BPF/assembler-disassembler.s +++ b/llvm/test/CodeGen/BPF/assembler-disassembler.s @@ -289,3 +289,10 @@ r0 = *(u32*)skb[42] r0 = *(u8*)skb[r1] r0 = *(u16*)skb[r1] r0 = *(u32*)skb[r1] + +// CHECK: bf 10 01 00 01 00 00 00 r0 = addr_space_cast(r1, 0x0, 0x1) +// CHECK: bf 21 01 00 00 00 01 00 r1 = addr_space_cast(r2, 0x1, 0x0) +// CHECK: bf 43 01 00 2a 00 07 00 r3 = addr_space_cast(r4, 0x7, 0x2a) +r0 = addr_space_cast(r1, 0, 1) +r1 = addr_space_cast(r2, 1, 0) +r3 = addr_space_cast(r4, 7, 42)