Skip to content

Commit

Permalink
[BPF] support for BPF_ST instruction in codegen
Browse files Browse the repository at this point in the history
Generate store immediate instruction when CPUv4 is enabled.
For example:

    $ cat test.c
    struct foo {
      unsigned char  b;
      unsigned short h;
      unsigned int   w;
      unsigned long  d;
    };
    void bar(volatile struct foo *p) {
      p->b = 1;
      p->h = 2;
      p->w = 3;
      p->d = 4;
    }

    $ clang -O2 --target=bpf -mcpu=v4 test.c -c -o - | llvm-objdump -d -
    ...
    0000000000000000 <bar>:
           0:	72 01 00 00 01 00 00 00	*(u8 *)(r1 + 0x0) = 0x1
           1:	6a 01 02 00 02 00 00 00	*(u16 *)(r1 + 0x2) = 0x2
           2:	62 01 04 00 03 00 00 00	*(u32 *)(r1 + 0x4) = 0x3
           3:	7a 01 08 00 04 00 00 00	*(u64 *)(r1 + 0x8) = 0x4
           4:	95 00 00 00 00 00 00 00	exit

Take special care to:
- apply `BPFMISimplifyPatchable::checkADDrr` rewrite for BPF_ST
- validate the immediate value when the BPF_ST write is 64-bit:
  BPF interprets `(BPF_ST | BPF_MEM | BPF_DW)` writes as writes with
  sign extension of the immediate. Thus it is fine to generate such a
  write when the immediate is -1, but it is incorrect to generate such
  a write when the immediate is +0xffff_ffff.

Differential Revision: https://reviews.llvm.org/D140804
  • Loading branch information
eddyz87 committed Aug 10, 2023
1 parent 2b8542c commit 92e28e3
Show file tree
Hide file tree
Showing 6 changed files with 355 additions and 19 deletions.
61 changes: 57 additions & 4 deletions llvm/lib/Target/BPF/BPFInstrInfo.td
Expand Up @@ -59,6 +59,7 @@ def BPFHasBswap : Predicate<"Subtarget->hasBswap()">;
def BPFHasSdivSmod : Predicate<"Subtarget->hasSdivSmod()">;
def BPFNoMovsx : Predicate<"!Subtarget->hasMovsx()">;
def BPFNoBswap : Predicate<"!Subtarget->hasBswap()">;
def BPFHasStoreImm : Predicate<"Subtarget->hasStoreImm()">;

def brtarget : Operand<OtherVT> {
let PrintMethod = "printBrTargetOperand";
Expand All @@ -69,10 +70,18 @@ def u64imm : Operand<i64> {
let PrintMethod = "printImm64Operand";
}

// Plain i64 operand type. In this file it is used for the $src operand of
// CORE_MEM and CORE_ALU32_MEM.
// NOTE(review): presumably lets $src hold either a register or an
// immediate, as the name suggests -- confirm against the users.
def gpr_or_imm : Operand<i64>;

// Matches an i64 constant whose sign-extended value fits in a signed
// 32-bit integer.
def i64immSExt32 : PatLeaf<(i64 imm),
[{return isInt<32>(N->getSExtValue()); }]>;
// Matches an i32 constant whose sign-extended value fits in a signed
// 32-bit integer.
def i32immSExt32 : PatLeaf<(i32 imm),
[{return isInt<32>(N->getSExtValue()); }]>;
// Matches an i64 constant whose zero-extended value fits in an unsigned
// 32-bit integer.
def i64immZExt32 : PatLeaf<(i64 imm),
[{return isUInt<32>(N->getZExtValue()); }]>;

// Rebuilds a constant node as an i64 target constant carrying the
// zero-extended value of the original node.
def imm_to_i64 : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;

// Addressing modes.
def ADDRri : ComplexPattern<i64, 2, "SelectAddr", [], []>;
Expand Down Expand Up @@ -447,7 +456,7 @@ class STORE<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
}

class STOREi64<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode>
: STORE<Opc, OpcodeStr, [(OpNode i64:$src, ADDRri:$addr)]>;
: STORE<Opc, OpcodeStr, [(OpNode GPR:$src, ADDRri:$addr)]>;

let Predicates = [BPFNoALU32] in {
def STW : STOREi64<BPF_W, "u32", truncstorei32>;
Expand All @@ -456,6 +465,50 @@ let Predicates = [BPFNoALU32] in {
}
def STD : STOREi64<BPF_DW, "u64", store>;

// Template for store-immediate instructions (BPFClass = BPF_ST).
//   SizeOp    - width modifier; its Value is forwarded to TYPE_LD_ST.
//   OpcodeStr - type name printed inside "*(<type> *)" in the asm string.
//   Pattern   - the single selection pattern attached to the instruction.
// The instruction has no outputs; its inputs are the immediate to store and
// a MEMri address.
class STORE_imm<BPFWidthModifer SizeOp,
string OpcodeStr, dag Pattern>
: TYPE_LD_ST<BPF_MEM.Value, SizeOp.Value,
(outs),
(ins i64imm:$imm, MEMri:$addr),
"*("#OpcodeStr#" *)($addr) = $imm",
[Pattern]> {
// Encoded address: bits 19-16 hold the base register, bits 15-0 the offset.
bits<20> addr;
// Only 32 bits of the immediate are encoded, although the operand is i64imm.
bits<32> imm;

let Inst{51-48} = addr{19-16}; // base reg
let Inst{47-32} = addr{15-0}; // offset
let Inst{31-0} = imm;
let BPFClass = BPF_ST;
}

// Store-immediate instructions; selected only when the subtarget reports
// hasStoreImm().
let Predicates = [BPFHasStoreImm] in {
// Opcode (BPF_ST | BPF_MEM | BPF_DW) implies sign extension for
// value stored to memory:
// - it is fine to generate such write when immediate is -1
// - it is incorrect to generate such write when immediate is
// +0xffff_ffff.
// This is why STD_imm matches i64immSExt32 rather than i64immZExt32.
//
// In the latter case two instructions would be generated instead of
// one BPF_ST:
// rA = 0xffffffff ll ; LD_imm64
// *(u64 *)(rB + 0) = rA ; STX
//
// For BPF_{B,H,W} the size of value stored matches size of the immediate.
// NOTE(review): the truncating-store patterns below only match i64 constants
// that pass isUInt<32>; an i64 constant such as -1 does not satisfy that
// predicate and so would not select a BPF_ST here -- confirm this is intended.
def STD_imm : STORE_imm<BPF_DW, "u64", (store (i64 i64immSExt32:$imm), ADDRri:$addr)>;
def STW_imm : STORE_imm<BPF_W, "u32", (truncstorei32 (i64 i64immZExt32:$imm), ADDRri:$addr)>;
def STH_imm : STORE_imm<BPF_H, "u16", (truncstorei16 (i64 i64immZExt32:$imm), ADDRri:$addr)>;
def STB_imm : STORE_imm<BPF_B, "u8", (truncstorei8 (i64 i64immZExt32:$imm), ADDRri:$addr)>;
}

// Store-immediate selection for constants that appear as i32 nodes.
// The patterns attached to the ST*_imm definitions only match i64 constants,
// so these extra patterns map i32 constant stores onto the same instructions.
// imm_to_i64 rebuilds the constant as an i64 target constant (zero-extended)
// because the ST*_imm instructions take an i64imm operand.
// All three patterns spell the transformed operand the same way
// (`imm:$src`) so they read as one family.
let Predicates = [BPFHasALU32, BPFHasStoreImm] in {
  def : Pat<(store (i32 imm:$src), ADDRri:$dst),
            (STW_imm (imm_to_i64 imm:$src), ADDRri:$dst)>;
  def : Pat<(truncstorei16 (i32 imm:$src), ADDRri:$dst),
            (STH_imm (imm_to_i64 imm:$src), ADDRri:$dst)>;
  def : Pat<(truncstorei8 (i32 imm:$src), ADDRri:$dst),
            (STB_imm (imm_to_i64 imm:$src), ADDRri:$dst)>;
}

// LOAD instructions
class LOAD<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
: TYPE_LD_ST<ModOp.Value, SizeOp.Value,
Expand All @@ -478,12 +531,12 @@ class LOADi64<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, Pa
let isCodeGenOnly = 1 in {
def CORE_MEM : TYPE_LD_ST<BPF_MEM.Value, BPF_W.Value,
(outs GPR:$dst),
(ins u64imm:$opcode, GPR:$src, u64imm:$offset),
(ins u64imm:$opcode, gpr_or_imm:$src, u64imm:$offset),
"$dst = core_mem($opcode, $src, $offset)",
[]>;
def CORE_ALU32_MEM : TYPE_LD_ST<BPF_MEM.Value, BPF_W.Value,
(outs GPR32:$dst),
(ins u64imm:$opcode, GPR:$src, u64imm:$offset),
(ins u64imm:$opcode, gpr_or_imm:$src, u64imm:$offset),
"$dst = core_alu32_mem($opcode, $src, $offset)",
[]>;
let Constraints = "$dst = $src" in {
Expand Down Expand Up @@ -1002,7 +1055,7 @@ class STORE32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
}

class STOREi32<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode>
: STORE32<Opc, OpcodeStr, [(OpNode i32:$src, ADDRri:$addr)]>;
: STORE32<Opc, OpcodeStr, [(OpNode GPR32:$src, ADDRri:$addr)]>;

let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
def STW32 : STOREi32<BPF_W, "u32", store>;
Expand Down
45 changes: 31 additions & 14 deletions llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
Expand Up @@ -93,11 +93,35 @@ void BPFMISimplifyPatchable::initialize(MachineFunction &MFParm) {
LLVM_DEBUG(dbgs() << "*** BPF simplify patchable insts pass ***\n\n");
}

/// Returns true when \p Opcode is one of the store-immediate opcodes
/// (STB_imm, STH_imm, STW_imm, STD_imm).
static bool isST(unsigned Opcode) {
  switch (Opcode) {
  case BPF::STB_imm:
  case BPF::STH_imm:
  case BPF::STW_imm:
  case BPF::STD_imm:
    return true;
  default:
    return false;
  }
}

/// Returns true when \p Opcode is one of STB32, STH32 or STW32.
static bool isSTX32(unsigned Opcode) {
  switch (Opcode) {
  case BPF::STB32:
  case BPF::STH32:
  case BPF::STW32:
    return true;
  default:
    return false;
  }
}

/// Returns true when \p Opcode is one of STB, STH, STW or STD.
static bool isSTX64(unsigned Opcode) {
  switch (Opcode) {
  case BPF::STB:
  case BPF::STH:
  case BPF::STW:
  case BPF::STD:
    return true;
  default:
    return false;
  }
}

/// Returns true when \p Opcode is one of LDB32, LDH32 or LDW32.
static bool isLDX32(unsigned Opcode) {
  switch (Opcode) {
  case BPF::LDB32:
  case BPF::LDH32:
  case BPF::LDW32:
    return true;
  default:
    return false;
  }
}

/// Returns true when \p Opcode is one of LDB, LDH, LDW or LDD.
static bool isLDX64(unsigned Opcode) {
  switch (Opcode) {
  case BPF::LDB:
  case BPF::LDH:
  case BPF::LDW:
  case BPF::LDD:
    return true;
  default:
    return false;
  }
}

/// Returns true when \p Opcode is one of LDBSX, LDHSX or LDWSX.
static bool isLDSX(unsigned Opcode) {
  switch (Opcode) {
  case BPF::LDBSX:
  case BPF::LDHSX:
  case BPF::LDWSX:
    return true;
  default:
    return false;
  }
}

bool BPFMISimplifyPatchable::isLoadInst(unsigned Opcode) {
return Opcode == BPF::LDD || Opcode == BPF::LDW || Opcode == BPF::LDH ||
Opcode == BPF::LDB || Opcode == BPF::LDW32 || Opcode == BPF::LDH32 ||
Opcode == BPF::LDB32 || Opcode == BPF::LDWSX || Opcode == BPF::LDHSX ||
Opcode == BPF::LDBSX;
return isLDX32(Opcode) || isLDX64(Opcode) || isLDSX(Opcode);
}

void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
Expand All @@ -118,14 +142,9 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
MachineInstr *DefInst = MO.getParent();
unsigned Opcode = DefInst->getOpcode();
unsigned COREOp;
if (Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW ||
Opcode == BPF::LDD || Opcode == BPF::STB || Opcode == BPF::STH ||
Opcode == BPF::STW || Opcode == BPF::STD || Opcode == BPF::LDWSX ||
Opcode == BPF::LDHSX || Opcode == BPF::LDBSX)
if (isLDX64(Opcode) || isLDSX(Opcode) || isSTX64(Opcode) || isST(Opcode))
COREOp = BPF::CORE_MEM;
else if (Opcode == BPF::LDB32 || Opcode == BPF::LDH32 ||
Opcode == BPF::LDW32 || Opcode == BPF::STB32 ||
Opcode == BPF::STH32 || Opcode == BPF::STW32)
else if (isLDX32(Opcode) || isSTX32(Opcode))
COREOp = BPF::CORE_ALU32_MEM;
else
continue;
Expand All @@ -138,9 +157,7 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
// Reject the form:
// %1 = ADD_rr %2, %3
// *(type *)(%2 + 0) = %1
if (Opcode == BPF::STB || Opcode == BPF::STH || Opcode == BPF::STW ||
Opcode == BPF::STD || Opcode == BPF::STB32 || Opcode == BPF::STH32 ||
Opcode == BPF::STW32) {
if (isSTX64(Opcode) || isSTX32(Opcode)) {
const MachineOperand &Opnd = DefInst->getOperand(0);
if (Opnd.isReg() && Opnd.getReg() == MO.getReg())
continue;
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/BPF/BPFSubtarget.cpp
Expand Up @@ -33,6 +33,9 @@ static cl::opt<bool> Disable_sdiv_smod("disable-sdiv-smod", cl::Hidden,
cl::init(false), cl::desc("Disable sdiv/smod insns"));
static cl::opt<bool> Disable_gotol("disable-gotol", cl::Hidden, cl::init(false),
cl::desc("Disable gotol insn"));
static cl::opt<bool>
Disable_StoreImm("disable-storeimm", cl::Hidden, cl::init(false),
cl::desc("Disable BPF_ST (immediate store) insn"));

void BPFSubtarget::anchor() {}

Expand All @@ -54,6 +57,7 @@ void BPFSubtarget::initializeEnvironment() {
HasBswap = false;
HasSdivSmod = false;
HasGotol = false;
HasStoreImm = false;
}

void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
Expand All @@ -80,6 +84,7 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
HasBswap = !Disable_bswap;
HasSdivSmod = !Disable_sdiv_smod;
HasGotol = !Disable_gotol;
HasStoreImm = !Disable_StoreImm;
return;
}
}
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/BPF/BPFSubtarget.h
Expand Up @@ -57,7 +57,7 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
bool UseDwarfRIS;

// whether cpu v4 insns are enabled.
bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol;
bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm;

public:
// This constructor initializes the data members to match that
Expand All @@ -79,6 +79,7 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
bool hasBswap() const { return HasBswap; }
bool hasSdivSmod() const { return HasSdivSmod; }
bool hasGotol() const { return HasGotol; }
bool hasStoreImm() const { return HasStoreImm; }

const BPFInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const BPFFrameLowering *getFrameLowering() const override {
Expand Down
156 changes: 156 additions & 0 deletions llvm/test/CodeGen/BPF/CORE/field-reloc-st-imm.ll
@@ -0,0 +1,156 @@
; RUN: llc -march=bpfel -mcpu=v4 < %s | FileCheck %s

; Make sure that CO-RE relocations are generated correctly for
; BPF_ST (store immediate) instructions and that
; BPFMISimplifyPatchable optimizations are applied.
;
; Generated from the following source code:
;
; #define __pai __attribute__((preserve_access_index))
;
; struct foo {
; unsigned char b;
; unsigned short h;
; unsigned int w;
; unsigned long d;
; } __pai;
;
; void bar(volatile struct foo *p) {
; p->b = 1;
; p->h = 2;
; p->w = 3;
; p->d = 4;
; }
;
; Using the following command:
;
; clang -g -O2 -S -emit-llvm -mcpu=v4 --target=bpfel test.c

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"

@"llvm.foo:0:0$0:0" = external global i64, !llvm.preserve.access.index !0 #0
@"llvm.foo:0:2$0:1" = external global i64, !llvm.preserve.access.index !0 #0
@"llvm.foo:0:4$0:2" = external global i64, !llvm.preserve.access.index !0 #0
@"llvm.foo:0:8$0:3" = external global i64, !llvm.preserve.access.index !0 #0

; Function Attrs: nofree nounwind
define dso_local void @bar(ptr noundef %p) local_unnamed_addr #1 !dbg !18 {
entry:
call void @llvm.dbg.value(metadata ptr %p, metadata !24, metadata !DIExpression()), !dbg !25
%0 = load i64, ptr @"llvm.foo:0:0$0:0", align 8
%1 = getelementptr i8, ptr %p, i64 %0
%2 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 0, ptr %1)
store volatile i8 1, ptr %2, align 8, !dbg !26, !tbaa !27
%3 = load i64, ptr @"llvm.foo:0:2$0:1", align 8
%4 = getelementptr i8, ptr %p, i64 %3
%5 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 1, ptr %4)
store volatile i16 2, ptr %5, align 2, !dbg !34, !tbaa !35
%6 = load i64, ptr @"llvm.foo:0:4$0:2", align 8
%7 = getelementptr i8, ptr %p, i64 %6
%8 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 2, ptr %7)
store volatile i32 3, ptr %8, align 4, !dbg !36, !tbaa !37
%9 = load i64, ptr @"llvm.foo:0:8$0:3", align 8
%10 = getelementptr i8, ptr %p, i64 %9
%11 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 3, ptr %10)
store volatile i64 4, ptr %11, align 8, !dbg !38, !tbaa !39
ret void, !dbg !40
}

; CHECK: [[L0:.Ltmp.*]]:
; CHECK: *(u8 *)(r1 + 0) = 1
; CHECK: [[L2:.Ltmp.*]]:
; CHECK: *(u16 *)(r1 + 2) = 2
; CHECK: [[L4:.Ltmp.*]]:
; CHECK: *(u32 *)(r1 + 4) = 3
; CHECK: [[L6:.Ltmp.*]]:
; CHECK: *(u64 *)(r1 + 8) = 4

; CHECK: .section .BTF
; ...
; CHECK: .long [[FOO:.*]] # BTF_KIND_STRUCT(id = [[FOO_ID:.*]])
; ...
; CHECK: .ascii "foo" # string offset=[[FOO]]
; CHECK: .ascii ".text" # string offset=[[TEXT:.*]]
; CHECK: .ascii "0:0" # string offset=[[S1:.*]]
; CHECK: .ascii "0:1" # string offset=[[S2:.*]]
; CHECK: .ascii "0:2" # string offset=[[S3:.*]]
; CHECK: .ascii "0:3" # string offset=[[S4:.*]]

; CHECK: .section .BTF.ext
; ...
; CHECK: .long [[#]] # FieldReloc
; CHECK-NEXT: .long [[TEXT]] # Field reloc section string offset=[[TEXT]]
; CHECK-NEXT: .long [[#]]
; CHECK-NEXT: .long [[L0]]
; CHECK-NEXT: .long [[FOO_ID]]
; CHECK-NEXT: .long [[S1]]
; CHECK-NEXT: .long 0
; CHECK-NEXT: .long [[L2]]
; CHECK-NEXT: .long [[FOO_ID]]
; CHECK-NEXT: .long [[S2]]
; CHECK-NEXT: .long 0
; CHECK-NEXT: .long [[L4]]
; CHECK-NEXT: .long [[FOO_ID]]
; CHECK-NEXT: .long [[S3]]
; CHECK-NEXT: .long 0
; CHECK-NEXT: .long [[L6]]
; CHECK-NEXT: .long [[FOO_ID]]
; CHECK-NEXT: .long [[S4]]
; CHECK-NEXT: .long 0

; Function Attrs: nofree nosync nounwind memory(none)
declare ptr @llvm.bpf.passthrough.p0.p0(i32, ptr) #2

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare void @llvm.dbg.value(metadata, metadata, metadata) #3

attributes #0 = { "btf_ama" }
attributes #1 = { nofree nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v4" }
attributes #2 = { nofree nosync nounwind memory(none) }
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

!llvm.dbg.cu = !{!11}
!llvm.module.flags = !{!12, !13, !14, !15, !16}
!llvm.ident = !{!17}

!0 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "foo", file: !1, line: 3, size: 128, elements: !2)
!1 = !DIFile(filename: "some-file.c", directory: "/some/dir", checksumkind: CSK_MD5, checksum: "e5d03b4d39dfffadc6c607e956c37996")
!2 = !{!3, !5, !7, !9}
!3 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !0, file: !1, line: 4, baseType: !4, size: 8)
!4 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char)
!5 = !DIDerivedType(tag: DW_TAG_member, name: "h", scope: !0, file: !1, line: 5, baseType: !6, size: 16, offset: 16)
!6 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned)
!7 = !DIDerivedType(tag: DW_TAG_member, name: "w", scope: !0, file: !1, line: 6, baseType: !8, size: 32, offset: 32)
!8 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
!9 = !DIDerivedType(tag: DW_TAG_member, name: "d", scope: !0, file: !1, line: 7, baseType: !10, size: 64, offset: 64)
!10 = !DIBasicType(name: "unsigned long", size: 64, encoding: DW_ATE_unsigned)
!11 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 18.0.0 ...", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
!12 = !{i32 7, !"Dwarf Version", i32 5}
!13 = !{i32 2, !"Debug Info Version", i32 3}
!14 = !{i32 1, !"wchar_size", i32 4}
!15 = !{i32 7, !"frame-pointer", i32 2}
!16 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
!17 = !{!"clang version 18.0.0 ..."}
!18 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 10, type: !19, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !11, retainedNodes: !23)
!19 = !DISubroutineType(types: !20)
!20 = !{null, !21}
!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !22, size: 64)
!22 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !0)
!23 = !{!24}
!24 = !DILocalVariable(name: "p", arg: 1, scope: !18, file: !1, line: 10, type: !21)
!25 = !DILocation(line: 0, scope: !18)
!26 = !DILocation(line: 11, column: 8, scope: !18)
!27 = !{!28, !29, i64 0}
!28 = !{!"foo", !29, i64 0, !31, i64 2, !32, i64 4, !33, i64 8}
!29 = !{!"omnipotent char", !30, i64 0}
!30 = !{!"Simple C/C++ TBAA"}
!31 = !{!"short", !29, i64 0}
!32 = !{!"int", !29, i64 0}
!33 = !{!"long", !29, i64 0}
!34 = !DILocation(line: 12, column: 8, scope: !18)
!35 = !{!28, !31, i64 2}
!36 = !DILocation(line: 13, column: 8, scope: !18)
!37 = !{!28, !32, i64 4}
!38 = !DILocation(line: 14, column: 8, scope: !18)
!39 = !{!28, !33, i64 8}
!40 = !DILocation(line: 15, column: 1, scope: !18)

0 comments on commit 92e28e3

Please sign in to comment.