Skip to content

Commit

Permalink
[BPF] support for BPF_ST instruction in codegen
Browse files Browse the repository at this point in the history
Generate store immediate instruction when CPUv4 is enabled.
For example:

    $ cat test.c
    struct foo {
      unsigned char  b;
      unsigned short h;
      unsigned int   w;
      unsigned long  d;
    };
    void bar(volatile struct foo *p) {
      p->b = 1;
      p->h = 2;
      p->w = 3;
      p->d = 4;
    }

    $ clang -O2 --target=bpf -mcpu=v4 test.c -c -o - | llvm-objdump -d -
    ...
    0000000000000000 <bar>:
           0:	72 01 00 00 01 00 00 00	*(u8 *)(r1 + 0x0) = 0x1
           1:	6a 01 02 00 02 00 00 00	*(u16 *)(r1 + 0x2) = 0x2
           2:	62 01 04 00 03 00 00 00	*(u32 *)(r1 + 0x4) = 0x3
           3:	7a 01 08 00 04 00 00 00	*(u64 *)(r1 + 0x8) = 0x4
           4:	95 00 00 00 00 00 00 00	exit

Take special care to:
- apply `BPFMISimplifyPatchable::checkADDrr` rewrite for BPF_ST
- validate immediate value when BPF_ST write is 64-bit:
  BPF interprets `(BPF_ST | BPF_MEM | BPF_DW)` writes as writes with
  sign extension. Thus it is fine to generate such write when
  immediate is -1, but it is incorrect to generate such write when
  immediate is +0xffff_ffff.

This commit was previously reverted in e66affa.
The reason for the revert was an unrelated bug in the BPF backend,
triggered by the test case added in this commit when LLVM is built
with LLVM_ENABLE_EXPENSIVE_CHECKS.
That bug was fixed in D157806.

Differential Revision: https://reviews.llvm.org/D140804
  • Loading branch information
eddyz87 committed Aug 16, 2023
1 parent 3c2a669 commit 8f28e80
Show file tree
Hide file tree
Showing 6 changed files with 326 additions and 4 deletions.
55 changes: 53 additions & 2 deletions llvm/lib/Target/BPF/BPFInstrInfo.td
Expand Up @@ -59,6 +59,7 @@ def BPFHasBswap : Predicate<"Subtarget->hasBswap()">;
def BPFHasSdivSmod : Predicate<"Subtarget->hasSdivSmod()">;
def BPFNoMovsx : Predicate<"!Subtarget->hasMovsx()">;
def BPFNoBswap : Predicate<"!Subtarget->hasBswap()">;
def BPFHasStoreImm : Predicate<"Subtarget->hasStoreImm()">;

def brtarget : Operand<OtherVT> {
let PrintMethod = "printBrTargetOperand";
Expand All @@ -75,6 +76,12 @@ def i64immSExt32 : PatLeaf<(i64 imm),
[{return isInt<32>(N->getSExtValue()); }]>;
def i32immSExt32 : PatLeaf<(i32 imm),
[{return isInt<32>(N->getSExtValue()); }]>;
// Matches an i64 immediate whose zero-extended value fits in 32 unsigned
// bits (i.e. isUInt<32> holds for it).
def i64immZExt32 : PatLeaf<(i64 imm),
[{return isUInt<32>(N->getZExtValue()); }]>;

// Node transform: rebuilds the matched immediate as an i64 target constant
// carrying the node's zero-extended value, at the node's own debug location.
def imm_to_i64 : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;

// Addressing modes.
def ADDRri : ComplexPattern<i64, 2, "SelectAddr", [], []>;
Expand Down Expand Up @@ -449,7 +456,7 @@ class STORE<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
}

class STOREi64<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode>
: STORE<Opc, OpcodeStr, [(OpNode i64:$src, ADDRri:$addr)]>;
: STORE<Opc, OpcodeStr, [(OpNode GPR:$src, ADDRri:$addr)]>;

let Predicates = [BPFNoALU32] in {
def STW : STOREi64<BPF_W, "u32", truncstorei32>;
Expand All @@ -458,6 +465,50 @@ let Predicates = [BPFNoALU32] in {
}
def STD : STOREi64<BPF_DW, "u64", store>;

// Store-immediate instruction format (instruction class BPF_ST, mode BPF_MEM).
// The instruction has no outputs; its inputs are an immediate and a MEMri
// address.
//   SizeOp    - width modifier of the store
//   OpcodeStr - width name spliced into the assembly string (e.g. "u8")
//   Pattern   - the single selection pattern attached to the instruction
class STORE_imm<BPFWidthModifer SizeOp,
string OpcodeStr, dag Pattern>
: TYPE_LD_ST<BPF_MEM.Value, SizeOp.Value,
(outs),
(ins i64imm:$imm, MEMri:$addr),
"*("#OpcodeStr#" *)($addr) = $imm",
[Pattern]> {
// addr packs the base register (bits 19-16) and the offset (bits 15-0).
bits<20> addr;
// Only 32 bits of the immediate are encoded, although the operand is
// declared as i64imm.
bits<32> imm;

let Inst{51-48} = addr{19-16}; // base reg
let Inst{47-32} = addr{15-0}; // offset
let Inst{31-0} = imm;
let BPFClass = BPF_ST;
}

// Store-immediate instructions for 64-, 32-, 16- and 8-bit stores of an i64
// immediate. They are only available under the BPFHasStoreImm predicate.
// STD_imm accepts immediates that fit in 32 signed bits (i64immSExt32); the
// narrower stores accept immediates that fit in 32 unsigned bits
// (i64immZExt32).
let Predicates = [BPFHasStoreImm] in {
// Opcode (BPF_ST | BPF_MEM | BPF_DW) implies sign extension for
// value stored to memory:
// - it is fine to generate such write when immediate is -1
// - it is incorrect to generate such write when immediate is
// +0xffff_ffff.
//
// In the latter case two instructions would be generated instead of
// one BPF_ST:
// rA = 0xffffffff ll ; LD_imm64
// *(u64 *)(rB + 0) = rA ; STX
//
// For BPF_{B,H,W} the size of value stored matches size of the immediate.
def STD_imm : STORE_imm<BPF_DW, "u64", (store (i64 i64immSExt32:$imm), ADDRri:$addr)>;
def STW_imm : STORE_imm<BPF_W, "u32", (truncstorei32 (i64 i64immZExt32:$imm), ADDRri:$addr)>;
def STH_imm : STORE_imm<BPF_H, "u16", (truncstorei16 (i64 i64immZExt32:$imm), ADDRri:$addr)>;
def STB_imm : STORE_imm<BPF_B, "u8", (truncstorei8 (i64 i64immZExt32:$imm), ADDRri:$addr)>;
}

// With ALU32 enabled, 32-bit values are i32, so stores of i32 immediates need
// their own patterns. Each one converts the i32 immediate to an i64 target
// constant (imm_to_i64) and reuses the store-immediate instruction of the
// matching width. All three patterns spell the transformed operand the same
// way (imm:$src).
let Predicates = [BPFHasALU32, BPFHasStoreImm] in {
  def : Pat<(store (i32 imm:$src), ADDRri:$dst),
            (STW_imm (imm_to_i64 imm:$src), ADDRri:$dst)>;
  def : Pat<(truncstorei16 (i32 imm:$src), ADDRri:$dst),
            (STH_imm (imm_to_i64 imm:$src), ADDRri:$dst)>;
  def : Pat<(truncstorei8 (i32 imm:$src), ADDRri:$dst),
            (STB_imm (imm_to_i64 imm:$src), ADDRri:$dst)>;
}

// LOAD instructions
class LOAD<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
: TYPE_LD_ST<ModOp.Value, SizeOp.Value,
Expand Down Expand Up @@ -1007,7 +1058,7 @@ class STORE32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
}

class STOREi32<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode>
: STORE32<Opc, OpcodeStr, [(OpNode i32:$src, ADDRri:$addr)]>;
: STORE32<Opc, OpcodeStr, [(OpNode GPR32:$src, ADDRri:$addr)]>;

let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
def STW32 : STOREi32<BPF_W, "u32", store>;
Expand Down
7 changes: 6 additions & 1 deletion llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
Expand Up @@ -93,6 +93,11 @@ void BPFMISimplifyPatchable::initialize(MachineFunction &MFParm) {
LLVM_DEBUG(dbgs() << "*** BPF simplify patchable insts pass ***\n\n");
}

/// Returns true when \p Opcode is one of the store-immediate opcodes
/// (STB_imm, STH_imm, STW_imm or STD_imm).
static bool isST(unsigned Opcode) {
  switch (Opcode) {
  case BPF::STB_imm:
  case BPF::STH_imm:
  case BPF::STW_imm:
  case BPF::STD_imm:
    return true;
  default:
    return false;
  }
}

/// Returns true when \p Opcode is one of the 32-bit register store opcodes
/// (STB32, STH32 or STW32).
static bool isSTX32(unsigned Opcode) {
  switch (Opcode) {
  case BPF::STB32:
  case BPF::STH32:
  case BPF::STW32:
    return true;
  default:
    return false;
  }
}
Expand Down Expand Up @@ -141,7 +146,7 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
COREOp = BPF::CORE_LD64;
else if (isLDX32(Opcode))
COREOp = BPF::CORE_LD32;
else if (isSTX64(Opcode) || isSTX32(Opcode))
else if (isSTX64(Opcode) || isSTX32(Opcode) || isST(Opcode))
COREOp = BPF::CORE_ST;
else
continue;
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/BPF/BPFSubtarget.cpp
Expand Up @@ -33,6 +33,9 @@ static cl::opt<bool> Disable_sdiv_smod("disable-sdiv-smod", cl::Hidden,
cl::init(false), cl::desc("Disable sdiv/smod insns"));
static cl::opt<bool> Disable_gotol("disable-gotol", cl::Hidden, cl::init(false),
cl::desc("Disable gotol insn"));
// Hidden boolean option "disable-storeimm", off by default. When set, the
// subtarget does not enable BPF_ST (store immediate) instructions.
static cl::opt<bool>
Disable_StoreImm("disable-storeimm", cl::Hidden, cl::init(false),
cl::desc("Disable BPF_ST (immediate store) insn"));

void BPFSubtarget::anchor() {}

Expand All @@ -54,6 +57,7 @@ void BPFSubtarget::initializeEnvironment() {
HasBswap = false;
HasSdivSmod = false;
HasGotol = false;
HasStoreImm = false;
}

void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
Expand All @@ -80,6 +84,7 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
HasBswap = !Disable_bswap;
HasSdivSmod = !Disable_sdiv_smod;
HasGotol = !Disable_gotol;
HasStoreImm = !Disable_StoreImm;
return;
}
}
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/BPF/BPFSubtarget.h
Expand Up @@ -57,7 +57,7 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
bool UseDwarfRIS;

// whether cpu v4 insns are enabled.
bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol;
bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm;

public:
// This constructor initializes the data members to match that
Expand All @@ -79,6 +79,7 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
bool hasBswap() const { return HasBswap; }
bool hasSdivSmod() const { return HasSdivSmod; }
bool hasGotol() const { return HasGotol; }
bool hasStoreImm() const { return HasStoreImm; }

const BPFInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const BPFFrameLowering *getFrameLowering() const override {
Expand Down
156 changes: 156 additions & 0 deletions llvm/test/CodeGen/BPF/CORE/field-reloc-st-imm.ll
@@ -0,0 +1,156 @@
; RUN: llc -march=bpfel -mcpu=v4 < %s | FileCheck %s

; Make sure that CO-RE relocations are generated correctly for
; BPF_ST (store immediate) instructions and that
; BPFMISimplifyPatchable optimizations are applied.
;
; Generated from the following source code:
;
; #define __pai __attribute__((preserve_access_index))
;
; struct foo {
; unsigned char b;
; unsigned short h;
; unsigned int w;
; unsigned long d;
; } __pai;
;
; void bar(volatile struct foo *p) {
; p->b = 1;
; p->h = 2;
; p->w = 3;
; p->d = 4;
; }
;
; Using the following command:
;
; clang -g -O2 -S -emit-llvm -mcpu=v4 --target=bpfel test.c

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"

@"llvm.foo:0:0$0:0" = external global i64, !llvm.preserve.access.index !0 #0
@"llvm.foo:0:2$0:1" = external global i64, !llvm.preserve.access.index !0 #0
@"llvm.foo:0:4$0:2" = external global i64, !llvm.preserve.access.index !0 #0
@"llvm.foo:0:8$0:3" = external global i64, !llvm.preserve.access.index !0 #0

; Function Attrs: nofree nounwind
; Stores the constants 1, 2, 3 and 4 (as i8, i16, i32 and i64) through %p.
; Each store address is %p plus an offset loaded from one of the
; "llvm.foo:..." globals and passed through llvm.bpf.passthrough.
define dso_local void @bar(ptr noundef %p) local_unnamed_addr #1 !dbg !18 {
entry:
call void @llvm.dbg.value(metadata ptr %p, metadata !24, metadata !DIExpression()), !dbg !25
; i8 store of 1, offset taken from "llvm.foo:0:0$0:0"
%0 = load i64, ptr @"llvm.foo:0:0$0:0", align 8
%1 = getelementptr i8, ptr %p, i64 %0
%2 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 0, ptr %1)
store volatile i8 1, ptr %2, align 8, !dbg !26, !tbaa !27
; i16 store of 2, offset taken from "llvm.foo:0:2$0:1"
%3 = load i64, ptr @"llvm.foo:0:2$0:1", align 8
%4 = getelementptr i8, ptr %p, i64 %3
%5 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 1, ptr %4)
store volatile i16 2, ptr %5, align 2, !dbg !34, !tbaa !35
; i32 store of 3, offset taken from "llvm.foo:0:4$0:2"
%6 = load i64, ptr @"llvm.foo:0:4$0:2", align 8
%7 = getelementptr i8, ptr %p, i64 %6
%8 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 2, ptr %7)
store volatile i32 3, ptr %8, align 4, !dbg !36, !tbaa !37
; i64 store of 4, offset taken from "llvm.foo:0:8$0:3"
%9 = load i64, ptr @"llvm.foo:0:8$0:3", align 8
%10 = getelementptr i8, ptr %p, i64 %9
%11 = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 3, ptr %10)
store volatile i64 4, ptr %11, align 8, !dbg !38, !tbaa !39
ret void, !dbg !40
}

; CHECK: [[L0:.Ltmp.*]]:
; CHECK: *(u8 *)(r1 + 0) = 1
; CHECK: [[L2:.Ltmp.*]]:
; CHECK: *(u16 *)(r1 + 2) = 2
; CHECK: [[L4:.Ltmp.*]]:
; CHECK: *(u32 *)(r1 + 4) = 3
; CHECK: [[L6:.Ltmp.*]]:
; CHECK: *(u64 *)(r1 + 8) = 4

; CHECK: .section .BTF
; ...
; CHECK: .long [[FOO:.*]] # BTF_KIND_STRUCT(id = [[FOO_ID:.*]])
; ...
; CHECK: .ascii "foo" # string offset=[[FOO]]
; CHECK: .ascii ".text" # string offset=[[TEXT:.*]]
; CHECK: .ascii "0:0" # string offset=[[S1:.*]]
; CHECK: .ascii "0:1" # string offset=[[S2:.*]]
; CHECK: .ascii "0:2" # string offset=[[S3:.*]]
; CHECK: .ascii "0:3" # string offset=[[S4:.*]]

; CHECK: .section .BTF.ext
; ...
; CHECK: .long [[#]] # FieldReloc
; CHECK-NEXT: .long [[TEXT]] # Field reloc section string offset=[[TEXT]]
; CHECK-NEXT: .long [[#]]
; CHECK-NEXT: .long [[L0]]
; CHECK-NEXT: .long [[FOO_ID]]
; CHECK-NEXT: .long [[S1]]
; CHECK-NEXT: .long 0
; CHECK-NEXT: .long [[L2]]
; CHECK-NEXT: .long [[FOO_ID]]
; CHECK-NEXT: .long [[S2]]
; CHECK-NEXT: .long 0
; CHECK-NEXT: .long [[L4]]
; CHECK-NEXT: .long [[FOO_ID]]
; CHECK-NEXT: .long [[S3]]
; CHECK-NEXT: .long 0
; CHECK-NEXT: .long [[L6]]
; CHECK-NEXT: .long [[FOO_ID]]
; CHECK-NEXT: .long [[S4]]
; CHECK-NEXT: .long 0

; Function Attrs: nofree nosync nounwind memory(none)
declare ptr @llvm.bpf.passthrough.p0.p0(i32, ptr) #2

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare void @llvm.dbg.value(metadata, metadata, metadata) #3

attributes #0 = { "btf_ama" }
attributes #1 = { nofree nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v4" }
attributes #2 = { nofree nosync nounwind memory(none) }
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

!llvm.dbg.cu = !{!11}
!llvm.module.flags = !{!12, !13, !14, !15, !16}
!llvm.ident = !{!17}

!0 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "foo", file: !1, line: 3, size: 128, elements: !2)
!1 = !DIFile(filename: "some-file.c", directory: "/some/dir", checksumkind: CSK_MD5, checksum: "e5d03b4d39dfffadc6c607e956c37996")
!2 = !{!3, !5, !7, !9}
!3 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !0, file: !1, line: 4, baseType: !4, size: 8)
!4 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char)
!5 = !DIDerivedType(tag: DW_TAG_member, name: "h", scope: !0, file: !1, line: 5, baseType: !6, size: 16, offset: 16)
!6 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned)
!7 = !DIDerivedType(tag: DW_TAG_member, name: "w", scope: !0, file: !1, line: 6, baseType: !8, size: 32, offset: 32)
!8 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
!9 = !DIDerivedType(tag: DW_TAG_member, name: "d", scope: !0, file: !1, line: 7, baseType: !10, size: 64, offset: 64)
!10 = !DIBasicType(name: "unsigned long", size: 64, encoding: DW_ATE_unsigned)
!11 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 18.0.0 ...", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
!12 = !{i32 7, !"Dwarf Version", i32 5}
!13 = !{i32 2, !"Debug Info Version", i32 3}
!14 = !{i32 1, !"wchar_size", i32 4}
!15 = !{i32 7, !"frame-pointer", i32 2}
!16 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
!17 = !{!"clang version 18.0.0 ..."}
!18 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 10, type: !19, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !11, retainedNodes: !23)
!19 = !DISubroutineType(types: !20)
!20 = !{null, !21}
!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !22, size: 64)
!22 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !0)
!23 = !{!24}
!24 = !DILocalVariable(name: "p", arg: 1, scope: !18, file: !1, line: 10, type: !21)
!25 = !DILocation(line: 0, scope: !18)
!26 = !DILocation(line: 11, column: 8, scope: !18)
!27 = !{!28, !29, i64 0}
!28 = !{!"foo", !29, i64 0, !31, i64 2, !32, i64 4, !33, i64 8}
!29 = !{!"omnipotent char", !30, i64 0}
!30 = !{!"Simple C/C++ TBAA"}
!31 = !{!"short", !29, i64 0}
!32 = !{!"int", !29, i64 0}
!33 = !{!"long", !29, i64 0}
!34 = !DILocation(line: 12, column: 8, scope: !18)
!35 = !{!28, !31, i64 2}
!36 = !DILocation(line: 13, column: 8, scope: !18)
!37 = !{!28, !32, i64 4}
!38 = !DILocation(line: 14, column: 8, scope: !18)
!39 = !{!28, !33, i64 8}
!40 = !DILocation(line: 15, column: 1, scope: !18)

0 comments on commit 8f28e80

Please sign in to comment.