Skip to content

Commit

Permalink
[CodeGen] Emit a more accurate alignment for non-temporal loads/stores (#75675)
Browse files Browse the repository at this point in the history

Call EmitPointerWithAlignment to compute the alignment based on the
underlying lvalue's alignment when it's available.
  • Loading branch information
ahatanak committed Dec 18, 2023
1 parent dd45be0 commit 31429e7
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 4 deletions.
8 changes: 4 additions & 4 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,19 +232,19 @@ static Value *MakeBinaryAtomicValue(

// Emits the store for a non-temporal store builtin call: argument 0 of E is
// the value and argument 1 is the destination pointer. Always returns nullptr.
// NOTE(review): this listing is a diff view whose +/- markers were lost; each
// pre-change line is immediately followed by the line that replaced it.
static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
Value *Val = CGF.EmitScalarExpr(E->getArg(0));
// Pre-change line: the pointer operand was emitted as a plain scalar value.
Value *Address = CGF.EmitScalarExpr(E->getArg(1));
// Post-change line: the pointer operand is emitted together with its alignment.
Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));

Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
// Pre-change line: the lvalue was built with the natural alignment of the value type.
LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getArg(0)->getType());
// Post-change line: the lvalue is built from Addr, so it uses Addr's alignment.
LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
// Flag the lvalue as non-temporal before the store is emitted through it.
LV.setNontemporal(true);
CGF.EmitStoreOfScalar(Val, LV, false);
return nullptr;
}

// Emits the load for a non-temporal load builtin call: argument 0 of E is the
// source pointer and the loaded value has the call expression's type. Returns
// the result of the emitted scalar load.
// NOTE(review): this listing is a diff view whose +/- markers were lost; each
// pre-change line is immediately followed by the line that replaced it.
static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
// Pre-change line: the pointer operand was emitted as a plain scalar value.
Value *Address = CGF.EmitScalarExpr(E->getArg(0));
// Post-change line: the pointer operand is emitted together with its alignment.
Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));

// Pre-change line: the lvalue was built with the natural alignment of the result type.
LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
// Post-change line: the lvalue is built from Addr, so it uses Addr's alignment.
LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
// Flag the lvalue as non-temporal before the load is emitted through it.
LV.setNontemporal(true);
return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
}
Expand Down
14 changes: 14 additions & 0 deletions clang/test/CodeGen/Nontemporal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,17 @@ void test_all_sizes(void) // CHECK-LABEL: test_all_sizes
vf2 = __builtin_nontemporal_load(&vf1); // CHECK: load <4 x float>{{.*}}align 16, !nontemporal
vc2 = __builtin_nontemporal_load(&vc1); // CHECK: load <8 x i8>{{.*}}align 8, !nontemporal
}

// Storage for the alignment test: a global whose only member is a 16-byte
// char array, accessed below through a cast to a vector pointer.
struct S { char c[16]; };
S x;

// 16-byte vector of int (the expected IR below spells it <4 x i32>).
typedef int v4si __attribute__ ((vector_size(16)));

// Expected IR for test_alignment: both the non-temporal load and the
// non-temporal store through the casted pointer must be emitted with align 1
// rather than the vector type's own alignment.
// CHECK-LABEL: define void @_Z14test_alignmentv()
// CHECK: load <4 x i32>, ptr @x, align 1, !nontemporal
// CHECK: store <4 x i32> %1, ptr @x, align 1, !nontemporal

// Loads a v4si from x.c and stores it back, both via the non-temporal builtins.
void test_alignment() {
auto t = __builtin_nontemporal_load((v4si*)x.c);
__builtin_nontemporal_store(t, (v4si*)x.c);
}

0 comments on commit 31429e7

Please sign in to comment.