55 changes: 55 additions & 0 deletions llvm/test/Transforms/InferAlignment/irregular-size.ll
@@ -0,0 +1,55 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=no-op-function -S | FileCheck %s

define void @non_pow2_size(i177 %X) {
; CHECK-LABEL: define void @non_pow2_size
; CHECK-SAME: (i177 [[X:%.*]]) {
; CHECK-NEXT: [[A:%.*]] = alloca i177, align 1
; CHECK-NEXT: [[L1:%.*]] = load i177, ptr [[A]], align 1
; CHECK-NEXT: store i177 [[X]], ptr [[A]], align 1
; CHECK-NEXT: ret void
;
%A = alloca i177, align 1
%L1 = load i177, ptr %A, align 1
store i177 %X, ptr %A, align 1
ret void
}

; TODO: For vectors with non-byte-sized elements, the current implementation
; assumes each element is padded out to the next byte boundary.
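; As an illustrative sketch (not checked by this test): @vector_i4 below is
; align 8. If each i4 element were padded to a byte, a <2 x i4> element would
; occupy 2 bytes, so %ptr.0 (array index 1) would sit at byte offset 2 and
; could be marked align 2; with tight 1-byte packing of <2 x i4> the offset is
; only 1 byte and nothing better than align 1 can be proven.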
@vector_i4 = constant [16 x <2 x i4>] zeroinitializer, align 8

define void @load_vector_i4(i4 %X) {
; CHECK-LABEL: define void @load_vector_i4
; CHECK-SAME: (i4 [[X:%.*]]) {
; CHECK-NEXT: [[PTR_0:%.*]] = getelementptr [16 x <2 x i4>], ptr @vector_i4, i64 0, i64 1
; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr [16 x <2 x i4>], ptr @vector_i4, i64 0, i64 2
; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr [16 x <2 x i4>], ptr @vector_i4, i64 0, i64 4
; CHECK-NEXT: [[PTR_3:%.*]] = getelementptr [16 x <2 x i4>], ptr @vector_i4, i64 0, i64 8
; CHECK-NEXT: [[RES_0:%.*]] = load i4, ptr [[PTR_0]], align 1
; CHECK-NEXT: [[RES_1:%.*]] = load i4, ptr [[PTR_1]], align 1
; CHECK-NEXT: [[RES_2:%.*]] = load i4, ptr [[PTR_2]], align 1
; CHECK-NEXT: [[RES_3:%.*]] = load i4, ptr [[PTR_3]], align 1
; CHECK-NEXT: store i4 [[X]], ptr [[PTR_0]], align 1
; CHECK-NEXT: store i4 [[X]], ptr [[PTR_1]], align 1
; CHECK-NEXT: store i4 [[X]], ptr [[PTR_2]], align 1
; CHECK-NEXT: store i4 [[X]], ptr [[PTR_3]], align 1
; CHECK-NEXT: ret void
;
%ptr.0 = getelementptr [16 x <2 x i4>], ptr @vector_i4, i64 0, i64 1
%ptr.1 = getelementptr [16 x <2 x i4>], ptr @vector_i4, i64 0, i64 2
%ptr.2 = getelementptr [16 x <2 x i4>], ptr @vector_i4, i64 0, i64 4
%ptr.3 = getelementptr [16 x <2 x i4>], ptr @vector_i4, i64 0, i64 8

%res.0 = load i4, ptr %ptr.0, align 1
%res.1 = load i4, ptr %ptr.1, align 1
%res.2 = load i4, ptr %ptr.2, align 1
%res.3 = load i4, ptr %ptr.3, align 1

store i4 %X, ptr %ptr.0, align 1
store i4 %X, ptr %ptr.1, align 1
store i4 %X, ptr %ptr.2, align 1
store i4 %X, ptr %ptr.3, align 1

ret void
}
248 changes: 248 additions & 0 deletions llvm/test/Transforms/InferAlignment/propagate-assume.ll
@@ -0,0 +1,248 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=no-op-function -S | FileCheck %s

; ------------------------------------------------------------------------------
; Simple test
; ------------------------------------------------------------------------------
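; In the tests below, the ptrtoint/and/icmp/assume sequence encodes an
; alignment assumption: masking the address with 31 (= 32 - 1) and comparing
; the result against zero asserts that the low five bits of %a are clear,
; i.e. that %a is 32-byte aligned. This is the same fact the
; "align"(ptr %a, i32 32) operand bundle expresses in the *_bundle variants.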

define void @simple_forwardpropagate(ptr %a) {
; CHECK-LABEL: define void @simple_forwardpropagate
; CHECK-SAME: (ptr [[A:%.*]]) {
; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31
; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
; CHECK-NEXT: [[LOAD_A:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: store i32 345, ptr [[A]], align 4
; CHECK-NEXT: ret void
;
%ptrint = ptrtoint ptr %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)

%load.a = load i32, ptr %a, align 4
store i32 345, ptr %a, align 4

ret void
}

define void @simple_backpropagate(ptr %a) {
; CHECK-LABEL: define void @simple_backpropagate
; CHECK-SAME: (ptr [[A:%.*]]) {
; CHECK-NEXT: [[LOAD_A:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: store i32 345, ptr [[A]], align 4
; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31
; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
; CHECK-NEXT: ret void
;
%load.a = load i32, ptr %a, align 4
store i32 345, ptr %a, align 4

%ptrint = ptrtoint ptr %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)

ret void
}

define void @simple_forwardpropagate_bundle(ptr %a) {
; CHECK-LABEL: define void @simple_forwardpropagate_bundle
; CHECK-SAME: (ptr [[A:%.*]]) {
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i32 32) ]
; CHECK-NEXT: [[LOAD_A:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: store i32 345, ptr [[A]], align 4
; CHECK-NEXT: ret void
;
call void @llvm.assume(i1 true) ["align"(ptr %a, i32 32)]
%load.a = load i32, ptr %a, align 4
store i32 345, ptr %a, align 4
ret void
}

define void @simple_backpropagate_bundle(ptr %a) {
; CHECK-LABEL: define void @simple_backpropagate_bundle
; CHECK-SAME: (ptr [[A:%.*]]) {
; CHECK-NEXT: [[LOAD_A:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: store i32 345, ptr [[A]], align 4
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i32 32) ]
; CHECK-NEXT: ret void
;
%load.a = load i32, ptr %a, align 4
store i32 345, ptr %a, align 4
call void @llvm.assume(i1 true) ["align"(ptr %a, i32 32)]
ret void
}

; ------------------------------------------------------------------------------
; Complex test
; ------------------------------------------------------------------------------

define void @loop_forwardpropagate(ptr %a, ptr %b) {
; CHECK-LABEL: define void @loop_forwardpropagate
; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 63
; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
; CHECK-NEXT: [[PTRINT2:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT: [[MASKEDPTR2:%.*]] = and i64 [[PTRINT2]], 63
; CHECK-NEXT: [[MASKEDCOND2:%.*]] = icmp eq i64 [[MASKEDPTR2]], 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKEDCOND2]])
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
; CHECK-NEXT: [[LOAD_B:%.*]] = load i32, ptr [[GEP_B]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[LOAD_B]], 1
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]]
; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP_A]], align 4
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 16
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[I_NEXT]], 1648
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
%ptrint = ptrtoint ptr %a to i64
%maskedptr = and i64 %ptrint, 63
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)

%ptrint2 = ptrtoint ptr %b to i64
%maskedptr2 = and i64 %ptrint2, 63
%maskedcond2 = icmp eq i64 %maskedptr2, 0
tail call void @llvm.assume(i1 %maskedcond2)

br label %for.body

for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]

%gep.b = getelementptr inbounds i32, ptr %b, i64 %i
%load.b = load i32, ptr %gep.b, align 4
%add = add nsw i32 %load.b, 1

%gep.a = getelementptr inbounds i32, ptr %a, i64 %i
store i32 %add, ptr %gep.a, align 4

%i.next = add nuw nsw i64 %i, 16
%cmp = icmp slt i64 %i.next, 1648

br i1 %cmp, label %for.body, label %for.end

for.end:
ret void
}

define void @loop_forwardpropagate_bundle(ptr %a, ptr %b) {
; CHECK-LABEL: define void @loop_forwardpropagate_bundle
; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i32 64) ]
; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[B]], i32 64) ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
; CHECK-NEXT: [[LOAD_B:%.*]] = load i32, ptr [[GEP_B]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[LOAD_B]], 1
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]]
; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP_A]], align 4
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 16
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[I_NEXT]], 1648
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
entry:
tail call void @llvm.assume(i1 true) ["align"(ptr %a, i32 64)]
tail call void @llvm.assume(i1 true) ["align"(ptr %b, i32 64)]
br label %for.body

for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]

%gep.b = getelementptr inbounds i32, ptr %b, i64 %i
%load.b = load i32, ptr %gep.b, align 4
%add = add nsw i32 %load.b, 1

%gep.a = getelementptr inbounds i32, ptr %a, i64 %i
store i32 %add, ptr %gep.a, align 4

%i.next = add nuw nsw i64 %i, 16
%cmp = icmp slt i64 %i.next, 1648

br i1 %cmp, label %for.body, label %for.end

for.end:
ret void
}

; Check that the assume is propagated backwards through all operations that
; satisfy `isGuaranteedToTransferExecutionToSuccessor`
; (it should reach the load of %a and mark it as `align 32`).
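; A sketch of why this matters: the loads, the stores, and the llvm.objectsize
; call below are all guaranteed to transfer execution to their successor, so
; the alignment fact established by the later assume can be applied to the
; earlier load of %a. If, hypothetically, a call that might not return (say,
; call void @may_not_return()) stood in between, backward propagation would
; presumably have to stop at that point.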
define void @complex_backpropagate(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: define void @complex_backpropagate
; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) {
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i64, align 8
; CHECK-NEXT: [[LOAD_A:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: [[LOAD_B:%.*]] = load i32, ptr [[B]], align 4
; CHECK-NEXT: store i32 [[LOAD_B]], ptr [[A]], align 4
; CHECK-NEXT: [[OBJ_SIZE:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[C]], i1 false, i1 false, i1 false)
; CHECK-NEXT: store i64 [[OBJ_SIZE]], ptr [[ALLOCA]], align 4
; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31
; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
; CHECK-NEXT: ret void
;
%alloca = alloca i64
%load.a = load i32, ptr %a, align 4

%load.b = load i32, ptr %b
store i32 %load.b, ptr %a

%obj.size = call i64 @llvm.objectsize.i64.p0(ptr %c, i1 false)
store i64 %obj.size, ptr %alloca

%ptrint = ptrtoint ptr %a to i64
%maskedptr = and i64 %ptrint, 31
%maskcond = icmp eq i64 %maskedptr, 0
tail call void @llvm.assume(i1 %maskcond)

ret void
}

define void @complex_backpropagate_bundle(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: define void @complex_backpropagate_bundle
; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) {
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i64, align 8
; CHECK-NEXT: [[LOAD_A:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: [[LOAD_B:%.*]] = load i32, ptr [[B]], align 4
; CHECK-NEXT: store i32 [[LOAD_B]], ptr [[A]], align 4
; CHECK-NEXT: [[OBJ_SIZE:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[C]], i1 false, i1 false, i1 false)
; CHECK-NEXT: store i64 [[OBJ_SIZE]], ptr [[ALLOCA]], align 4
; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i32 32) ]
; CHECK-NEXT: ret void
;
%alloca = alloca i64
%load.a = load i32, ptr %a, align 4

%load.b = load i32, ptr %b
store i32 %load.b, ptr %a

%obj.size = call i64 @llvm.objectsize.i64.p0(ptr %c, i1 false)
store i64 %obj.size, ptr %alloca

tail call void @llvm.assume(i1 true) ["align"(ptr %a, i32 32)]

ret void
}

declare i64 @llvm.objectsize.i64.p0(ptr, i1)
declare void @llvm.assume(i1)
77 changes: 77 additions & 0 deletions llvm/test/Transforms/InferAlignment/ptrmask.ll
@@ -0,0 +1,77 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=no-op-function -S | FileCheck %s

; ------------------------------------------------------------------------------
; load instructions
; ------------------------------------------------------------------------------
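; For reference: llvm.ptrmask ANDs the pointer's address with the mask, so a
; mask of -8 (...11111000) clears the low three bits and yields a pointer that
; is at least 8-byte aligned; -4 and -2 likewise guarantee 4- and 2-byte
; alignment. InferAlignment would be expected to raise the align 1 accesses
; below accordingly.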

define void @load(ptr align 1 %ptr) {
; CHECK-LABEL: define void @load
; CHECK-SAME: (ptr align 1 [[PTR:%.*]]) {
; CHECK-NEXT: [[ALIGNED_0:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR]], i64 -2)
; CHECK-NEXT: [[ALIGNED_1:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR]], i64 -4)
; CHECK-NEXT: [[ALIGNED_2:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR]], i64 -8)
; CHECK-NEXT: [[LOAD_0:%.*]] = load <16 x i8>, ptr [[ALIGNED_0]], align 1
; CHECK-NEXT: [[LOAD_1:%.*]] = load <16 x i8>, ptr [[ALIGNED_1]], align 1
; CHECK-NEXT: [[LOAD_2:%.*]] = load <16 x i8>, ptr [[ALIGNED_2]], align 1
; CHECK-NEXT: ret void
;
%aligned.0 = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -2)
%aligned.1 = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -4)
%aligned.2 = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8)

%load.0 = load <16 x i8>, ptr %aligned.0, align 1
%load.1 = load <16 x i8>, ptr %aligned.1, align 1
%load.2 = load <16 x i8>, ptr %aligned.2, align 1

ret void
}

; ------------------------------------------------------------------------------
; store instructions
; ------------------------------------------------------------------------------

define void @store(ptr align 1 %ptr) {
; CHECK-LABEL: define void @store
; CHECK-SAME: (ptr align 1 [[PTR:%.*]]) {
; CHECK-NEXT: [[ALIGNED_0:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR]], i64 -2)
; CHECK-NEXT: [[ALIGNED_1:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR]], i64 -4)
; CHECK-NEXT: [[ALIGNED_2:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR]], i64 -8)
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[ALIGNED_0]], align 1
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[ALIGNED_1]], align 1
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[ALIGNED_2]], align 1
; CHECK-NEXT: ret void
;
%aligned.0 = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -2)
%aligned.1 = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -4)
%aligned.2 = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8)

store <16 x i8> zeroinitializer, ptr %aligned.0, align 1
store <16 x i8> zeroinitializer, ptr %aligned.1, align 1
store <16 x i8> zeroinitializer, ptr %aligned.2, align 1

ret void
}

; ------------------------------------------------------------------------------
; Overaligned pointer
; ------------------------------------------------------------------------------

; The underlying alignment is greater than the alignment forced by the ptrmask.
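; Sketch of the expected reasoning: %ptr is already 16-byte aligned, so its low
; four bits are zero and masking with -8 (which only clears the low three bits)
; does not change the address. The result therefore remains 16-byte aligned,
; and the mask should not be used to lower the known alignment to 8.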
define void @ptrmask_overaligned(ptr align 16 %ptr) {
; CHECK-LABEL: define void @ptrmask_overaligned
; CHECK-SAME: (ptr align 16 [[PTR:%.*]]) {
; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR]], i64 -8)
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[ALIGNED]], align 1
; CHECK-NEXT: ret void
;
%aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8)

%load = load <16 x i8>, ptr %aligned, align 1
store <16 x i8> zeroinitializer, ptr %aligned, align 1

ret void
}

declare ptr @llvm.ptrmask.p0.i64(ptr, i64)
26 changes: 26 additions & 0 deletions llvm/test/Transforms/InferAlignment/undef-and-null.ll
@@ -0,0 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -passes=no-op-function -S < %s | FileCheck %s

define void @load_undef_null(ptr %P) {
; CHECK-LABEL: define void @load_undef_null
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: [[RET_0:%.*]] = load i32, ptr undef, align 4
; CHECK-NEXT: [[RET_1:%.*]] = load i32, ptr null, align 4
; CHECK-NEXT: ret void
;
%ret.0 = load i32, ptr undef
%ret.1 = load i32, ptr null
ret void
}

define void @store_undef_null(ptr %P) {
; CHECK-LABEL: define void @store_undef_null
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: store i32 123, ptr undef, align 4
; CHECK-NEXT: store i32 124, ptr null, align 4
; CHECK-NEXT: ret void
;
store i32 123, ptr undef
store i32 124, ptr null
ret void
}
111 changes: 111 additions & 0 deletions llvm/test/Transforms/InferAlignment/vector.ll
@@ -0,0 +1,111 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=no-op-function -S | FileCheck %s

; InferAlignment should be able to prove vector alignment in the
; presence of a few mild address computation tricks.

; ------------------------------------------------------------------------------
; alloca
; ------------------------------------------------------------------------------

define void @alloca(<2 x i64> %y) {
; CHECK-LABEL: define void @alloca
; CHECK-SAME: (<2 x i64> [[Y:%.*]]) {
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca <2 x i64>, align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <2 x i64>, ptr [[ALLOCA]], align 1
; CHECK-NEXT: store <2 x i64> [[Y]], ptr [[ALLOCA]], align 1
; CHECK-NEXT: ret void
;
%alloca = alloca <2 x i64>
%load = load <2 x i64>, ptr %alloca, align 1
store <2 x i64> %y, ptr %alloca, align 1
ret void
}
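; For reference (not checked while running the no-op pass): the alloca above is
; already align 16 (the preferred alignment of <2 x i64> in the default
; datalayout), so InferAlignment would be expected to raise the align 1 load
; and store to align 16.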

; ------------------------------------------------------------------------------
; global
; ------------------------------------------------------------------------------

@x.vector = external global <2 x i64>, align 16

define void @global(<2 x i64> %y) {
; CHECK-LABEL: define void @global
; CHECK-SAME: (<2 x i64> [[Y:%.*]]) {
; CHECK-NEXT: [[LOAD:%.*]] = load <2 x i64>, ptr @x.vector, align 1
; CHECK-NEXT: store <2 x i64> [[Y]], ptr @x.vector, align 1
; CHECK-NEXT: ret void
;
%load = load <2 x i64>, ptr @x.vector, align 1
store <2 x i64> %y, ptr @x.vector, align 1
ret void
}

; ------------------------------------------------------------------------------
; getelementptr
; ------------------------------------------------------------------------------

@vector = external global <2 x i64>, align 16
@vector.arr = external global [13 x <2 x i64>], align 16

; ------------------------------------------------------------------------------
; 1d access
; ------------------------------------------------------------------------------

define void @vector_singular(i32 %i, <2 x i64> %y) {
; CHECK-LABEL: define void @vector_singular
; CHECK-SAME: (i32 [[I:%.*]], <2 x i64> [[Y:%.*]]) {
; CHECK-NEXT: [[GEP:%.*]] = getelementptr <2 x i64>, ptr @vector, i32 [[I]]
; CHECK-NEXT: [[LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 1
; CHECK-NEXT: store <2 x i64> [[Y]], ptr [[GEP]], align 1
; CHECK-NEXT: ret void
;
%gep = getelementptr <2 x i64>, ptr @vector, i32 %i
%load = load <2 x i64>, ptr %gep, align 1
store <2 x i64> %y, ptr %gep, align 1
ret void
}

; ------------------------------------------------------------------------------
; 2d access
; ------------------------------------------------------------------------------

define void @vector_array(i32 %i, i32 %j, <2 x i64> %y) {
; CHECK-LABEL: define void @vector_array
; CHECK-SAME: (i32 [[I:%.*]], i32 [[J:%.*]], <2 x i64> [[Y:%.*]]) {
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [13 x <2 x i64>], ptr @vector.arr, i32 [[I]], i32 [[J]]
; CHECK-NEXT: [[LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 1
; CHECK-NEXT: store <2 x i64> [[Y]], ptr [[GEP]], align 1
; CHECK-NEXT: ret void
;
%gep = getelementptr [13 x <2 x i64>], ptr @vector.arr, i32 %i, i32 %j
%load = load <2 x i64>, ptr %gep, align 1
store <2 x i64> %y, ptr %gep, align 1
ret void
}

; ------------------------------------------------------------------------------
; non-vector array type
; ------------------------------------------------------------------------------

; When we see an unaligned load or store from an insufficiently aligned global or
; alloca, increase the alignment, turning it into an aligned load or store.
@x.array = internal global [4 x i32] zeroinitializer
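; A hedged sketch of the expected effect: if @x.array is (or can be raised to
; be) 16-byte aligned, the <16 x i8> accesses at the start of the array could
; become align 16, while the accesses through %gep (element 2, i.e. byte
; offset 8) could only be raised to align 8.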

define void @nonvector_array() {
; CHECK-LABEL: define void @nonvector_array() {
; CHECK-NEXT: [[LOAD_0:%.*]] = load <16 x i8>, ptr @x.array, align 1
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @x.array, align 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [4 x i32], ptr @x.array, i16 0, i16 2
; CHECK-NEXT: [[LOAD_1:%.*]] = load <16 x i8>, ptr [[GEP]], align 1
; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[GEP]], align 1
; CHECK-NEXT: ret void
;
%load.0 = load <16 x i8>, ptr @x.array, align 1
store <16 x i8> zeroinitializer, ptr @x.array, align 1

%gep = getelementptr [4 x i32], ptr @x.array, i16 0, i16 2
%load.1 = load <16 x i8>, ptr %gep, align 1
store <16 x i8> zeroinitializer, ptr %gep, align 1

ret void
}
32 changes: 32 additions & 0 deletions llvm/test/Transforms/InferAlignment/volatile.ll
@@ -0,0 +1,32 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=no-op-function -S | FileCheck %s

define void @load_volatile() {
; CHECK-LABEL: define void @load_volatile() {
; CHECK-NEXT: [[A:%.*]] = alloca { i32 }, align 8
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[LOAD_A:%.*]] = load volatile i32, ptr [[A]], align 4
; CHECK-NEXT: [[LOAD_B:%.*]] = load volatile i32, ptr [[B]], align 4
; CHECK-NEXT: ret void
;
%a = alloca { i32 }
%b = alloca i32
%load.a = load volatile i32, ptr %a
%load.b = load volatile i32, ptr %b
ret void
}

define void @store_volatile() {
; CHECK-LABEL: define void @store_volatile() {
; CHECK-NEXT: [[A:%.*]] = alloca { i32 }, align 8
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: store volatile i32 123, ptr [[A]], align 4
; CHECK-NEXT: store volatile i32 123, ptr [[B]], align 4
; CHECK-NEXT: ret void
;
%a = alloca { i32 }
%b = alloca i32
store volatile i32 123, ptr %a
store volatile i32 123, ptr %b
ret void
}
36 changes: 36 additions & 0 deletions llvm/test/Transforms/InferAlignment/vscale.ll
@@ -0,0 +1,36 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -passes=no-op-function -S < %s | FileCheck %s

; <4 x i32> -> 16 byte alignment
define void @alignment_sustain(ptr align 16 %ptr) {
; CHECK-LABEL: define void @alignment_sustain
; CHECK-SAME: (ptr align 16 [[PTR:%.*]]) {
; CHECK-NEXT: [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[PTR]], i32 3
; CHECK-NEXT: [[LOAD:%.*]] = load <4 x i32>, ptr [[GEP]], align 16
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[GEP]], align 16
; CHECK-NEXT: ret void
;
%gep = getelementptr <vscale x 4 x i32>, ptr %ptr, i32 3

%load = load <4 x i32>, ptr %gep, align 16
store <4 x i32> zeroinitializer, ptr %gep, align 16

ret void
}
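; The GEP above advances by 3 * vscale * 16 bytes; whatever vscale is, that is
; a multiple of 16, so from an align 16 base the accesses stay provably
; 16-byte aligned (but no better can be guaranteed).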

; <8 x i32> -> 32 byte alignment
define void @alignment_increase(ptr align 32 %ptr) {
; CHECK-LABEL: define void @alignment_increase
; CHECK-SAME: (ptr align 32 [[PTR:%.*]]) {
; CHECK-NEXT: [[GEP:%.*]] = getelementptr <vscale x 8 x i32>, ptr [[PTR]], i32 3
; CHECK-NEXT: [[LOAD:%.*]] = load <8 x i32>, ptr [[GEP]], align 16
; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr [[GEP]], align 16
; CHECK-NEXT: ret void
;
%gep = getelementptr <vscale x 8 x i32>, ptr %ptr, i32 3

%load = load <8 x i32>, ptr %gep, align 16
store <8 x i32> zeroinitializer, ptr %gep, align 16

ret void
}
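; Here the GEP advances by 3 * vscale * 32 bytes, a multiple of 32, so from an
; align 32 base the <8 x i32> accesses could presumably be raised from align 16
; to align 32 once the real pass runs.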