18 changes: 9 additions & 9 deletions llvm/test/Analysis/CostModel/RISCV/cast.ll
@@ -1075,7 +1075,7 @@ define void @trunc() {
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_v64i1 = trunc <64 x i8> undef to <64 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i16_v64i1 = trunc <64 x i16> undef to <64 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i32_v64i1 = trunc <64 x i32> undef to <64 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64i1 = trunc <64 x i64> undef to <64 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i64_v64i1 = trunc <64 x i64> undef to <64 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i16_v128i8 = trunc <128 x i16> undef to <128 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i32_v128i8 = trunc <128 x i32> undef to <128 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128i64_v128i8 = trunc <128 x i64> undef to <128 x i8>
@@ -1085,7 +1085,7 @@ define void @trunc() {
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i8_v128i1 = trunc <128 x i8> undef to <128 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v128i16_v128i1 = trunc <128 x i16> undef to <128 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i32_v128i1 = trunc <128 x i32> undef to <128 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v128i64_v128i1 = trunc <128 x i64> undef to <128 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i64_v128i1 = trunc <128 x i64> undef to <128 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v256i16_v256i8 = trunc <256 x i16> undef to <256 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v256i32_v256i8 = trunc <256 x i32> undef to <256 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v256i64_v256i8 = trunc <256 x i64> undef to <256 x i8>
@@ -1095,7 +1095,7 @@ define void @trunc() {
; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v256i8_v256i1 = trunc <256 x i8> undef to <256 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v256i16_v256i1 = trunc <256 x i16> undef to <256 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v256i32_v256i1 = trunc <256 x i32> undef to <256 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v256i64_v256i1 = trunc <256 x i64> undef to <256 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v256i64_v256i1 = trunc <256 x i64> undef to <256 x i1>
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1i8 = trunc <vscale x 1 x i16> undef to <vscale x 1 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i32_nxv1i8 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i64_nxv1i8 = trunc <vscale x 1 x i64> undef to <vscale x 1 x i8>
@@ -1227,8 +1227,8 @@ define void @trunc() {
; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i64_v64i32 = trunc <64 x i64> undef to <64 x i32>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_v64i1 = trunc <64 x i8> undef to <64 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i16_v64i1 = trunc <64 x i16> undef to <64 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i32_v64i1 = trunc <64 x i32> undef to <64 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64i1 = trunc <64 x i64> undef to <64 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v64i32_v64i1 = trunc <64 x i32> undef to <64 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v64i64_v64i1 = trunc <64 x i64> undef to <64 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i16_v128i8 = trunc <128 x i16> undef to <128 x i8>
; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i32_v128i8 = trunc <128 x i32> undef to <128 x i8>
; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128i64_v128i8 = trunc <128 x i64> undef to <128 x i8>
@@ -1237,8 +1237,8 @@ define void @trunc() {
; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i64_v128i32 = trunc <128 x i64> undef to <128 x i32>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i8_v128i1 = trunc <128 x i8> undef to <128 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v128i16_v128i1 = trunc <128 x i16> undef to <128 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i32_v128i1 = trunc <128 x i32> undef to <128 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v128i64_v128i1 = trunc <128 x i64> undef to <128 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v128i32_v128i1 = trunc <128 x i32> undef to <128 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v128i64_v128i1 = trunc <128 x i64> undef to <128 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v256i16_v256i8 = trunc <256 x i16> undef to <256 x i8>
; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v256i32_v256i8 = trunc <256 x i32> undef to <256 x i8>
; RV64-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v256i64_v256i8 = trunc <256 x i64> undef to <256 x i8>
@@ -1247,8 +1247,8 @@ define void @trunc() {
; RV64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v256i64_v256i32 = trunc <256 x i64> undef to <256 x i32>
; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v256i8_v256i1 = trunc <256 x i8> undef to <256 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v256i16_v256i1 = trunc <256 x i16> undef to <256 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v256i32_v256i1 = trunc <256 x i32> undef to <256 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v256i64_v256i1 = trunc <256 x i64> undef to <256 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v256i32_v256i1 = trunc <256 x i32> undef to <256 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v256i64_v256i1 = trunc <256 x i64> undef to <256 x i1>
; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1i8 = trunc <vscale x 1 x i16> undef to <vscale x 1 x i8>
; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i32_nxv1i8 = trunc <vscale x 1 x i32> undef to <vscale x 1 x i8>
; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i64_nxv1i8 = trunc <vscale x 1 x i64> undef to <vscale x 1 x i8>
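A note on the RISC-V cost-model hunks above: the CHECK lines in these files are machine-generated from the cost-model analysis printer, so a TTI change renumbers them wholesale. A rough sketch of how such lines can be reproduced and regenerated, assuming a locally built opt with the RISC-V target enabled (the authoritative triple and -mattr flags are the ones in the test's own RUN lines, not the illustrative ones below):

  # Print the estimated costs that the FileCheck assertions capture (illustrative flags).
  opt -passes='print<cost-model>' -disable-output -mtriple=riscv64 -mattr=+v \
      llvm/test/Analysis/CostModel/RISCV/cast.ll

  # Regenerate the CHECK lines in place after a cost-model change
  # (assumes opt is on PATH or passed via --opt-binary).
  llvm/utils/update_analyze_test_checks.py llvm/test/Analysis/CostModel/RISCV/cast.ll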
16 changes: 8 additions & 8 deletions llvm/test/Analysis/CostModel/RISCV/fca-load-store.ll
@@ -4,10 +4,10 @@

define void @load(ptr %p) {
; CHECK-LABEL: 'load'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = load [2 x i64], ptr %p, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = load [4 x i64], ptr %p, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = load { i64, i64 }, ptr %p, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = load { i64, i32 }, ptr %p, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = load [2 x i64], ptr %p, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = load [4 x i64], ptr %p, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = load { i64, i64 }, ptr %p, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = load { i64, i32 }, ptr %p, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
load [2 x i64], ptr %p
@@ -20,10 +20,10 @@ define void @load(ptr %p) {

define void @store(ptr %p) {
; CHECK-LABEL: 'store'
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store [2 x i64] undef, ptr %p, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store [4 x i64] undef, ptr %p, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store { i64, i64 } undef, ptr %p, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store { i64, i32 } undef, ptr %p, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store [2 x i64] undef, ptr %p, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store [4 x i64] undef, ptr %p, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store { i64, i64 } undef, ptr %p, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store { i64, i32 } undef, ptr %p, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
store [2 x i64] undef, ptr %p
6 changes: 3 additions & 3 deletions llvm/test/Analysis/CostModel/RISCV/load-to-trunc.ll
@@ -8,8 +8,8 @@
; Check that cost is 1 for unusual load to register sized load.
define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualIntegerWithTrunc'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc = trunc i128 %out to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i128 %out to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc
;
%out = load i128, ptr %ptr
@@ -19,7 +19,7 @@ define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {

define i128 @loadUnusualInteger(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualInteger'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, ptr %ptr, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out
;
%out = load i128, ptr %ptr
4 changes: 2 additions & 2 deletions llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll
@@ -44,7 +44,7 @@ define void @load(ptr %p) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = load <vscale x 8 x i32>, ptr %p, align 32
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %38 = load <vscale x 16 x i32>, ptr %p, align 64
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %39 = load <vscale x 32 x i32>, ptr %p, align 128
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load i64, ptr %p, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load i64, ptr %p, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <1 x i64>, ptr %p, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = load <2 x i64>, ptr %p, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %43 = load <4 x i64>, ptr %p, align 32
@@ -187,7 +187,7 @@ define void @store(ptr %p) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <vscale x 8 x i32> undef, ptr %p, align 32
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <vscale x 16 x i32> undef, ptr %p, align 64
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <vscale x 32 x i32> undef, ptr %p, align 128
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr %p, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr %p, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, ptr %p, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr %p, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, ptr %p, align 32
4 changes: 2 additions & 2 deletions llvm/test/Analysis/CostModel/SystemZ/load-to-trunc.ll
@@ -8,7 +8,7 @@
; Check that cost is 1 for unusual load to register sized load.
define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualIntegerWithTrunc'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i128 %out to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc
;
@@ -19,7 +19,7 @@ define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {

define i128 @loadUnusualInteger(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualInteger'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out
;
%out = load i128, ptr %ptr
4 changes: 2 additions & 2 deletions llvm/test/Analysis/CostModel/X86/load-to-trunc.ll
@@ -9,7 +9,7 @@
; Check that cost is 1 for unusual load to register sized load.
define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualIntegerWithTrunc'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i128 %out to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc
;
@@ -20,7 +20,7 @@ define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {

define i128 @loadUnusualInteger(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualInteger'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out
;
%out = load i128, ptr %ptr
48 changes: 24 additions & 24 deletions llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll
@@ -542,20 +542,20 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" {
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
@@ -569,8 +569,8 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" {
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
@@ -587,17 +587,17 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" {
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512VL512-LABEL: 'trunc_vXi1'
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
@@ -608,29 +608,29 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" {
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SKX256-LABEL: 'trunc_vXi1'
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
@@ -650,14 +650,14 @@ define i32 @trunc_vXi1() "min-legal-vector-width"="256" {
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1>
; SKX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1>
4 changes: 2 additions & 2 deletions llvm/test/Analysis/CostModel/X86/size-cost.ll
@@ -48,7 +48,7 @@ define double @bitcast_i64_f64(i64 %x) {

define ptr @inttoptr_i64_p64(i64 %x) {
; CHECK-LABEL: 'inttoptr_i64_p64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = inttoptr i64 %x to ptr
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = inttoptr i64 %x to ptr
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret ptr %r
;
%r = inttoptr i64 %x to ptr
@@ -57,7 +57,7 @@ define ptr @inttoptr_i64_p64(i64 %x) {

define i64 @ptrtoint_p64_i64(ptr %x) {
; CHECK-LABEL: 'ptrtoint_p64_i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = ptrtoint ptr %x to i64
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = ptrtoint ptr %x to i64
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
;
%r = ptrtoint ptr %x to i64
325 changes: 21 additions & 304 deletions llvm/test/Analysis/CostModel/X86/trunc-codesize.ll

Large diffs are not rendered by default.

325 changes: 21 additions & 304 deletions llvm/test/Analysis/CostModel/X86/trunc-latency.ll

Large diffs are not rendered by default.

325 changes: 21 additions & 304 deletions llvm/test/Analysis/CostModel/X86/trunc-sizelatency.ll

Large diffs are not rendered by default.

252 changes: 126 additions & 126 deletions llvm/test/Analysis/CostModel/X86/trunc.ll

Large diffs are not rendered by default.
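A note on the collapsed X86 trunc cost files above: they appear to exercise the same set of trunc instructions under the different TTI cost kinds, which is why all four churn together in this change. A hedged sketch of the kind of invocations involved, assuming an x86-enabled opt build (the exact RUN lines live in the test files themselves):

  opt -passes='print<cost-model>' -disable-output -mtriple=x86_64-- trunc.ll
  opt -passes='print<cost-model>' -cost-kind=code-size    -disable-output -mtriple=x86_64-- trunc-codesize.ll
  opt -passes='print<cost-model>' -cost-kind=latency      -disable-output -mtriple=x86_64-- trunc-latency.ll
  opt -passes='print<cost-model>' -cost-kind=size-latency -disable-output -mtriple=x86_64-- trunc-sizelatency.ll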

14 changes: 8 additions & 6 deletions llvm/test/CodeGen/AArch64/arm64_32-gep-sink.ll
@@ -8,8 +8,9 @@ define void @test_simple_sink(ptr %base, i64 %offset) {
; CHECK-NEXT: [[TST:%.*]] = load i1, ptr [[ADDR]], align 1
; CHECK-NEXT: br i1 [[TST]], label [[NEXT:%.*]], label [[END:%.*]]
; CHECK: next:
; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[OFFSET]]
; CHECK-NEXT: [[TMP1:%.*]] = load volatile i1, ptr [[SUNKADDR]], align 1
; CHECK-NEXT: [[SUNKADDR:%.*]] = trunc i64 [[OFFSET]] to i32
; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[BASE]], i32 [[SUNKADDR]]
; CHECK-NEXT: [[TMP1:%.*]] = load volatile i1, ptr [[SUNKADDR1]], align 1
; CHECK-NEXT: ret void
; CHECK: end:
; CHECK-NEXT: ret void
@@ -33,8 +34,9 @@ define void @test_inbounds_sink(ptr %base, i64 %offset) {
; CHECK-NEXT: [[TST:%.*]] = load i1, ptr [[ADDR]], align 1
; CHECK-NEXT: br i1 [[TST]], label [[NEXT:%.*]], label [[END:%.*]]
; CHECK: next:
; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[BASE]], i64 [[OFFSET]]
; CHECK-NEXT: [[TMP1:%.*]] = load volatile i1, ptr [[SUNKADDR]], align 1
; CHECK-NEXT: [[SUNKADDR:%.*]] = trunc i64 [[OFFSET]] to i32
; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr inbounds i8, ptr [[BASE]], i32 [[SUNKADDR]]
; CHECK-NEXT: [[TMP1:%.*]] = load volatile i1, ptr [[SUNKADDR1]], align 1
; CHECK-NEXT: ret void
; CHECK: end:
; CHECK-NEXT: ret void
@@ -61,8 +63,8 @@ define void @test_add_sink(ptr %base, i64 %offset) {
; CHECK-NEXT: [[TST:%.*]] = load i1, ptr [[ADDR]], align 1
; CHECK-NEXT: br i1 [[TST]], label [[NEXT:%.*]], label [[END:%.*]]
; CHECK: next:
; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[OFFSET]]
; CHECK-NEXT: [[TMP1:%.*]] = load volatile i1, ptr [[SUNKADDR]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[ADDR64]] to ptr
; CHECK-NEXT: [[TMP2:%.*]] = load volatile i1, ptr [[TMP1]], align 1
; CHECK-NEXT: ret void
; CHECK: end:
; CHECK-NEXT: ret void
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/sme-aarch64-svcount-O3.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
define target("aarch64.svcount") @test_alloca_store_reload(target("aarch64.svcount") %val0, target("aarch64.svcount") %val1, ptr %iptr, ptr %pptr, i64 %N) nounwind {
; CHECK-LABEL: @test_alloca_store_reload(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i64 0, ptr [[IPTR:%.*]], align 4
; CHECK-NEXT: store i64 0, ptr [[IPTR:%.*]], align 8
; CHECK-NEXT: store target("aarch64.svcount") [[VAL0:%.*]], ptr [[PPTR:%.*]], align 2
; CHECK-NEXT: [[I1_PEEL:%.*]] = icmp eq i64 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[I1_PEEL]], label [[LOOP_EXIT:%.*]], label [[LOOP_BODY:%.*]]
; CHECK: loop.body:
; CHECK-NEXT: [[IND:%.*]] = phi i64 [ [[IND_NEXT:%.*]], [[LOOP_BODY]] ], [ 1, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[IPTR_GEP:%.*]] = getelementptr i64, ptr [[IPTR]], i64 [[IND]]
; CHECK-NEXT: store i64 [[IND]], ptr [[IPTR_GEP]], align 4
; CHECK-NEXT: store i64 [[IND]], ptr [[IPTR_GEP]], align 8
; CHECK-NEXT: store target("aarch64.svcount") [[VAL1:%.*]], ptr [[PPTR]], align 2
; CHECK-NEXT: [[IND_NEXT]] = add i64 [[IND]], 1
; CHECK-NEXT: [[I1:%.*]] = icmp eq i64 [[IND]], [[N]]
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -183,12 +183,12 @@ define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(ptr addr
define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 4
; AKF_HSA-NEXT: store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 8
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
; ATTRIBUTOR_HSA-NEXT: store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 4
; ATTRIBUTOR_HSA-NEXT: store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 8
; ATTRIBUTOR_HSA-NEXT: ret void
;
store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) %out
100 changes: 12 additions & 88 deletions llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll

Large diffs are not rendered by default.

@@ -56,12 +56,13 @@ define void @sincos_f32(float %x, ptr addrspace(1) nocapture writeonly %sin_out,
; CHECK-LABEL: define void @sincos_f32
; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4
; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[__SINCOS_]])
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__SINCOS_]], align 4
; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4
; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5)
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr
; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4
; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4
; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]])
; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4
; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4
; CHECK-NEXT: ret void
;
entry:
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -166,13 +166,13 @@ define void @use_dispatch_id() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_dispatch_id
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = call i64 @llvm.amdgcn.dispatch.id()
; AKF_HSA-NEXT: store volatile i64 [[VAL]], ptr addrspace(1) undef, align 4
; AKF_HSA-NEXT: store volatile i64 [[VAL]], ptr addrspace(1) undef, align 8
; AKF_HSA-NEXT: ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_dispatch_id
; ATTRIBUTOR_HSA-SAME: () #[[ATTR9:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = call i64 @llvm.amdgcn.dispatch.id()
; ATTRIBUTOR_HSA-NEXT: store volatile i64 [[VAL]], ptr addrspace(1) undef, align 4
; ATTRIBUTOR_HSA-NEXT: store volatile i64 [[VAL]], ptr addrspace(1) undef, align 8
; ATTRIBUTOR_HSA-NEXT: ret void
;
%val = call i64 @llvm.amdgcn.dispatch.id()
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ define void @use_everything_else() {
; CHECK-NEXT: [[VAL8:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: store volatile i32 [[VAL8]], ptr addrspace(1) null, align 4
; CHECK-NEXT: [[VAL9:%.*]] = call i64 @llvm.amdgcn.dispatch.id()
; CHECK-NEXT: store volatile i64 [[VAL9]], ptr addrspace(1) null, align 4
; CHECK-NEXT: store volatile i64 [[VAL9]], ptr addrspace(1) null, align 8
; CHECK-NEXT: ret void
;
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/lds-reject-absolute-addresses.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ define amdgpu_kernel void @kern() {
ret void
}

!0 = !{i64 0, i64 1}
!0 = !{i32 0, i32 1}

@@ -34,7 +34,7 @@ define void @bar() addrspace(1) {
; CHECK: @[[__INIT_ARRAY_END:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)]
; CHECK: @[[__FINI_ARRAY_START:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)]
; CHECK: @[[__FINI_ARRAY_END:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)]
; CHECK: @[[LLVM_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata"
; CHECK: @[[LLVM_USED:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata"
; CHECK: @[[FOO_ALIAS:[a-zA-Z0-9_$"\\.-]+]] = hidden alias void (), ptr @foo
;.
; CHECK-LABEL: define void @foo(
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ define internal void @bar() {
; CHECK: @[[__INIT_ARRAY_END:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)]
; CHECK: @[[__FINI_ARRAY_START:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)]
; CHECK: @[[__FINI_ARRAY_END:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)]
; CHECK: @[[LLVM_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata"
; CHECK: @[[LLVM_USED:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata"
;.
; CHECK-LABEL: define internal void @foo() {
; CHECK-NEXT: ret void
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
@@ -15,7 +15,7 @@

;.
; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t poison, align 8, !absolute_symbol !0
; CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
; CHECK: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t poison, align 16, !absolute_symbol !0
; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t poison, align 16, !absolute_symbol !0
; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t poison, align 2, !absolute_symbol !0
@@ -99,4 +99,4 @@ define void @f0() {
; CHECK: attributes #3 = { "amdgpu-lds-size"="4" }
; CHECK: attributes #4 = { "amdgpu-lds-size"="9" }

; CHECK: !0 = !{i64 0, i64 1}
; CHECK: !0 = !{i32 0, i32 1}
8 changes: 3 additions & 5 deletions llvm/test/CodeGen/AMDGPU/lower-kernel-lds-super-align.ll
@@ -24,7 +24,7 @@
; SUPER-ALIGN_ON: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t poison, align 16
; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t poison, align 8

; SUPER-ALIGN_ON: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t poison, align 16
; SUPER-ALIGN_ON: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t poison, align 8
; SUPER-ALIGN_OFF: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t poison, align 4

; CHECK-LABEL: @k1
@@ -133,11 +133,9 @@ define amdgpu_kernel void @k3(i64 %x) {
; Check that alignment is not propagated if use is not a pointer operand.

; CHECK-LABEL: @k4
; SUPER-ALIGN_ON: store i32 poison, ptr addrspace(3) %gep, align 8
; SUPER-ALIGN_OFF: store i32 poison, ptr addrspace(3) %gep, align 4
; CHECK: store i32 poison, ptr addrspace(3) %gep, align 4
; CHECK: store ptr addrspace(3) %gep, ptr poison, align 4
; SUPER-ALIGN_ON: %val1 = cmpxchg volatile ptr addrspace(3) %gep, i32 1, i32 2 monotonic monotonic, align 8
; SUPER-ALIGN_OFF: %val1 = cmpxchg volatile ptr addrspace(3) %gep, i32 1, i32 2 monotonic monotonic, align 4
; CHECK: %val1 = cmpxchg volatile ptr addrspace(3) %gep, i32 1, i32 2 monotonic monotonic, align 4
; CHECK: %val2 = cmpxchg volatile ptr poison, ptr addrspace(3) %gep, ptr addrspace(3) poison monotonic monotonic, align 4
define amdgpu_kernel void @k4() {
%gep = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) @lds.6, i64 1
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll
@@ -62,4 +62,4 @@ define amdgpu_ps void @k2() {
ret void
}

; CHECK: !0 = !{i64 0, i64 1}
; CHECK: !0 = !{i32 0, i32 1}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
@@ -69,7 +69,7 @@ declare void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noalias nocapture writeonly
; CHECK: attributes #[[ATTR0]] = { "amdgpu-lds-size"="7" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
;.
; CHECK: [[META0:![0-9]+]] = !{i64 0, i64 1}
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 1}
; CHECK: [[META1:![0-9]+]] = !{!2}
; CHECK: [[META2:![0-9]+]] = distinct !{!2, !3}
; CHECK: [[META3:![0-9]+]] = distinct !{!3}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ bb:
!8 = !{!"omnipotent char", !9, i64 0}
!9 = !{!"Simple C++ TBAA"}

; CHECK:!0 = !{i64 0, i64 1}
; CHECK:!0 = !{i32 0, i32 1}
; CHECK:!1 = !{!2, !3, i64 0}
; CHECK:!2 = !{!"no_clobber_ds_load_stores_x2_preexisting_aa", !3, i64 0}
; CHECK:!3 = !{!"int", !4, i64 0}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ bb:
ret void
}

; CHECK: !0 = !{i64 0, i64 1}
; CHECK: !0 = !{i32 0, i32 1}
; CHECK: !1 = !{!2}
; CHECK: !2 = distinct !{!2, !3}
; CHECK: !3 = distinct !{!3}
@@ -16,7 +16,7 @@
; CHECK: @dynamic_kernel_only = external addrspace(3) global [0 x double]
; CHECK: @dynamic_shared8 = external addrspace(3) global [0 x i64], align 8
; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t poison, align 4, !absolute_symbol !0
; CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
; CHECK: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"

; Alignment of these must be the maximum of the alignment of the reachable symbols
; CHECK: @llvm.amdgcn.expect_align1.dynlds = external addrspace(3) global [0 x i8], align 1, !absolute_symbol !0
@@ -103,7 +103,7 @@ define void @use_shared8() #0 {
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DYNAMIC_SHARED8]], align 4
; CHECK-NEXT: [[DYNAMIC_SHARED81:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i64], ptr addrspace(3) [[DYNAMIC_SHARED81]], i32 0, i32 7
; CHECK-NEXT: store i64 3, ptr addrspace(3) [[ARRAYIDX]], align 4
; CHECK-NEXT: store i64 3, ptr addrspace(3) [[ARRAYIDX]], align 8
; CHECK-NEXT: ret void
;
%arrayidx = getelementptr inbounds [0 x i64], ptr addrspace(3) @dynamic_shared8, i32 0, i32 7
@@ -149,7 +149,7 @@ define amdgpu_kernel void @expect_align8() {
; CHECK-LABEL: define amdgpu_kernel void @expect_align8() !llvm.amdgcn.lds.kernel.id !5 {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align8.dynlds) ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i64], ptr addrspace(3) @dynamic_shared8, i32 0, i32 9
; CHECK-NEXT: store i64 3, ptr addrspace(3) [[ARRAYIDX]], align 4
; CHECK-NEXT: store i64 3, ptr addrspace(3) [[ARRAYIDX]], align 8
; CHECK-NEXT: call void @use_shared8()
; CHECK-NEXT: ret void
;
@@ -188,8 +188,8 @@ attributes #0 = { noinline }
; CHECK: attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

; CHECK: !0 = !{i64 0, i64 1}
; CHECK: !1 = !{i64 4, i64 5}
; CHECK: !0 = !{i32 0, i32 1}
; CHECK: !1 = !{i32 4, i32 5}
; CHECK: !2 = !{i32 0}
; CHECK: !3 = !{i32 1}
; CHECK: !4 = !{i32 2}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/lower-module-lds-used-list.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
@llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(3) @tolower to ptr), ptr addrspacecast (ptr addrspace(1) @ignored to ptr)], section "llvm.metadata"

; @ignored still in list, @tolower removed, llvm.amdgcn.module.lds appended
; CHECK: @llvm.compiler.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(1) @ignored to ptr), ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
; CHECK: @llvm.compiler.used = appending addrspace(1) global [2 x ptr] [ptr addrspacecast (ptr addrspace(1) @ignored to ptr), ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"

@llvm.compiler.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(3) @tolower to ptr), ptr addrspacecast (ptr addrspace(1) @ignored to ptr)], section "llvm.metadata"

26 changes: 13 additions & 13 deletions llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn--amdhsa -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=hybrid | FileCheck -check-prefix=OPT %s
; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s --amdgpu-lower-module-lds-strategy=hybrid | FileCheck -check-prefix=GCN %s

@@ -12,7 +12,7 @@
@unused = addrspace(3) global i16 poison

; OPT: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t poison, align 16, !absolute_symbol !0
; OPT: @llvm.compiler.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
; OPT: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
; OPT: @llvm.amdgcn.kernel.kernel_no_table.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_no_table.lds.t poison, align 8, !absolute_symbol !0
; OPT: @llvm.amdgcn.kernel.k01.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k01.lds.t poison, align 4, !absolute_symbol !1
; OPT: @llvm.amdgcn.kernel.k23.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k23.lds.t poison, align 8, !absolute_symbol !0
@@ -73,12 +73,12 @@ define void @f2() {
; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V22]], align 4
; OPT-NEXT: [[V23:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) [[V23]], align 4
; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) [[V23]], align 8
; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 4
; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V2]], align 4
; OPT-NEXT: [[V21:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) [[V21]], align 4
; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) [[V21]], align 8
; OPT-NEXT: ret void
;
; GCN-LABEL: f2:
@@ -193,7 +193,7 @@ define amdgpu_kernel void @k01() {

define amdgpu_kernel void @k23() {
; OPT-LABEL: @k23(
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ], !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ], !alias.scope !4, !noalias !7
; OPT-NEXT: call void @f2()
; OPT-NEXT: call void @f3()
; OPT-NEXT: ret void
@@ -231,12 +231,12 @@ define amdgpu_kernel void @k23() {
; Access and allocate three variables
define amdgpu_kernel void @k123() {
; OPT-LABEL: @k123(
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ], !alias.scope [[META10:![0-9]+]], !noalias [[META13:![0-9]+]]
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ], !alias.scope !10, !noalias !13
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; OPT-NEXT: call void @f1()
; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope [[META13]], !noalias [[META10]]
; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !13, !noalias !10
; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 8
; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope [[META13]], !noalias [[META10]]
; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !13, !noalias !10
; OPT-NEXT: call void @f2()
; OPT-NEXT: ret void
;
@@ -285,14 +285,14 @@ define amdgpu_kernel void @k123() {


; OPT: attributes #0 = { "amdgpu-lds-size"="8" }
; OPT: attributes #1 = { "amdgpu-lds-size"="12" }
; OPT: attributes #2 = { "amdgpu-lds-size"="20" }
; OPT: attributes #1 = { "amdgpu-lds-size"="16" }
; OPT: attributes #2 = { "amdgpu-lds-size"="24" }
; OPT: attributes #3 = { nocallback nofree nosync nounwind willreturn memory(none) }
; OPT: attributes #4 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

; OPT: !0 = !{i64 0, i64 1}
; OPT: !1 = !{i64 4, i64 5}
; OPT: !2 = !{i64 8, i64 9}
; OPT: !0 = !{i32 0, i32 1}
; OPT: !1 = !{i32 4, i32 5}
; OPT: !2 = !{i32 8, i32 9}
; OPT: !3 = !{i32 1}
; OPT: !4 = !{!5}
; OPT: !5 = distinct !{!5, !6}
9 changes: 4 additions & 5 deletions llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,12 @@ define void @f2() {
; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V22]], align 4
; OPT-NEXT: [[V23:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) [[V23]], align 4
; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) [[V23]], align 8
; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 4
; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V2]], align 4
; OPT-NEXT: [[V21:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) [[V21]], align 4
; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) [[V21]], align 8
; OPT-NEXT: ret void
;
; GCN-LABEL: f2:
@@ -300,7 +300,7 @@ define amdgpu_kernel void @k23() {
; Access and allocate three variables
define amdgpu_kernel void @k123() {
; OPT-LABEL: define amdgpu_kernel void @k123(
; OPT-SAME: ) #[[ATTR2:[0-9]+]] !llvm.amdgcn.lds.kernel.id !13 {
; OPT-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id !13 {
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ], !alias.scope !14, !noalias !17
; OPT-NEXT: call void @f1()
; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope !20, !noalias !21
@@ -352,8 +352,7 @@ define amdgpu_kernel void @k123() {
; OPT: declare i32 @llvm.amdgcn.lds.kernel.id()

; OPT: attributes #0 = { "amdgpu-lds-size"="8" }
; OPT: attributes #1 = { "amdgpu-lds-size"="12" }
; OPT: attributes #2 = { "amdgpu-lds-size"="16" }
; OPT: attributes #1 = { "amdgpu-lds-size"="16" }

!0 = !{i64 0, i64 1}
!1 = !{i32 0}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
@@ -11,7 +11,7 @@
; CHECK: @__init_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
; CHECK: @__fini_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini]
; CHECK: @llvm.used = appending addrspace(1) global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini]
; UTC_ARGS: --enable


4 changes: 3 additions & 1 deletion llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
@@ -50,7 +50,7 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(ptr addrspace(3) nocap
; IR-NEXT: bb:
; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4:[0-9]+]]
; IR-NEXT: [[MY_TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(3) [[ARG:%.*]], i32 [[MY_TMP]]
; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, ptr addrspace(3) [[MY_TMP1]], align 4
; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, ptr addrspace(3) [[MY_TMP1]], align 8
; IR-NEXT: br label [[BB5:%.*]]
; IR: bb3:
; IR-NEXT: br i1 true, label [[BB4:%.*]], label [[BB13:%.*]]
@@ -93,6 +93,7 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(ptr addrspace(3) nocap
; IR: bb23:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]])
; IR-NEXT: ret void
;
bb:
%my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%my.tmp1 = getelementptr inbounds i64, ptr addrspace(3) %arg, i32 %my.tmp
@@ -276,6 +277,7 @@ define amdgpu_kernel void @nested_loop_conditions(ptr addrspace(1) nocapture %ar
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]])
; IR-NEXT: store volatile i32 0, ptr addrspace(1) undef, align 4
; IR-NEXT: ret void
;
bb:
%my.tmp1134 = load volatile i32, ptr addrspace(1) undef
%my.tmp1235 = icmp slt i32 %my.tmp1134, 9
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AMDGPU/opencl-printf.ll
@@ -86,7 +86,7 @@ define amdgpu_kernel void @format_str_f(float %f32.0, double %f64, float %f32.1,
; GCN-NEXT: [[PRINTBUFFNEXTPTR5:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR4]], i32 4
; GCN-NEXT: store i32 [[I32:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR5]], align 4
; GCN-NEXT: [[PRINTBUFFNEXTPTR6:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR5]], i32 4
; GCN-NEXT: store i64 [[I64:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], align 4
; GCN-NEXT: store i64 [[I64:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], align 8
; GCN-NEXT: [[PRINTBUFFNEXTPTR7:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], i32 8
; GCN-NEXT: store <2 x float> <float 1.000000e+00, float 2.000000e+00>, ptr addrspace(1) [[PRINTBUFFNEXTPTR7]], align 8
; GCN-NEXT: [[PRINTBUFFNEXTPTR8:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR7]], i32 8
@@ -108,7 +108,7 @@ define void @format_str_ptr(ptr %ptr.flat, ptr addrspace(3) %ptr.lds, ptr addrsp
; R600-NEXT: ret void
;
; GCN-LABEL: @format_str_ptr(
; GCN-NEXT: [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 44)
; GCN-NEXT: [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 36)
; GCN-NEXT: br label [[DOTSPLIT:%.*]]
; GCN: .split:
; GCN-NEXT: [[TMP1:%.*]] = icmp ne ptr addrspace(1) [[PRINTF_ALLOC_FN]], null
@@ -120,12 +120,12 @@ define void @format_str_ptr(ptr %ptr.flat, ptr addrspace(3) %ptr.lds, ptr addrsp
; GCN-NEXT: [[PRINTBUFFGEP:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 4
; GCN-NEXT: store ptr [[PTR_FLAT:%.*]], ptr addrspace(1) [[PRINTBUFFGEP]], align 8
; GCN-NEXT: [[PRINTBUFFNEXTPTR:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFGEP]], i32 8
; GCN-NEXT: store ptr addrspace(3) [[PTR_LDS:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR]], align 8
; GCN-NEXT: [[PRINTBUFFNEXTPTR1:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR]], i32 8
; GCN-NEXT: store ptr addrspace(3) [[PTR_LDS:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR]], align 4
; GCN-NEXT: [[PRINTBUFFNEXTPTR1:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR]], i32 4
; GCN-NEXT: store ptr addrspace(1) [[PTR_GLOBAL:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR1]], align 8
; GCN-NEXT: [[PRINTBUFFNEXTPTR2:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR1]], i32 8
; GCN-NEXT: store ptr addrspace(5) [[PTR_STACK:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR2]], align 8
; GCN-NEXT: [[PRINTBUFFNEXTPTR3:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR2]], i32 8
; GCN-NEXT: store ptr addrspace(5) [[PTR_STACK:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR2]], align 4
; GCN-NEXT: [[PRINTBUFFNEXTPTR3:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR2]], i32 4
; GCN-NEXT: store ptr addrspace(4) [[PTR_CONST:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR3]], align 8
; GCN-NEXT: br label [[TMP3]]
; GCN: 3:
@@ -145,7 +145,7 @@ define amdgpu_kernel void @format_str_d(i1 %i1, i4 %i4, i8 %i8, i24 %i24, i16 %i
; GCN-NEXT: [[TMP2:%.*]] = sext i4 [[I4:%.*]] to i32
; GCN-NEXT: [[TMP3:%.*]] = sext i8 [[I8:%.*]] to i32
; GCN-NEXT: [[TMP4:%.*]] = sext i16 [[I16:%.*]] to i32
; GCN-NEXT: [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 68)
; GCN-NEXT: [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 72)
; GCN-NEXT: br label [[DOTSPLIT:%.*]]
; GCN: .split:
; GCN-NEXT: [[TMP5:%.*]] = icmp ne ptr addrspace(1) [[PRINTF_ALLOC_FN]], null
@@ -167,11 +167,11 @@ define amdgpu_kernel void @format_str_d(i1 %i1, i4 %i4, i8 %i8, i24 %i24, i16 %i
; GCN-NEXT: [[PRINTBUFFNEXTPTR4:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR3]], i32 4
; GCN-NEXT: store i32 [[I32:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR4]], align 4
; GCN-NEXT: [[PRINTBUFFNEXTPTR5:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR4]], i32 4
; GCN-NEXT: store i64 [[I64:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR5]], align 4
; GCN-NEXT: store i64 [[I64:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR5]], align 8
; GCN-NEXT: [[PRINTBUFFNEXTPTR6:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR5]], i32 8
; GCN-NEXT: store i96 [[I96:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], align 4
; GCN-NEXT: [[PRINTBUFFNEXTPTR7:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], i32 12
; GCN-NEXT: store i128 [[I128:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR7]], align 4
; GCN-NEXT: store i96 [[I96:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], align 8
; GCN-NEXT: [[PRINTBUFFNEXTPTR7:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], i32 16
; GCN-NEXT: store i128 [[I128:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR7]], align 8
; GCN-NEXT: [[PRINTBUFFNEXTPTR8:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR7]], i32 16
; GCN-NEXT: store i32 1234, ptr addrspace(1) [[PRINTBUFFNEXTPTR8]], align 4
; GCN-NEXT: br label [[TMP7]]
@@ -192,7 +192,7 @@ define amdgpu_kernel void @format_str_u(i1 %i1, i4 %i4, i8 %i8, i24 %i24, i16 %i
; GCN-NEXT: [[TMP2:%.*]] = zext i4 [[I4:%.*]] to i32
; GCN-NEXT: [[TMP3:%.*]] = zext i8 [[I8:%.*]] to i32
; GCN-NEXT: [[TMP4:%.*]] = zext i16 [[I16:%.*]] to i32
; GCN-NEXT: [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 68)
; GCN-NEXT: [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 72)
; GCN-NEXT: br label [[DOTSPLIT:%.*]]
; GCN: .split:
; GCN-NEXT: [[TMP5:%.*]] = icmp ne ptr addrspace(1) [[PRINTF_ALLOC_FN]], null
@@ -214,11 +214,11 @@ define amdgpu_kernel void @format_str_u(i1 %i1, i4 %i4, i8 %i8, i24 %i24, i16 %i
; GCN-NEXT: [[PRINTBUFFNEXTPTR4:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR3]], i32 4
; GCN-NEXT: store i32 [[I32:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR4]], align 4
; GCN-NEXT: [[PRINTBUFFNEXTPTR5:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR4]], i32 4
; GCN-NEXT: store i64 [[I64:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR5]], align 4
; GCN-NEXT: store i64 [[I64:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR5]], align 8
; GCN-NEXT: [[PRINTBUFFNEXTPTR6:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR5]], i32 8
; GCN-NEXT: store i96 [[I96:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], align 4
; GCN-NEXT: [[PRINTBUFFNEXTPTR7:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], i32 12
; GCN-NEXT: store i128 [[I128:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR7]], align 4
; GCN-NEXT: store i96 [[I96:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], align 8
; GCN-NEXT: [[PRINTBUFFNEXTPTR7:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], i32 16
; GCN-NEXT: store i128 [[I128:%.*]], ptr addrspace(1) [[PRINTBUFFNEXTPTR7]], align 8
; GCN-NEXT: [[PRINTBUFFNEXTPTR8:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR7]], i32 16
; GCN-NEXT: store i32 1234, ptr addrspace(1) [[PRINTBUFFNEXTPTR8]], align 4
; GCN-NEXT: br label [[TMP7]]
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
@@ -134,10 +134,10 @@ define i32 @cmpxchg_private_i32(ptr addrspace(5) %ptr) {

define i64 @cmpxchg_private_i64(ptr addrspace(5) %ptr) {
; IR-LABEL: @cmpxchg_private_i64(
; IR-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(5) [[PTR:%.*]], align 4
; IR-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(5) [[PTR:%.*]], align 8
; IR-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
; IR-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 1, i64 [[TMP1]]
; IR-NEXT: store i64 [[TMP3]], ptr addrspace(5) [[PTR]], align 4
; IR-NEXT: store i64 [[TMP3]], ptr addrspace(5) [[PTR]], align 8
; IR-NEXT: [[TMP4:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP1]], 0
; IR-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } [[TMP4]], i1 [[TMP2]], 1
; IR-NEXT: [[RESULT_0:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
@@ -93,7 +93,7 @@ define amdgpu_vs void @promote_load_from_store_aggr() #0 {
; CHECK-NEXT: [[FOO3_FCA_0_EXTRACT:%.*]] = extractvalue [2 x float] [[FOO3]], 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float [[FOO3_FCA_0_EXTRACT]], i32 0
; CHECK-NEXT: [[FOO3_FCA_1_EXTRACT:%.*]] = extractvalue [2 x float] [[FOO3]], 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[FOO3_FCA_1_EXTRACT]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[FOO3_FCA_1_EXTRACT]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 [[FOO1]]
; CHECK-NEXT: [[FOO9:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
; CHECK-NEXT: [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[TMP3]], i32 1
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll
@@ -7,7 +7,7 @@ define amdgpu_kernel void @memset_all_zero(i64 %val) {
; CHECK-LABEL: @memset_all_zero(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <6 x i64> zeroinitializer, i64 [[VAL:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <6 x i64> [[TMP0]], i64 [[VAL]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <6 x i64> [[TMP0]], i64 [[VAL]], i32 1
; CHECK-NEXT: ret void
;
entry:
@@ -24,7 +24,7 @@ define amdgpu_kernel void @memset_all_5(i64 %val) {
; CHECK-LABEL: @memset_all_5(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> <i64 361700864190383365, i64 361700864190383365, i64 361700864190383365, i64 361700864190383365>, i64 [[VAL:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[VAL]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[VAL]], i32 1
; CHECK-NEXT: ret void
;
entry:
@@ -42,7 +42,7 @@ define amdgpu_kernel void @memset_volatile_nopromote(i64 %val) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5)
; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[STACK]], i8 0, i64 32, i1 true)
; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 4
; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 8
; CHECK-NEXT: ret void
;
entry:
@@ -57,7 +57,7 @@ define amdgpu_kernel void @memset_badsize_nopromote(i64 %val) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5)
; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[STACK]], i8 0, i64 31, i1 true)
; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 4
; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 8
; CHECK-NEXT: ret void
;
entry:
@@ -73,7 +73,7 @@ define amdgpu_kernel void @memset_offset_ptr_nopromote(i64 %val) {
; CHECK-NEXT: [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [4 x i64], ptr addrspace(5) [[STACK]], i64 0, i64 1
; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[GEP]], i8 0, i64 24, i1 true)
; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 4
; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 8
; CHECK-NEXT: ret void
;
entry:
3 changes: 0 additions & 3 deletions llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
@@ -1,8 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-rewrite-out-arguments < %s | FileCheck %s
; Temporarily add an explicit datalayout until https://reviews.llvm.org/D141060 lands
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
target triple = "amdgcn-amd-amdhsa"

define void @no_ret_blocks() #0 {
unreachable
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/BPF/CORE/field-reloc-bitfield-1-bpfeb.ll
@@ -36,11 +36,11 @@ define dso_local i32 @test(ptr %arg) local_unnamed_addr #0 !dbg !13 {
; CHECK-ALU64-NEXT: # %bb.0: # %entry
; CHECK-ALU64-NEXT: #DEBUG_VALUE: test:arg <- $r1
; CHECK-ALU64-NEXT: .Ltmp0:
; CHECK-ALU64-NEXT: r1 = 20
; CHECK-ALU64-NEXT: r1 = 16
; CHECK-ALU64-NEXT: .Ltmp1:
; CHECK-ALU64-NEXT: .Ltmp2:
; CHECK-ALU64-NEXT: .Ltmp3:
; CHECK-ALU64-NEXT: r0 = 4
; CHECK-ALU64-NEXT: r0 = 8
; CHECK-ALU64-NEXT: .Ltmp4:
; CHECK-ALU64-NEXT: .loc 1 12 69 prologue_end # test.c:12:69
; CHECK-ALU64-NEXT: .Ltmp5:
@@ -67,11 +67,11 @@ define dso_local i32 @test(ptr %arg) local_unnamed_addr #0 !dbg !13 {
; CHECK-ALU32-NEXT: # %bb.0: # %entry
; CHECK-ALU32-NEXT: #DEBUG_VALUE: test:arg <- $r1
; CHECK-ALU32-NEXT: .Ltmp0:
; CHECK-ALU32-NEXT: r1 = 20
; CHECK-ALU32-NEXT: r1 = 16
; CHECK-ALU32-NEXT: .Ltmp1:
; CHECK-ALU32-NEXT: .Ltmp2:
; CHECK-ALU32-NEXT: .Ltmp3:
; CHECK-ALU32-NEXT: r0 = 4
; CHECK-ALU32-NEXT: r0 = 8
; CHECK-ALU32-NEXT: .Ltmp4:
; CHECK-ALU32-NEXT: .loc 1 12 69 prologue_end # test.c:12:69
; CHECK-ALU32-NEXT: .Ltmp5:
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/BPF/CORE/field-reloc-bitfield-1.ll
@@ -36,18 +36,18 @@ define dso_local i32 @test(ptr %arg) local_unnamed_addr #0 !dbg !13 {
; CHECK-ALU64-NEXT: # %bb.0: # %entry
; CHECK-ALU64-NEXT: #DEBUG_VALUE: test:arg <- $r1
; CHECK-ALU64-NEXT: .Ltmp0:
; CHECK-ALU64-NEXT: r1 = 20
; CHECK-ALU64-NEXT: r1 = 16
; CHECK-ALU64-NEXT: .Ltmp1:
; CHECK-ALU64-NEXT: .Ltmp2:
; CHECK-ALU64-NEXT: .Ltmp3:
; CHECK-ALU64-NEXT: r0 = 4
; CHECK-ALU64-NEXT: r0 = 8
; CHECK-ALU64-NEXT: .Ltmp4:
; CHECK-ALU64-NEXT: .loc 1 12 69 prologue_end # test.c:12:69
; CHECK-ALU64-NEXT: .Ltmp5:
; CHECK-ALU64-NEXT: .Ltmp6:
; CHECK-ALU64-NEXT: r0 += r1
; CHECK-ALU64-NEXT: .Ltmp7:
; CHECK-ALU64-NEXT: r1 = 50
; CHECK-ALU64-NEXT: r1 = 18
; CHECK-ALU64-NEXT: .loc 1 13 67 # test.c:13:67
; CHECK-ALU64-NEXT: .Ltmp8:
; CHECK-ALU64-NEXT: r0 += r1
@@ -67,18 +67,18 @@ define dso_local i32 @test(ptr %arg) local_unnamed_addr #0 !dbg !13 {
; CHECK-ALU32-NEXT: # %bb.0: # %entry
; CHECK-ALU32-NEXT: #DEBUG_VALUE: test:arg <- $r1
; CHECK-ALU32-NEXT: .Ltmp0:
; CHECK-ALU32-NEXT: r1 = 20
; CHECK-ALU32-NEXT: r1 = 16
; CHECK-ALU32-NEXT: .Ltmp1:
; CHECK-ALU32-NEXT: .Ltmp2:
; CHECK-ALU32-NEXT: .Ltmp3:
; CHECK-ALU32-NEXT: r0 = 4
; CHECK-ALU32-NEXT: r0 = 8
; CHECK-ALU32-NEXT: .Ltmp4:
; CHECK-ALU32-NEXT: .loc 1 12 69 prologue_end # test.c:12:69
; CHECK-ALU32-NEXT: .Ltmp5:
; CHECK-ALU32-NEXT: .Ltmp6:
; CHECK-ALU32-NEXT: w0 += w1
; CHECK-ALU32-NEXT: .Ltmp7:
; CHECK-ALU32-NEXT: r1 = 50
; CHECK-ALU32-NEXT: r1 = 18
; CHECK-ALU32-NEXT: .loc 1 13 67 # test.c:13:67
; CHECK-ALU32-NEXT: .Ltmp8:
; CHECK-ALU32-NEXT: w0 += w1
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/expand-large-div-rem-sdiv129.ll
@@ -4,7 +4,7 @@
define void @sdiv129(ptr %ptr, ptr %out) nounwind {
; CHECK-LABEL: @sdiv129(
; CHECK-NEXT: _udiv-special-cases:
; CHECK-NEXT: [[A:%.*]] = load i129, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: [[A:%.*]] = load i129, ptr [[PTR:%.*]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = freeze i129 [[A]]
; CHECK-NEXT: [[TMP1:%.*]] = freeze i129 3
; CHECK-NEXT: [[TMP2:%.*]] = ashr i129 [[TMP0]], 128
@@ -66,7 +66,7 @@ define void @sdiv129(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: [[TMP48:%.*]] = phi i129 [ [[TMP25]], [[UDIV_LOOP_EXIT]] ], [ [[TMP20]], [[_UDIV_SPECIAL_CASES:%.*]] ]
; CHECK-NEXT: [[TMP49:%.*]] = xor i129 [[TMP48]], [[TMP8]]
; CHECK-NEXT: [[TMP50:%.*]] = sub i129 [[TMP49]], [[TMP8]]
; CHECK-NEXT: store i129 [[TMP50]], ptr [[OUT:%.*]], align 4
; CHECK-NEXT: store i129 [[TMP50]], ptr [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
%a = load i129, ptr %ptr
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/expand-large-div-rem-srem129.ll
@@ -4,7 +4,7 @@
define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-LABEL: @test(
; CHECK-NEXT: _udiv-special-cases:
; CHECK-NEXT: [[A:%.*]] = load i129, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: [[A:%.*]] = load i129, ptr [[PTR:%.*]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = freeze i129 [[A]]
; CHECK-NEXT: [[TMP1:%.*]] = freeze i129 3
; CHECK-NEXT: [[TMP2:%.*]] = ashr i129 [[TMP0]], 128
@@ -69,7 +69,7 @@ define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: [[TMP51:%.*]] = sub i129 [[TMP8]], [[TMP50]]
; CHECK-NEXT: [[TMP52:%.*]] = xor i129 [[TMP51]], [[TMP2]]
; CHECK-NEXT: [[TMP53:%.*]] = sub i129 [[TMP52]], [[TMP2]]
; CHECK-NEXT: store i129 [[TMP53]], ptr [[OUT:%.*]], align 4
; CHECK-NEXT: store i129 [[TMP53]], ptr [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
%a = load i129, ptr %ptr
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/expand-large-div-rem-udiv129.ll
@@ -4,7 +4,7 @@
define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-LABEL: @test(
; CHECK-NEXT: _udiv-special-cases:
; CHECK-NEXT: [[A:%.*]] = load i129, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: [[A:%.*]] = load i129, ptr [[PTR:%.*]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = freeze i129 3
; CHECK-NEXT: [[TMP1:%.*]] = freeze i129 [[A]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i129 [[TMP0]], 0
@@ -55,7 +55,7 @@ define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: br i1 [[TMP38]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
; CHECK: udiv-end:
; CHECK-NEXT: [[TMP39:%.*]] = phi i129 [ [[TMP16]], [[UDIV_LOOP_EXIT]] ], [ [[TMP11]], [[_UDIV_SPECIAL_CASES:%.*]] ]
; CHECK-NEXT: store i129 [[TMP39]], ptr [[OUT:%.*]], align 4
; CHECK-NEXT: store i129 [[TMP39]], ptr [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
%a = load i129, ptr %ptr
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/expand-large-div-rem-urem129.ll
@@ -4,7 +4,7 @@
define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-LABEL: @test(
; CHECK-NEXT: _udiv-special-cases:
; CHECK-NEXT: [[A:%.*]] = load i129, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: [[A:%.*]] = load i129, ptr [[PTR:%.*]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = freeze i129 [[A]]
; CHECK-NEXT: [[TMP1:%.*]] = freeze i129 3
; CHECK-NEXT: [[TMP2:%.*]] = freeze i129 [[TMP1]]
@@ -59,7 +59,7 @@ define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: [[TMP41:%.*]] = phi i129 [ [[TMP18]], [[UDIV_LOOP_EXIT]] ], [ [[TMP13]], [[_UDIV_SPECIAL_CASES:%.*]] ]
; CHECK-NEXT: [[TMP42:%.*]] = mul i129 [[TMP1]], [[TMP41]]
; CHECK-NEXT: [[TMP43:%.*]] = sub i129 [[TMP0]], [[TMP42]]
; CHECK-NEXT: store i129 [[TMP43]], ptr [[OUT:%.*]], align 4
; CHECK-NEXT: store i129 [[TMP43]], ptr [[OUT:%.*]], align 16
; CHECK-NEXT: ret void
;
%a = load i129, ptr %ptr
120 changes: 60 additions & 60 deletions llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope.ll

Large diffs are not rendered by default.

@@ -6,14 +6,14 @@ target triple = "i386-unknown-linux-gnu"
define i32 @foo() #0 {
; CHECK-LABEL: define i32 @foo() comdat {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @__sanitizer_cov_trace_pc_guard(ptr @__sancov_gen_) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: call void @__sanitizer_cov_trace_pc_guard(ptr inttoptr (i32 ptrtoint (ptr @__sancov_gen_ to i32) to ptr)) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: ret i32 0
;
entry:
ret i32 0
}

; CHECK-DAG: declare void @__sanitizer_cov_trace_pc_indir(i64)
; CHECK-DAG: declare void @__sanitizer_cov_trace_pc_indir(i32)
; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp1(i8 zeroext, i8 zeroext)
; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp2(i16 zeroext, i16 zeroext)
; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp4(i32 zeroext, i32 zeroext)
@@ -24,7 +24,7 @@ entry:
; CHECK-DAG: declare void @__sanitizer_cov_trace_const_cmp8(i64, i64)
; CHECK-DAG: declare void @__sanitizer_cov_trace_div4(i32 zeroext)
; CHECK-DAG: declare void @__sanitizer_cov_trace_div8(i64)
; CHECK-DAG: declare void @__sanitizer_cov_trace_gep(i64)
; CHECK-DAG: declare void @__sanitizer_cov_trace_gep(i32)
; CHECK-DAG: declare void @__sanitizer_cov_trace_switch(i64, ptr)
; CHECK-DAG: declare void @__sanitizer_cov_trace_pc()
; CHECK-DAG: declare void @__sanitizer_cov_trace_pc_guard(ptr)
1,766 changes: 1,160 additions & 606 deletions llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll

Large diffs are not rendered by default.

306 changes: 144 additions & 162 deletions llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll

Large diffs are not rendered by default.

@@ -165,32 +165,31 @@ define half @test_atomicrmw_fmax_f16_global_align4(ptr addrspace(1) %ptr, half %

define half @test_atomicrmw_fmax_f16_local(ptr addrspace(3) %ptr, half %value) {
; GCN-LABEL: @test_atomicrmw_fmax_f16_local(
; GCN-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) [[PTR:%.*]], i64 -4)
; GCN-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i64
; GCN-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; GCN-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; GCN-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; GCN-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; GCN-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
; GCN-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
; GCN-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
; GCN-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
; GCN-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]]
; GCN-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; GCN-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4
; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
; GCN: atomicrmw.start:
; GCN-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; GCN-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; GCN-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
; GCN-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; GCN-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to half
; GCN-NEXT: [[TMP5:%.*]] = call half @llvm.maxnum.f16(half [[TMP4]], half [[VALUE:%.*]])
; GCN-NEXT: [[TMP6:%.*]] = bitcast half [[TMP5]] to i16
; GCN-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP6]] to i32
; GCN-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GCN-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
; GCN-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; GCN-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GCN-NEXT: [[TMP7:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
; GCN-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0
; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GCN: atomicrmw.end:
; GCN-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; GCN-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
; GCN-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; GCN-NEXT: [[TMP8:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GCN-NEXT: ret half [[TMP8]]
@@ -165,32 +165,31 @@ define half @test_atomicrmw_fmin_f16_global_align4(ptr addrspace(1) %ptr, half %

define half @test_atomicrmw_fmin_f16_local(ptr addrspace(3) %ptr, half %value) {
; GCN-LABEL: @test_atomicrmw_fmin_f16_local(
; GCN-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) [[PTR:%.*]], i64 -4)
; GCN-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i64
; GCN-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; GCN-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; GCN-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; GCN-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; GCN-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
; GCN-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
; GCN-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
; GCN-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
; GCN-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]]
; GCN-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; GCN-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4
; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
; GCN: atomicrmw.start:
; GCN-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; GCN-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; GCN-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
; GCN-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; GCN-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to half
; GCN-NEXT: [[TMP5:%.*]] = call half @llvm.minnum.f16(half [[TMP4]], half [[VALUE:%.*]])
; GCN-NEXT: [[TMP6:%.*]] = bitcast half [[TMP5]] to i16
; GCN-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP6]] to i32
; GCN-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GCN-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
; GCN-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; GCN-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GCN-NEXT: [[TMP7:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
; GCN-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0
; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GCN: atomicrmw.end:
; GCN-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; GCN-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
; GCN-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; GCN-NEXT: [[TMP8:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GCN-NEXT: ret half [[TMP8]]
51 changes: 24 additions & 27 deletions llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fsub.ll
@@ -165,32 +165,31 @@ define half @test_atomicrmw_fsub_f16_global_align4(ptr addrspace(1) %ptr, half %

define half @test_atomicrmw_fsub_f16_local(ptr addrspace(3) %ptr, half %value) {
; GCN-LABEL: @test_atomicrmw_fsub_f16_local(
; GCN-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) [[PTR:%.*]], i64 -4)
; GCN-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i64
; GCN-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; GCN-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; GCN-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; GCN-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; GCN-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
; GCN-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
; GCN-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
; GCN-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
; GCN-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]]
; GCN-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; GCN-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4
; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
; GCN: atomicrmw.start:
; GCN-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; GCN-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; GCN-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
; GCN-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; GCN-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to half
; GCN-NEXT: [[NEW:%.*]] = fsub half [[TMP4]], [[VALUE:%.*]]
; GCN-NEXT: [[TMP5:%.*]] = bitcast half [[NEW]] to i16
; GCN-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
; GCN-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GCN-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
; GCN-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; GCN-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GCN-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GCN-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GCN: atomicrmw.end:
; GCN-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; GCN-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
; GCN-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; GCN-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to half
; GCN-NEXT: ret half [[TMP7]]
@@ -285,32 +284,31 @@ define float @test_atomicrmw_fsub_f32_global_strictfp(ptr addrspace(1) %ptr, flo

define bfloat @test_atomicrmw_fadd_bf16_local(ptr addrspace(3) %ptr, bfloat %value) {
; GCN-LABEL: @test_atomicrmw_fadd_bf16_local(
; GCN-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) [[PTR:%.*]], i64 -4)
; GCN-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i64
; GCN-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; GCN-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; GCN-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; GCN-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; GCN-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
; GCN-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
; GCN-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
; GCN-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
; GCN-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]]
; GCN-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; GCN-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4
; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
; GCN: atomicrmw.start:
; GCN-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; GCN-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; GCN-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
; GCN-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; GCN-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
; GCN-NEXT: [[NEW:%.*]] = fadd bfloat [[TMP4]], [[VALUE:%.*]]
; GCN-NEXT: [[TMP5:%.*]] = bitcast bfloat [[NEW]] to i16
; GCN-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
; GCN-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GCN-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
; GCN-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; GCN-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GCN-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] monotonic monotonic, align 4
; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GCN-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GCN: atomicrmw.end:
; GCN-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; GCN-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
; GCN-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; GCN-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to bfloat
; GCN-NEXT: ret bfloat [[TMP7]]
@@ -471,32 +469,31 @@ define bfloat @test_atomicrmw_fadd_bf16_global_system_align4(ptr addrspace(1) %p

define bfloat @test_atomicrmw_fadd_bf16_local_strictfp(ptr addrspace(3) %ptr, bfloat %value) #2 {
; GCN-LABEL: @test_atomicrmw_fadd_bf16_local_strictfp(
; GCN-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) [[PTR:%.*]], i64 -4)
; GCN-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i64
; GCN-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; GCN-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; GCN-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; GCN-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; GCN-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
; GCN-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
; GCN-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
; GCN-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
; GCN-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]]
; GCN-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; GCN-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4
; GCN-NEXT: br label [[ATOMICRMW_START:%.*]]
; GCN: atomicrmw.start:
; GCN-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; GCN-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; GCN-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
; GCN-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; GCN-NEXT: [[TMP4:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
; GCN-NEXT: [[NEW:%.*]] = fadd bfloat [[TMP4]], [[VALUE:%.*]]
; GCN-NEXT: [[TMP5:%.*]] = bitcast bfloat [[NEW]] to i16
; GCN-NEXT: [[EXTENDED:%.*]] = zext i16 [[TMP5]] to i32
; GCN-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; GCN-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
; GCN-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; GCN-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; GCN-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] monotonic monotonic, align 4
; GCN-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; GCN-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; GCN-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; GCN: atomicrmw.end:
; GCN-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; GCN-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
; GCN-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; GCN-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED3]] to bfloat
; GCN-NEXT: ret bfloat [[TMP7]]
14 changes: 5 additions & 9 deletions llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
@@ -33,16 +33,12 @@ define i1 @test_cmpxchg_seq_cst(ptr %addr, i128 %desire, i128 %new) {
; PWR7-NEXT: [[TMP0:%.*]] = alloca i128, align 8
; PWR7-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP0]])
; PWR7-NEXT: store i128 [[DESIRE:%.*]], ptr [[TMP0]], align 8
; PWR7-NEXT: [[TMP1:%.*]] = alloca i128, align 8
; PWR7-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP1]])
; PWR7-NEXT: store i128 [[NEW:%.*]], ptr [[TMP1]], align 8
; PWR7-NEXT: [[TMP2:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 16, ptr [[ADDR:%.*]], ptr [[TMP0]], ptr [[TMP1]], i32 5, i32 5)
; PWR7-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP1]])
; PWR7-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP0]], align 8
; PWR7-NEXT: [[TMP1:%.*]] = call zeroext i1 @__atomic_compare_exchange_16(ptr [[ADDR:%.*]], ptr [[TMP0]], i128 [[NEW:%.*]], i32 5, i32 5)
; PWR7-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8
; PWR7-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP0]])
; PWR7-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP3]], 0
; PWR7-NEXT: [[TMP5:%.*]] = insertvalue { i128, i1 } [[TMP4]], i1 [[TMP2]], 1
; PWR7-NEXT: [[SUCC:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1
; PWR7-NEXT: [[TMP3:%.*]] = insertvalue { i128, i1 } poison, i128 [[TMP2]], 0
; PWR7-NEXT: [[TMP4:%.*]] = insertvalue { i128, i1 } [[TMP3]], i1 [[TMP1]], 1
; PWR7-NEXT: [[SUCC:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
; PWR7-NEXT: ret i1 [[SUCC]]
;
entry:
12 changes: 6 additions & 6 deletions llvm/test/Transforms/AtomicExpand/X86/expand-atomic-libcall.ll
@@ -4,10 +4,10 @@

define i256 @atomic_load256_libcall(ptr %ptr) nounwind {
; CHECK-LABEL: @atomic_load256_libcall(
; CHECK-NEXT: [[TMP1:%.*]] = alloca i256, align 8
; CHECK-NEXT: [[TMP1:%.*]] = alloca i256, align 16
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[TMP1]])
; CHECK-NEXT: call void @__atomic_load(i64 32, ptr [[PTR:%.*]], ptr [[TMP1]], i32 0)
; CHECK-NEXT: [[TMP2:%.*]] = load i256, ptr [[TMP1]], align 8
; CHECK-NEXT: call void @__atomic_load(i32 32, ptr [[PTR:%.*]], ptr [[TMP1]], i32 0)
; CHECK-NEXT: [[TMP2:%.*]] = load i256, ptr [[TMP1]], align 16
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr [[TMP1]])
; CHECK-NEXT: ret i256 [[TMP2]]
;
@@ -18,10 +18,10 @@ define i256 @atomic_load256_libcall(ptr %ptr) nounwind {
define i256 @atomic_load256_libcall_as1(ptr addrspace(1) %ptr) nounwind {
; CHECK-LABEL: @atomic_load256_libcall_as1(
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR:%.*]] to ptr
; CHECK-NEXT: [[TMP2:%.*]] = alloca i256, align 8
; CHECK-NEXT: [[TMP2:%.*]] = alloca i256, align 16
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[TMP2]])
; CHECK-NEXT: call void @__atomic_load(i64 32, ptr [[TMP1]], ptr [[TMP2]], i32 0)
; CHECK-NEXT: [[TMP3:%.*]] = load i256, ptr [[TMP2]], align 8
; CHECK-NEXT: call void @__atomic_load(i32 32, ptr [[TMP1]], ptr [[TMP2]], i32 0)
; CHECK-NEXT: [[TMP3:%.*]] = load i256, ptr [[TMP2]], align 16
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr [[TMP2]])
; CHECK-NEXT: ret i256 [[TMP3]]
;
32 changes: 16 additions & 16 deletions llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll
@@ -7,10 +7,10 @@ define i64 @no_sink_local_to_flat(i1 %pred, ptr addrspace(3) %ptr) {
; CHECK-NEXT: [[PTR_CAST:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr addrspace(3) [[PTR]], align 4
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr addrspace(3) [[PTR]], align 8
; CHECK-NEXT: ret i64 [[V1]]
; CHECK: l2:
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[PTR_CAST]], align 4
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[PTR_CAST]], align 8
; CHECK-NEXT: ret i64 [[V2]]
;
%ptr_cast = addrspacecast ptr addrspace(3) %ptr to ptr
@@ -31,10 +31,10 @@ define i64 @no_sink_private_to_flat(i1 %pred, ptr addrspace(5) %ptr) {
; CHECK-NEXT: [[PTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr addrspace(5) [[PTR]], align 4
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr addrspace(5) [[PTR]], align 8
; CHECK-NEXT: ret i64 [[V1]]
; CHECK: l2:
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[PTR_CAST]], align 4
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[PTR_CAST]], align 8
; CHECK-NEXT: ret i64 [[V2]]
;
%ptr_cast = addrspacecast ptr addrspace(5) %ptr to ptr
@@ -55,11 +55,11 @@ define i64 @sink_global_to_flat(i1 %pred, ptr addrspace(1) %ptr) {
; CHECK-SAME: i1 [[PRED:%.*]], ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: ret i64 [[V1]]
; CHECK: l2:
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[TMP1]], align 4
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[TMP1]], align 8
; CHECK-NEXT: ret i64 [[V2]]
;
%ptr_cast = addrspacecast ptr addrspace(1) %ptr to ptr
@@ -79,11 +79,11 @@ define i64 @sink_flat_to_global(i1 %pred, ptr %ptr) {
; CHECK-SAME: i1 [[PRED:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 4
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 8
; CHECK-NEXT: ret i64 [[V1]]
; CHECK: l2:
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(1)
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(1) [[TMP1]], align 4
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(1) [[TMP1]], align 8
; CHECK-NEXT: ret i64 [[V2]]
;
%ptr_cast = addrspacecast ptr %ptr to ptr addrspace(1)
@@ -103,11 +103,11 @@ define i64 @sink_flat_to_constant(i1 %pred, ptr %ptr) {
; CHECK-SAME: i1 [[PRED:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 4
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 8
; CHECK-NEXT: ret i64 [[V1]]
; CHECK: l2:
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(4)
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(4) [[TMP1]], align 4
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(4) [[TMP1]], align 8
; CHECK-NEXT: ret i64 [[V2]]
;
%ptr_cast = addrspacecast ptr %ptr to ptr addrspace(4)
@@ -125,13 +125,13 @@ l2:
define i64 @sink_flat_to_local(i1 %pred, ptr %ptr) {
; CHECK-LABEL: define i64 @sink_flat_to_local(
; CHECK-SAME: i1 [[PRED:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR_CAST:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(3)
; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 4
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 8
; CHECK-NEXT: ret i64 [[V1]]
; CHECK: l2:
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(3)
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(3) [[TMP1]], align 4
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(3) [[PTR_CAST]], align 8
; CHECK-NEXT: ret i64 [[V2]]
;
%ptr_cast = addrspacecast ptr %ptr to ptr addrspace(3)
@@ -149,13 +149,13 @@ l2:
define i64 @sink_flat_to_private(i1 %pred, ptr %ptr) {
; CHECK-LABEL: define i64 @sink_flat_to_private(
; CHECK-SAME: i1 [[PRED:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR_CAST:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5)
; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 4
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 8
; CHECK-NEXT: ret i64 [[V1]]
; CHECK: l2:
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5)
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(5) [[PTR_CAST]], align 8
; CHECK-NEXT: ret i64 [[V2]]
;
%ptr_cast = addrspacecast ptr %ptr to ptr addrspace(5)
20 changes: 10 additions & 10 deletions llvm/test/Transforms/DivRemPairs/X86/div-expanded-rem-pair.ll
@@ -214,7 +214,7 @@ define i64 @remainder_triangle_i64(i64 %a, i64 %b, ptr %rp) {
; CHECK-NEXT: [[REM:%.*]] = urem i64 [[A]], [[B]]
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: store i64 [[REM]], ptr [[RP]], align 4
; CHECK-NEXT: store i64 [[REM]], ptr [[RP]], align 8
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: ret i64 [[DIV]]
@@ -246,7 +246,7 @@ define i128 @remainder_triangle_i128(i128 %a, i128 %b, ptr %rp) {
; CHECK: if.then:
; CHECK-NEXT: [[TMP0:%.*]] = mul i128 [[DIV]], [[B_FROZEN]]
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i128 [[A_FROZEN]], [[TMP0]]
; CHECK-NEXT: store i128 [[REM_DECOMPOSED]], ptr [[RP]], align 4
; CHECK-NEXT: store i128 [[REM_DECOMPOSED]], ptr [[RP]], align 16
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: ret i128 [[DIV]]
@@ -275,7 +275,7 @@ define i64 @remainder_triangle_i64_multiple_rem_edges(i64 %a, i64 %b, i64 %c, pt
; CHECK-NEXT: i64 2, label [[SW_BB]]
; CHECK-NEXT: ]
; CHECK: sw.bb:
; CHECK-NEXT: store i64 [[REM]], ptr [[RP:%.*]], align 4
; CHECK-NEXT: store i64 [[REM]], ptr [[RP:%.*]], align 8
; CHECK-NEXT: br label [[SW_DEFAULT]]
; CHECK: sw.default:
; CHECK-NEXT: ret i64 [[DIV]]
@@ -306,7 +306,7 @@ define i64 @remainder_triangle_i64_multiple_div_edges(i64 %a, i64 %b, i64 %c, pt
; CHECK-NEXT: i64 2, label [[SW_BB]]
; CHECK-NEXT: ]
; CHECK: sw.default:
; CHECK-NEXT: store i64 [[REM]], ptr [[RP:%.*]], align 4
; CHECK-NEXT: store i64 [[REM]], ptr [[RP:%.*]], align 8
; CHECK-NEXT: br label [[SW_BB]]
; CHECK: sw.bb:
; CHECK-NEXT: ret i64 [[DIV]]
@@ -339,7 +339,7 @@ define i64 @remainder_triangle_i64_maythrow_rem(i64 %a, i64 %b, ptr %rp) {
; CHECK: if.then:
; CHECK-NEXT: call void @maythrow()
; CHECK-NEXT: [[REM:%.*]] = urem i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: store i64 [[REM]], ptr [[RP]], align 4
; CHECK-NEXT: store i64 [[REM]], ptr [[RP]], align 8
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[A]], [[B]]
@@ -369,7 +369,7 @@ define i64 @remainder_triangle_i64_maythrow_div(i64 %a, i64 %b, ptr %rp) {
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[REM:%.*]] = urem i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: store i64 [[REM]], ptr [[RP]], align 4
; CHECK-NEXT: store i64 [[REM]], ptr [[RP]], align 8
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: call void @maythrow()
@@ -401,7 +401,7 @@ define i128 @remainder_triangle_i128_maythrow_rem(i128 %a, i128 %b, ptr %rp) {
; CHECK: if.then:
; CHECK-NEXT: call void @maythrow()
; CHECK-NEXT: [[REM:%.*]] = urem i128 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: store i128 [[REM]], ptr [[RP]], align 4
; CHECK-NEXT: store i128 [[REM]], ptr [[RP]], align 16
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[DIV:%.*]] = udiv i128 [[A]], [[B]]
@@ -431,7 +431,7 @@ define i128 @remainder_triangle_i128_maythrow_div(i128 %a, i128 %b, ptr %rp) {
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[REM:%.*]] = urem i128 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: store i128 [[REM]], ptr [[RP]], align 4
; CHECK-NEXT: store i128 [[REM]], ptr [[RP]], align 16
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: call void @maythrow()
@@ -464,7 +464,7 @@ define i64 @remainder_not_triangle_i32(i64 %a, i64 %b, i64 %c, ptr %rp) {
; CHECK-NEXT: ]
; CHECK: sw.bb:
; CHECK-NEXT: [[REM:%.*]] = urem i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: store i64 [[REM]], ptr [[RP:%.*]], align 4
; CHECK-NEXT: store i64 [[REM]], ptr [[RP:%.*]], align 8
; CHECK-NEXT: br label [[SW_BB1]]
; CHECK: sw.bb1:
; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[A]], [[B]]
@@ -501,7 +501,7 @@ define i64 @remainder_not_triangle_i32_2(i64 %a, i64 %b, i64 %c, ptr %rp) {
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END3:%.*]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[REM:%.*]] = urem i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: store i64 [[REM]], ptr [[RP]], align 4
; CHECK-NEXT: store i64 [[REM]], ptr [[RP]], align 8
; CHECK-NEXT: [[TOBOOL1_NOT:%.*]] = icmp eq i64 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[TOBOOL1_NOT]], label [[IF_END3]], label [[RETURN:%.*]]
; CHECK: if.end3: