Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14815,6 +14815,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
return Res;

if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse() && !VT.isVector()) {
SDValue Res =
DAG.getFreeze(DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0)));
return DAG.getNode(ISD::AssertSext, DL, VT, Res,
DAG.getValueType(N0.getOperand(0).getValueType()));
}

return SDValue();
}

Expand Down Expand Up @@ -15194,6 +15201,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
return SDValue(CSENode, 0);
}

if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse() && !VT.isVector()) {
SDValue Res =
DAG.getFreeze(DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)));
return DAG.getNode(ISD::AssertZext, DL, VT, Res,
DAG.getValueType(N0.getOperand(0).getValueType()));
}

return SDValue();
}

Expand Down Expand Up @@ -15362,6 +15376,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
return Res;

if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse())
return DAG.getFreeze(
DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(0)));

return SDValue();
}

Expand Down Expand Up @@ -16911,6 +16929,11 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
return LegalShuffle;
}

if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse()) {
SDLoc DL(N);
return DAG.getFreeze(DAG.getNode(ISD::BITCAST, DL, VT, N0.getOperand(0)));
}

return SDValue();
}

Expand Down Expand Up @@ -16943,6 +16966,11 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
// example https://reviews.llvm.org/D136529#4120959.
if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
return SDValue();
// Avoid folding extensions and bitcasts. Each of these operations handles
// FREEZE in its own visitor.
if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND ||
N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::BITCAST)
return SDValue();

// Fold freeze(op(x, ...)) -> op(freeze(x), ...).
// Try to push freeze through instructions that propagate but don't produce
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3448,6 +3448,12 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
return false;

// If we have a large vector type (even if illegal), don't bitcast to large
// (illegal) scalar types. Better to load fewer vectors and extract.
if (LoadVT.isVector() && !BitcastVT.isVector() && LoadVT.isInteger() &&
BitcastVT.isInteger() && (LoadVT.getSizeInBits() % 128) == 0)
return false;

// If both types are legal vectors, it's always ok to convert them.
if (LoadVT.isVector() && BitcastVT.isVector() &&
isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
Expand Down
118 changes: 118 additions & 0 deletions llvm/test/CodeGen/AArch64/freeze-bitcast-ext-load.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s

; Bitcast of a frozen <2 x float> load to double. The expected output is a
; single `ldr d0` straight into the return register, with nothing else before
; the return.
define double @test_bitcast_freeze_load(ptr %p) {
; CHECK-LABEL: test_bitcast_freeze_load:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
%v = load <2 x float>, ptr %p
%f = freeze <2 x float> %v
%b = bitcast <2 x float> %f to double
ret double %b
}

; Sign-extension (i8 -> i32) of a frozen scalar load with a single use. The
; expected output is one sign-extending byte load (`ldrsb`) and no separate
; extend instruction.
define i32 @test_sext_freeze_load_i8(ptr %p) {
; CHECK-LABEL: test_sext_freeze_load_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrsb w0, [x0]
; CHECK-NEXT: ret
%v = load i8, ptr %p
%f = freeze i8 %v
%e = sext i8 %f to i32
ret i32 %e
}

; Sign-extension (i32 -> i64) of a frozen scalar load with a single use. The
; expected output is one sign-extending word load (`ldrsw`) and no separate
; extend instruction.
define i64 @test_sext_freeze_load_i32(ptr %p) {
; CHECK-LABEL: test_sext_freeze_load_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrsw x0, [x0]
; CHECK-NEXT: ret
%v = load i32, ptr %p
%f = freeze i32 %v
%e = sext i32 %f to i64
ret i64 %e
}

; Sign-extension (i16 -> i64) of a frozen scalar load with a single use. The
; expected output is one sign-extending halfword load (`ldrsh`) and no separate
; extend instruction.
define i64 @test_sext_freeze_load_i16(ptr %p) {
; CHECK-LABEL: test_sext_freeze_load_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrsh x0, [x0]
; CHECK-NEXT: ret
%v = load i16, ptr %p
%f = freeze i16 %v
%e = sext i16 %f to i64
ret i64 %e
}

; Zero-extension (i8 -> i32) of a frozen scalar load with a single use. The
; expected output is one byte load (`ldrb`) and no separate extend or mask
; instruction.
define i32 @test_zext_freeze_load_i8(ptr %p) {
; CHECK-LABEL: test_zext_freeze_load_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrb w0, [x0]
; CHECK-NEXT: ret
%v = load i8, ptr %p
%f = freeze i8 %v
%e = zext i8 %f to i32
ret i32 %e
}

; Zero-extension (i32 -> i64) of a frozen scalar load with a single use. The
; expected output is one 32-bit load into `w0` and no separate extend or mask
; instruction.
define i64 @test_zext_freeze_load_i32(ptr %p) {
; CHECK-LABEL: test_zext_freeze_load_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr w0, [x0]
; CHECK-NEXT: ret
%v = load i32, ptr %p
%f = freeze i32 %v
%e = zext i32 %f to i64
ret i64 %e
}

; Zero-extension (i16 -> i64) of a frozen scalar load with a single use. The
; expected output is one halfword load (`ldrh`) and no separate extend or mask
; instruction.
define i64 @test_zext_freeze_load_i16(ptr %p) {
; CHECK-LABEL: test_zext_freeze_load_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrh w0, [x0]
; CHECK-NEXT: ret
%v = load i16, ptr %p
%f = freeze i16 %v
%e = zext i16 %f to i64
ret i64 %e
}

; Negative case: the frozen i8 load feeds both a sext and a zext, so the freeze
; has more than one use. The expected output keeps a plain `ldrb` followed by
; an explicit `sxtb` and a `uxtb`-extended add operand; neither extension is
; merged into the load.
define i32 @test_sext_freeze_load_multiuse(ptr %p) {
; CHECK-LABEL: test_sext_freeze_load_multiuse:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrb w8, [x0]
; CHECK-NEXT: sxtb w9, w8
; CHECK-NEXT: add w0, w9, w8, uxtb
; CHECK-NEXT: ret
%v = load i8, ptr %p
%f = freeze i8 %v
%e = sext i8 %f to i32
%z = zext i8 %f to i32
%r = add i32 %e, %z
ret i32 %r
}

; Vector case: sign-extension (<4 x i16> -> <4 x i32>) of a frozen vector load.
; The expected output is a plain 64-bit `ldr d0` followed by a separate `sshll`
; widening step.
define <4 x i32> @test_sext_freeze_load_v4i16(ptr %p) {
; CHECK-LABEL: test_sext_freeze_load_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-NEXT: ret
%v = load <4 x i16>, ptr %p
%f = freeze <4 x i16> %v
%e = sext <4 x i16> %f to <4 x i32>
ret <4 x i32> %e
}

; Vector case: zero-extension (<4 x i16> -> <4 x i32>) of a frozen vector load.
; The expected output is a plain 64-bit `ldr d0` followed by a separate `ushll`
; widening step.
define <4 x i32> @test_zext_freeze_load_v4i16(ptr %p) {
; CHECK-LABEL: test_zext_freeze_load_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ret
%v = load <4 x i16>, ptr %p
%f = freeze <4 x i16> %v
%e = zext <4 x i16> %f to <4 x i32>
ret <4 x i32> %e
}
12 changes: 8 additions & 4 deletions llvm/test/CodeGen/AArch64/freeze.ll
Original file line number Diff line number Diff line change
Expand Up @@ -376,10 +376,14 @@ define i32 @freeze_anonstruct() {
}

define i32 @freeze_anonstruct2() {
; CHECK-LABEL: freeze_anonstruct2:
; CHECK: // %bb.0:
; CHECK-NEXT: add w0, w8, w8, uxth
; CHECK-NEXT: ret
; CHECK-SD-LABEL: freeze_anonstruct2:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: freeze_anonstruct2:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add w0, w8, w8, uxth
; CHECK-GI-NEXT: ret
%y1 = freeze {i32, i16} undef
%v1 = extractvalue {i32, i16} %y1, 0
%v2 = extractvalue {i32, i16} %y1, 1
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/pr66603.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
define i32 @PR66603(double %x) nounwind {
; CHECK-LABEL: PR66603:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs w8, d0
; CHECK-NEXT: sxtb w0, w8
; CHECK-NEXT: fcvtzs w0, d0
; CHECK-NEXT: ret
%as_i8 = fptosi double %x to i8
%frozen_i8 = freeze i8 %as_i8
Expand Down
22 changes: 12 additions & 10 deletions llvm/test/CodeGen/AArch64/vector-compress.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,16 @@ define <4 x i32> @test_compress_v4i32(<4 x i32> %vec, <4 x i1> %mask) {
; CHECK-NEXT: shl.4s v1, v1, #31
; CHECK-NEXT: cmlt.4s v1, v1, #0
; CHECK-NEXT: mov.s w9, v1[1]
; CHECK-NEXT: fmov w11, s1
; CHECK-NEXT: mov.s w10, v1[2]
; CHECK-NEXT: and x12, x11, #0x1
; CHECK-NEXT: fmov w11, s1
; CHECK-NEXT: bfi x8, x11, #2, #1
; CHECK-NEXT: mov x11, sp
; CHECK-NEXT: and x11, x11, #0x1
; CHECK-NEXT: and x9, x9, #0x1
; CHECK-NEXT: add x9, x12, x9
; CHECK-NEXT: and w10, w10, #0x1
; CHECK-NEXT: add x9, x11, x9
; CHECK-NEXT: mov x11, sp
; CHECK-NEXT: st1.s { v0 }[1], [x8]
; CHECK-NEXT: sub w10, w9, w10
; CHECK-NEXT: add w10, w9, w10
; CHECK-NEXT: orr x9, x11, x9, lsl #2
; CHECK-NEXT: bfi x11, x10, #2, #2
; CHECK-NEXT: st1.s { v0 }[2], [x9]
Expand Down Expand Up @@ -420,15 +421,16 @@ define <3 x i32> @test_compress_narrow(<3 x i32> %vec, <3 x i1> %mask) {
; CHECK-NEXT: shl.4s v1, v1, #31
; CHECK-NEXT: cmlt.4s v1, v1, #0
; CHECK-NEXT: mov.s w8, v1[1]
; CHECK-NEXT: fmov w10, s1
; CHECK-NEXT: mov.s w9, v1[2]
; CHECK-NEXT: and x12, x10, #0x1
; CHECK-NEXT: fmov w10, s1
; CHECK-NEXT: bfi x11, x10, #2, #1
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: and x10, x10, #0x1
; CHECK-NEXT: and x8, x8, #0x1
; CHECK-NEXT: add x8, x12, x8
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: add x8, x10, x8
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: st1.s { v0 }[1], [x11]
; CHECK-NEXT: sub w9, w8, w9
; CHECK-NEXT: add w9, w8, w9
; CHECK-NEXT: orr x8, x10, x8, lsl #2
; CHECK-NEXT: bfi x10, x9, #2, #2
; CHECK-NEXT: st1.s { v0 }[2], [x8]
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AArch64/vselect-ext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -594,10 +594,10 @@ define void @extension_in_loop_v16i8_to_v16i32(ptr %src, ptr %dst) {
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: cmge.16b v5, v4, #0
; CHECK-NEXT: tbl.16b v7, { v4 }, v0
; CHECK-NEXT: tbl.16b v16, { v4 }, v1
; CHECK-NEXT: tbl.16b v18, { v4 }, v2
; CHECK-NEXT: tbl.16b v4, { v4 }, v3
; CHECK-NEXT: tbl.16b v7, { v4 }, v3
; CHECK-NEXT: tbl.16b v16, { v4 }, v2
; CHECK-NEXT: tbl.16b v18, { v4 }, v1
; CHECK-NEXT: tbl.16b v4, { v4 }, v0
; CHECK-NEXT: sshll2.8h v6, v5, #0
; CHECK-NEXT: sshll.8h v5, v5, #0
; CHECK-NEXT: sshll2.4s v17, v6, #0
Expand Down Expand Up @@ -664,10 +664,10 @@ define void @extension_in_loop_as_shuffle_v16i8_to_v16i32(ptr %src, ptr %dst) {
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: cmge.16b v5, v4, #0
; CHECK-NEXT: tbl.16b v7, { v4 }, v0
; CHECK-NEXT: tbl.16b v16, { v4 }, v1
; CHECK-NEXT: tbl.16b v18, { v4 }, v2
; CHECK-NEXT: tbl.16b v4, { v4 }, v3
; CHECK-NEXT: tbl.16b v7, { v4 }, v3
; CHECK-NEXT: tbl.16b v16, { v4 }, v2
; CHECK-NEXT: tbl.16b v18, { v4 }, v1
; CHECK-NEXT: tbl.16b v4, { v4 }, v0
; CHECK-NEXT: sshll2.8h v6, v5, #0
; CHECK-NEXT: sshll.8h v5, v5, #0
; CHECK-NEXT: sshll2.4s v17, v6, #0
Expand Down Expand Up @@ -735,10 +735,10 @@ define void @shuffle_in_loop_is_no_extend_v16i8_to_v16i32(ptr %src, ptr %dst) {
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: cmge.16b v5, v4, #0
; CHECK-NEXT: tbl.16b v7, { v4 }, v0
; CHECK-NEXT: tbl.16b v16, { v4 }, v1
; CHECK-NEXT: tbl.16b v18, { v4 }, v2
; CHECK-NEXT: tbl.16b v4, { v4 }, v3
; CHECK-NEXT: tbl.16b v7, { v4 }, v3
; CHECK-NEXT: tbl.16b v16, { v4 }, v2
; CHECK-NEXT: tbl.16b v18, { v4 }, v1
; CHECK-NEXT: tbl.16b v4, { v4 }, v0
; CHECK-NEXT: sshll2.8h v6, v5, #0
; CHECK-NEXT: sshll.8h v5, v5, #0
; CHECK-NEXT: sshll2.4s v17, v6, #0
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7769,7 +7769,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
;
; GFX6-LABEL: sdiv_i64_pow2_shl_denom:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
; GFX6-NEXT: s_load_dword s0, s[4:5], 0xd
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
Expand Down Expand Up @@ -7938,7 +7938,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
;
; GFX9-LABEL: sdiv_i64_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x34
; GFX9-NEXT: s_load_dword s0, s[4:5], 0x34
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_lshl_b64 s[0:1], 0x1000, s0
; GFX9-NEXT: s_ashr_i32 s6, s1, 31
Expand Down Expand Up @@ -9037,7 +9037,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
;
; GFX6-LABEL: srem_i64_pow2_shl_denom:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
; GFX6-NEXT: s_load_dword s0, s[4:5], 0xd
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_lshl_b64 s[0:1], 0x1000, s0
Expand Down Expand Up @@ -9208,7 +9208,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
;
; GFX9-LABEL: srem_i64_pow2_shl_denom:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x34
; GFX9-NEXT: s_load_dword s0, s[4:5], 0x34
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_lshl_b64 s[0:1], 0x1000, s0
; GFX9-NEXT: s_ashr_i32 s2, s1, 31
Expand Down
Loading