Skip to content

Commit

Permalink
[X86] Don't zero/sign-extend i1, i8, or i16 return values to 32 bits …
Browse files Browse the repository at this point in the history
…(PR22532)

This matches GCC and MSVC's behaviour, and saves on code size.

We were already not extending i1 return values on x86_64 after r127766. This
takes that patch further by applying it to the x86 target as well, and also to
i8 and i16.

The ABI docs have been unclear about the required behaviour here. The new i386
psABI [1] clearly states (Table 2.4, page 14) that i1, i8, and i16 return
values do not need to be extended beyond 8 bits. The x86_64 ABI doc is being
updated to say the same [2].

Differential Revision: http://reviews.llvm.org/D16907

 [1]. https://01.org/sites/default/files/file_attach/intel386-psabi-1.0.pdf
 [2]. https://groups.google.com/d/msg/x86-64-abi/E8O33onbnGQ/_RFWw_ixDQAJ

llvm-svn: 260133
  • Loading branch information
zmodem committed Feb 8, 2016
1 parent bc130af commit 850ec6c
Show file tree
Hide file tree
Showing 19 changed files with 207 additions and 66 deletions.
3 changes: 1 addition & 2 deletions llvm/docs/LangRef.rst
Expand Up @@ -907,8 +907,7 @@ Currently, only the following parameter attributes are defined:
``zeroext``
This indicates to the code generator that the parameter or return
value should be zero-extended to the extent required by the target's
ABI (which is usually 32-bits, but is 8-bits for a i1 on x86-64) by
the caller (for a parameter) or the callee (for a return value).
ABI by the caller (for a parameter) or the callee (for a return value).
``signext``
This indicates to the code generator that the parameter or return
value should be sign-extended to the extent required by the target's
Expand Down
12 changes: 6 additions & 6 deletions llvm/include/llvm/Target/TargetLowering.h
Expand Up @@ -2537,12 +2537,12 @@ class TargetLowering : public TargetLoweringBase {
}

/// Return the type that should be used to zero or sign extend a
/// zeroext/signext integer argument or return value. FIXME: Most C calling
/// convention requires the return type to be promoted, but this is not true
/// all the time, e.g. i1 on x86-64. It is also not necessary for non-C
/// calling conventions. The frontend should handle this and include all of
/// the necessary information.
virtual EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
/// zeroext/signext integer return value. FIXME: Some C calling conventions
/// require the return type to be promoted, but this is not true all the time,
/// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling
/// conventions. The frontend should handle this and include all of the
/// necessary information.
virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType /*ExtendKind*/) const {
EVT MinVT = getRegisterType(Context, MVT::i32);
return VT.bitsLT(MinVT) ? MinVT : VT;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Expand Up @@ -1381,7 +1381,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
EVT VT = ValueVTs[j];

if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
VT = TLI.getTypeForExtArgOrReturn(Context, VT, ExtendKind);
VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);

unsigned NumParts = TLI.getNumRegisters(Context, VT);
MVT PartVT = TLI.getRegisterType(Context, VT);
Expand Down
15 changes: 7 additions & 8 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -2374,15 +2374,14 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
return true;
}

EVT
X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const {
MVT ReturnMVT;
// TODO: Is this also valid on 32-bit?
if (Subtarget.is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const {
MVT ReturnMVT = MVT::i32;

if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) {
// The ABI does not require i1, i8 or i16 to be extended.
ReturnMVT = MVT::i8;
else
ReturnMVT = MVT::i32;
}

EVT MinVT = getRegisterType(Context, ReturnMVT);
return VT.bitsLT(MinVT) ? MinVT : VT;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.h
Expand Up @@ -1092,8 +1092,8 @@ namespace llvm {

bool mayBeEmittedAsTailCall(CallInst *CI) const override;

EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const override;
EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const override;

bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
bool isVarArg,
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
Expand Up @@ -2,9 +2,10 @@

@X = global i32 0 ; <i32*> [#uses=1]

define signext i8 @_Z3fooi(i32 %x) {
define i32 @_Z3fooi(i32 %x) {
entry:
store i32 %x, i32* @X, align 4
%retval67 = trunc i32 %x to i8 ; <i8> [#uses=1]
ret i8 %retval67
%retval = sext i8 %retval67 to i32
ret i32 %retval
}
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/X86/3addr-16bit.ll
Expand Up @@ -12,7 +12,7 @@ entry:

; 64BIT-LABEL: t1:
; 64BIT-NOT: movw %si, %ax
; 64BIT: leal 1(%rsi), %eax
; 64BIT: leal 1(%rsi), %ebx
%0 = icmp eq i16 %k, %c ; <i1> [#uses=1]
%1 = add i16 %k, 1 ; <i16> [#uses=3]
br i1 %0, label %bb, label %bb1
Expand All @@ -34,8 +34,8 @@ entry:

; 64BIT-LABEL: t2:
; 64BIT-NOT: movw %si, %ax
; 64BIT: leal -1(%rsi), %eax
; 64BIT: movzwl %ax
; 64BIT: leal -1(%rsi), %ebx
; 64BIT: movzwl %bx
%0 = icmp eq i16 %k, %c ; <i1> [#uses=1]
%1 = add i16 %k, -1 ; <i16> [#uses=3]
br i1 %0, label %bb, label %bb1
Expand All @@ -59,7 +59,7 @@ entry:

; 64BIT-LABEL: t3:
; 64BIT-NOT: movw %si, %ax
; 64BIT: leal 2(%rsi), %eax
; 64BIT: leal 2(%rsi), %ebx
%0 = add i16 %k, 2 ; <i16> [#uses=3]
%1 = icmp eq i16 %k, %c ; <i1> [#uses=1]
br i1 %1, label %bb, label %bb1
Expand All @@ -82,7 +82,7 @@ entry:

; 64BIT-LABEL: t4:
; 64BIT-NOT: movw %si, %ax
; 64BIT: leal (%rsi,%rdi), %eax
; 64BIT: leal (%rsi,%rdi), %ebx
%0 = add i16 %k, %c ; <i16> [#uses=3]
%1 = icmp eq i16 %k, %c ; <i1> [#uses=1]
br i1 %1, label %bb, label %bb1
Expand Down
37 changes: 23 additions & 14 deletions llvm/test/CodeGen/X86/bool-zext.ll
@@ -1,10 +1,15 @@
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s -check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64

; X64: @bar1
; Check that the argument gets zero-extended before calling.
; X86-LABEL: bar1
; X86: movzbl
; X86: calll
; X64-LABEL: bar1
; X64: movzbl
; X64: jmp
; WIN64: @bar1
; WIN64-LABEL: bar1
; WIN64: movzbl
; WIN64: callq
define void @bar1(i1 zeroext %v1) nounwind ssp {
Expand All @@ -14,10 +19,11 @@ entry:
ret void
}

; X64: @bar2
; Check that on x86-64 the arguments are simply forwarded.
; X64-LABEL: bar2
; X64-NOT: movzbl
; X64: jmp
; WIN64: @bar2
; WIN64-LABEL: bar2
; WIN64-NOT: movzbl
; WIN64: callq
define void @bar2(i8 zeroext %v1) nounwind ssp {
Expand All @@ -27,16 +33,19 @@ entry:
ret void
}

; X64: @bar3
; X64: callq
; X64-NOT: movzbl
; X64-NOT: and
; X64: ret
; WIN64: @bar3
; WIN64: callq
; WIN64-NOT: movzbl
; WIN64-NOT: and
; WIN64: ret
; Check that i1 return values are not zero-extended.
; X86-LABEL: bar3
; X86: call
; X86-NEXT: {{add|pop}}
; X86-NEXT: ret
; X64-LABEL: bar3
; X64: call
; X64-NEXT: {{add|pop}}
; X64-NEXT: ret
; WIN64-LABEL: bar3
; WIN64: call
; WIN64-NEXT: {{add|pop}}
; WIN64-NEXT: ret
define zeroext i1 @bar3() nounwind ssp {
entry:
%call = call i1 @foo2() nounwind
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/divrem8_ext.ll
Expand Up @@ -6,9 +6,9 @@ target triple = "x86_64-apple-macosx10.10.0"
define zeroext i8 @test_udivrem_zext_ah(i8 %x, i8 %y) {
; CHECK-LABEL: test_udivrem_zext_ah
; CHECK: divb
; CHECK: movzbl %ah, [[REG_REM:%[a-z0-9]+]]
; CHECK: movzbl %ah, %e[[REG_REM:[a-z]]]x
; CHECK: movb %al, ([[REG_ZPTR:%[a-z0-9]+]])
; CHECK: movl [[REG_REM]], %eax
; CHECK: movb %[[REG_REM]]l, %al
; CHECK: ret
%div = udiv i8 %x, %y
store i8 %div, i8* @z
Expand Down Expand Up @@ -51,9 +51,9 @@ define signext i8 @test_sdivrem_sext_ah(i8 %x, i8 %y) {
; CHECK-LABEL: test_sdivrem_sext_ah
; CHECK: cbtw
; CHECK: idivb
; CHECK: movsbl %ah, [[REG_REM:%[a-z0-9]+]]
; CHECK: movsbl %ah, %e[[REG_REM:[a-z]]]x
; CHECK: movb %al, ([[REG_ZPTR]])
; CHECK: movl [[REG_REM]], %eax
; CHECK: movb %[[REG_REM]]l, %al
; CHECK: ret
%div = sdiv i8 %x, %y
store i8 %div, i8* @z
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/float-conv-elim.ll
Expand Up @@ -21,7 +21,7 @@ define i32 @foo2(i8 %a) #0 {

; CHECK-LABEL: bar
; CHECK-NOT: cvt
; CHECK: movl
; CHECK: movb
define zeroext i8 @bar(i8 zeroext %a) #0 {
%conv = uitofp i8 %a to float
%conv1 = fptoui float %conv to i8
Expand Down
24 changes: 21 additions & 3 deletions llvm/test/CodeGen/X86/h-registers-3.ll
@@ -1,13 +1,31 @@
; RUN: llc < %s -march=x86 | grep mov | count 1
; RUN: llc < %s -march=x86-64 | grep mov | count 1
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | grep mov | count 1
; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X86
; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s -check-prefix=X32

define zeroext i8 @foo() nounwind ssp {
entry:
%0 = tail call zeroext i16 (...) @bar() nounwind
%1 = lshr i16 %0, 8
%2 = trunc i16 %1 to i8
ret i8 %2

; X86-LABEL: foo
; X86: calll
; X86-NEXT: movb %ah, %al
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl

; X64-LABEL: foo
; X64: callq
; X64-NEXT: shrl $8, %eax
; X64-NEXT: popq
; X64-NEXT: retq

; X32-LABEL: foo
; X32: callq
; X32-NEXT: shrl $8, %eax
; X32-NEXT: popq
; X32-NEXT: retq
}

declare zeroext i16 @bar(...)
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/promote-i16.ll
Expand Up @@ -3,19 +3,19 @@
define signext i16 @foo(i16 signext %x) nounwind {
entry:
; CHECK-LABEL: foo:
; CHECK-NOT: movzwl
; CHECK: movswl 4(%esp), %eax
; CHECK: xorl $21998, %eax
; CHECK: movzwl 4(%esp), %eax
; CHECK-NEXT: xorl $21998, %eax
; CHECK-NEXT: retl
%0 = xor i16 %x, 21998
ret i16 %0
}

define signext i16 @bar(i16 signext %x) nounwind {
entry:
; CHECK-LABEL: bar:
; CHECK-NOT: movzwl
; CHECK: movswl 4(%esp), %eax
; CHECK: xorl $-10770, %eax
; CHECK: movzwl 4(%esp), %eax
; CHECK-NEXT: xorl $54766, %eax
; CHECK-NEXT: retl
%0 = xor i16 %x, 54766
ret i16 %0
}
105 changes: 105 additions & 0 deletions llvm/test/CodeGen/X86/return-ext.ll
@@ -0,0 +1,105 @@
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s


@x = common global i32 0, align 4

define zeroext i1 @unsigned_i1() {
entry:
%0 = load i32, i32* @x
%cmp = icmp eq i32 %0, 42
ret i1 %cmp

; Unsigned i1 return values are not extended.
; CHECK-LABEL: unsigned_i1:
; CHECK: cmp
; CHECK-NEXT: sete
; CHECK-NEXT: ret
}

define zeroext i8 @unsigned_i8() {
entry:
%0 = load i32, i32* @x
%cmp = icmp eq i32 %0, 42
%retval = zext i1 %cmp to i8
ret i8 %retval

; Unsigned i8 return values are not extended.
; CHECK-LABEL: unsigned_i8:
; CHECK: cmp
; CHECK-NEXT: sete
; CHECK-NEXT: ret
}

define signext i8 @signed_i8() {
entry:
%0 = load i32, i32* @x
%cmp = icmp eq i32 %0, 42
%retval = zext i1 %cmp to i8
ret i8 %retval

; Signed i8 return values are not extended.
; CHECK-LABEL: signed_i8:
; CHECK: cmp
; CHECK-NEXT: sete
; CHECK-NEXT: ret
}

@a = common global i16 0
@b = common global i16 0
define zeroext i16 @unsigned_i16() {
entry:
%0 = load i16, i16* @a
%1 = load i16, i16* @b
%add = add i16 %1, %0
ret i16 %add

; i16 return values are not extended.
; CHECK-LABEL: unsigned_i16:
; CHECK: movw
; CHECK-NEXT: addw
; CHECK-NEXT: ret
}


define i32 @use_i1() {
entry:
%0 = call i1 @unsigned_i1();
%1 = zext i1 %0 to i32
ret i32 %1

; The high 24 bits of %eax from a function returning i1 are undefined.
; CHECK-LABEL: use_i1:
; CHECK: call
; CHECK-NEXT: movzbl
; CHECK-NEXT: {{pop|add}}
; CHECK-NEXT: ret
}

define i32 @use_i8() {
entry:
%0 = call i8 @unsigned_i8();
%1 = zext i8 %0 to i32
ret i32 %1

; The high 24 bits of %eax from a function returning i8 are undefined.
; CHECK-LABEL: use_i8:
; CHECK: call
; CHECK-NEXT: movzbl
; CHECK-NEXT: {{pop|add}}
; CHECK-NEXT: ret
}

define i32 @use_i16() {
entry:
%0 = call i16 @unsigned_i16();
%1 = zext i16 %0 to i32
ret i32 %1

; The high 16 bits of %eax from a function returning i16 are undefined.
; CHECK-LABEL: use_i16:
; CHECK: call
; CHECK-NEXT: movzwl
; CHECK-NEXT: {{pop|add}}
; CHECK-NEXT: ret
}
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/select.ll
Expand Up @@ -66,10 +66,10 @@ entry:
%2 = load i8, i8* %1, align 1 ; <i8> [#uses=1]
ret i8 %2
; CHECK-LABEL: test4:
; CHECK: movsbl ({{.*}},4), %eax
; CHECK: movb ({{.*}},4), %al

; ATOM-LABEL: test4:
; ATOM: movsbl ({{.*}},4), %eax
; ATOM: movb ({{.*}},4), %al
}

define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind {
Expand Down

0 comments on commit 850ec6c

Please sign in to comment.