239 changes: 174 additions & 65 deletions clang/test/CodeGen/attr-cpuspecific.c
Original file line number Diff line number Diff line change
@@ -1,100 +1,209 @@
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LINUX
// RUN: %clang_cc1 -triple x86_64-windows-pc -fms-compatibility -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WINDOWS

#ifdef _WIN64
#define ATTR(X) __declspec(X)
#else
#define ATTR(X) __attribute__((X))
#endif // _MSC_VER

// Each called version should have an IFunc.
// CHECK: @SingleVersion.ifunc = ifunc void (), void ()* ()* @SingleVersion.resolver
// CHECK: @TwoVersions.ifunc = ifunc void (), void ()* ()* @TwoVersions.resolver
// CHECK: @TwoVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @TwoVersionsSameAttr.resolver
// CHECK: @ThreeVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @ThreeVersionsSameAttr.resolver
// LINUX: @SingleVersion.ifunc = ifunc void (), void ()* ()* @SingleVersion.resolver
// LINUX: @TwoVersions.ifunc = ifunc void (), void ()* ()* @TwoVersions.resolver
// LINUX: @TwoVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @TwoVersionsSameAttr.resolver
// LINUX: @ThreeVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @ThreeVersionsSameAttr.resolver

__attribute__((cpu_specific(ivybridge)))
ATTR(cpu_specific(ivybridge))
void SingleVersion(void){}
// CHECK: define void @SingleVersion.S() #[[S:[0-9]+]]
// LINUX: define void @SingleVersion.S() #[[S:[0-9]+]]
// WINDOWS: define dso_local void @SingleVersion.S() #[[S:[0-9]+]]

__attribute__((cpu_specific(ivybridge)))
ATTR(cpu_specific(ivybridge))
void NotCalled(void){}
// CHECK: define void @NotCalled.S() #[[S]]
// LINUX: define void @NotCalled.S() #[[S]]
// WINDOWS: define dso_local void @NotCalled.S() #[[S:[0-9]+]]

// Done before any of the implementations.
__attribute__((cpu_dispatch(ivybridge, knl)))
ATTR(cpu_dispatch(ivybridge, knl))
void TwoVersions(void);
// CHECK: define void ()* @TwoVersions.resolver()
// CHECK: call void @__cpu_indicator_init
// CHECK: ret void ()* @TwoVersions.Z
// CHECK: ret void ()* @TwoVersions.S
// CHECK: call void @llvm.trap
// CHECK: unreachable

__attribute__((cpu_specific(ivybridge)))
// LINUX: define void ()* @TwoVersions.resolver()
// LINUX: call void @__cpu_indicator_init
// LINUX: ret void ()* @TwoVersions.Z
// LINUX: ret void ()* @TwoVersions.S
// LINUX: call void @llvm.trap
// LINUX: unreachable

// WINDOWS: define dso_local void @TwoVersions()
// WINDOWS: call void @__cpu_indicator_init()
// WINDOWS: call void @TwoVersions.Z()
// WINDOWS-NEXT: ret void
// WINDOWS: call void @TwoVersions.S()
// WINDOWS-NEXT: ret void
// WINDOWS: call void @llvm.trap
// WINDOWS: unreachable

ATTR(cpu_specific(ivybridge))
void TwoVersions(void){}
// CHECK: define void @TwoVersions.S() #[[S]]
// CHECK: define {{.*}}void @TwoVersions.S() #[[S]]

__attribute__((cpu_specific(knl)))
ATTR(cpu_specific(knl))
void TwoVersions(void){}
// CHECK: define void @TwoVersions.Z() #[[K:[0-9]+]]
// CHECK: define {{.*}}void @TwoVersions.Z() #[[K:[0-9]+]]

__attribute__((cpu_specific(ivybridge, knl)))
ATTR(cpu_specific(ivybridge, knl))
void TwoVersionsSameAttr(void){}
// CHECK: define void @TwoVersionsSameAttr.S() #[[S]]
// CHECK: define void @TwoVersionsSameAttr.Z() #[[K]]
// CHECK: define {{.*}}void @TwoVersionsSameAttr.S() #[[S]]
// CHECK: define {{.*}}void @TwoVersionsSameAttr.Z() #[[K]]

__attribute__((cpu_specific(atom, ivybridge, knl)))
ATTR(cpu_specific(atom, ivybridge, knl))
void ThreeVersionsSameAttr(void){}
// CHECK: define void @ThreeVersionsSameAttr.O() #[[O:[0-9]+]]
// CHECK: define void @ThreeVersionsSameAttr.S() #[[S]]
// CHECK: define void @ThreeVersionsSameAttr.Z() #[[K]]
// CHECK: define {{.*}}void @ThreeVersionsSameAttr.O() #[[O:[0-9]+]]
// CHECK: define {{.*}}void @ThreeVersionsSameAttr.S() #[[S]]
// CHECK: define {{.*}}void @ThreeVersionsSameAttr.Z() #[[K]]

void usages() {
SingleVersion();
// CHECK: @SingleVersion.ifunc()
// LINUX: @SingleVersion.ifunc()
// WINDOWS: @SingleVersion()
TwoVersions();
// CHECK: @TwoVersions.ifunc()
// LINUX: @TwoVersions.ifunc()
// WINDOWS: @TwoVersions()
TwoVersionsSameAttr();
// CHECK: @TwoVersionsSameAttr.ifunc()
// LINUX: @TwoVersionsSameAttr.ifunc()
// WINDOWS: @TwoVersionsSameAttr()
ThreeVersionsSameAttr();
// CHECK: @ThreeVersionsSameAttr.ifunc()
// LINUX: @ThreeVersionsSameAttr.ifunc()
// WINDOWS: @ThreeVersionsSameAttr()
}

// has an extra config to emit!
__attribute__((cpu_dispatch(ivybridge, knl, atom)))
ATTR(cpu_dispatch(ivybridge, knl, atom))
void TwoVersionsSameAttr(void);
// CHECK: define void ()* @TwoVersionsSameAttr.resolver()
// CHECK: ret void ()* @TwoVersionsSameAttr.Z
// CHECK: ret void ()* @TwoVersionsSameAttr.S
// CHECK: ret void ()* @TwoVersionsSameAttr.O
// CHECK: call void @llvm.trap
// CHECK: unreachable

__attribute__((cpu_dispatch(atom, ivybridge, knl)))
// LINUX: define void ()* @TwoVersionsSameAttr.resolver()
// LINUX: ret void ()* @TwoVersionsSameAttr.Z
// LINUX: ret void ()* @TwoVersionsSameAttr.S
// LINUX: ret void ()* @TwoVersionsSameAttr.O
// LINUX: call void @llvm.trap
// LINUX: unreachable

// WINDOWS: define dso_local void @TwoVersionsSameAttr()
// WINDOWS: call void @TwoVersionsSameAttr.Z
// WINDOWS-NEXT: ret void
// WINDOWS: call void @TwoVersionsSameAttr.S
// WINDOWS-NEXT: ret void
// WINDOWS: call void @TwoVersionsSameAttr.O
// WINDOWS-NEXT: ret void
// WINDOWS: call void @llvm.trap
// WINDOWS: unreachable

ATTR(cpu_dispatch(atom, ivybridge, knl))
void ThreeVersionsSameAttr(void){}
// CHECK: define void ()* @ThreeVersionsSameAttr.resolver()
// CHECK: call void @__cpu_indicator_init
// CHECK: ret void ()* @ThreeVersionsSameAttr.Z
// CHECK: ret void ()* @ThreeVersionsSameAttr.S
// CHECK: ret void ()* @ThreeVersionsSameAttr.O
// CHECK: call void @llvm.trap
// CHECK: unreachable
// LINUX: define void ()* @ThreeVersionsSameAttr.resolver()
// LINUX: call void @__cpu_indicator_init
// LINUX: ret void ()* @ThreeVersionsSameAttr.Z
// LINUX: ret void ()* @ThreeVersionsSameAttr.S
// LINUX: ret void ()* @ThreeVersionsSameAttr.O
// LINUX: call void @llvm.trap
// LINUX: unreachable

// WINDOWS: define dso_local void @ThreeVersionsSameAttr()
// WINDOWS: call void @__cpu_indicator_init
// WINDOWS: call void @ThreeVersionsSameAttr.Z
// WINDOWS-NEXT: ret void
// WINDOWS: call void @ThreeVersionsSameAttr.S
// WINDOWS-NEXT: ret void
// WINDOWS: call void @ThreeVersionsSameAttr.O
// WINDOWS-NEXT: ret void
// WINDOWS: call void @llvm.trap
// WINDOWS: unreachable

// No Cpu Specific options.
__attribute__((cpu_dispatch(atom, ivybridge, knl)))
ATTR(cpu_dispatch(atom, ivybridge, knl))
void NoSpecifics(void);
// CHECK: define void ()* @NoSpecifics.resolver()
// CHECK: call void @__cpu_indicator_init
// CHECK: ret void ()* @NoSpecifics.Z
// CHECK: ret void ()* @NoSpecifics.S
// CHECK: ret void ()* @NoSpecifics.O
// CHECK: call void @llvm.trap
// CHECK: unreachable

__attribute__((cpu_dispatch(atom, generic, ivybridge, knl)))
// LINUX: define void ()* @NoSpecifics.resolver()
// LINUX: call void @__cpu_indicator_init
// LINUX: ret void ()* @NoSpecifics.Z
// LINUX: ret void ()* @NoSpecifics.S
// LINUX: ret void ()* @NoSpecifics.O
// LINUX: call void @llvm.trap
// LINUX: unreachable

// WINDOWS: define dso_local void @NoSpecifics()
// WINDOWS: call void @__cpu_indicator_init
// WINDOWS: call void @NoSpecifics.Z
// WINDOWS-NEXT: ret void
// WINDOWS: call void @NoSpecifics.S
// WINDOWS-NEXT: ret void
// WINDOWS: call void @NoSpecifics.O
// WINDOWS-NEXT: ret void
// WINDOWS: call void @llvm.trap
// WINDOWS: unreachable

ATTR(cpu_dispatch(atom, generic, ivybridge, knl))
void HasGeneric(void);
// CHECK: define void ()* @HasGeneric.resolver()
// CHECK: call void @__cpu_indicator_init
// CHECK: ret void ()* @HasGeneric.Z
// CHECK: ret void ()* @HasGeneric.S
// CHECK: ret void ()* @HasGeneric.O
// CHECK: ret void ()* @HasGeneric.A
// CHECK-NOT: call void @llvm.trap
// LINUX: define void ()* @HasGeneric.resolver()
// LINUX: call void @__cpu_indicator_init
// LINUX: ret void ()* @HasGeneric.Z
// LINUX: ret void ()* @HasGeneric.S
// LINUX: ret void ()* @HasGeneric.O
// LINUX: ret void ()* @HasGeneric.A
// LINUX-NOT: call void @llvm.trap

// WINDOWS: define dso_local void @HasGeneric()
// WINDOWS: call void @__cpu_indicator_init
// WINDOWS: call void @HasGeneric.Z
// WINDOWS-NEXT: ret void
// WINDOWS: call void @HasGeneric.S
// WINDOWS-NEXT: ret void
// WINDOWS: call void @HasGeneric.O
// WINDOWS-NEXT: ret void
// WINDOWS: call void @HasGeneric.A
// WINDOWS-NEXT: ret void
// WINDOWS-NOT: call void @llvm.trap

ATTR(cpu_dispatch(atom, generic, ivybridge, knl))
void HasParams(int i, double d);
// LINUX: define void (i32, double)* @HasParams.resolver()
// LINUX: call void @__cpu_indicator_init
// LINUX: ret void (i32, double)* @HasParams.Z
// LINUX: ret void (i32, double)* @HasParams.S
// LINUX: ret void (i32, double)* @HasParams.O
// LINUX: ret void (i32, double)* @HasParams.A
// LINUX-NOT: call void @llvm.trap

// WINDOWS: define dso_local void @HasParams(i32, double)
// WINDOWS: call void @__cpu_indicator_init
// WINDOWS: call void @HasParams.Z(i32 %0, double %1)
// WINDOWS-NEXT: ret void
// WINDOWS: call void @HasParams.S(i32 %0, double %1)
// WINDOWS-NEXT: ret void
// WINDOWS: call void @HasParams.O(i32 %0, double %1)
// WINDOWS-NEXT: ret void
// WINDOWS: call void @HasParams.A(i32 %0, double %1)
// WINDOWS-NEXT: ret void
// WINDOWS-NOT: call void @llvm.trap

ATTR(cpu_dispatch(atom, generic, ivybridge, knl))
int HasParamsAndReturn(int i, double d);
// LINUX: define i32 (i32, double)* @HasParamsAndReturn.resolver()
// LINUX: call void @__cpu_indicator_init
// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.Z
// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.S
// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.O
// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.A
// LINUX-NOT: call void @llvm.trap

// WINDOWS: define dso_local i32 @HasParamsAndReturn(i32, double)
// WINDOWS: call void @__cpu_indicator_init
// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.Z(i32 %0, double %1)
// WINDOWS-NEXT: ret i32 %[[RET]]
// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.S(i32 %0, double %1)
// WINDOWS-NEXT: ret i32 %[[RET]]
// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.O(i32 %0, double %1)
// WINDOWS-NEXT: ret i32 %[[RET]]
// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.A(i32 %0, double %1)
// WINDOWS-NEXT: ret i32 %[[RET]]
// WINDOWS-NOT: call void @llvm.trap

// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
Expand Down
41 changes: 28 additions & 13 deletions clang/test/CodeGen/attr-target-mv-func-ptrs.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
int __attribute__((target("sse4.2"))) foo(int i) { return 0; }
int __attribute__((target("arch=sandybridge"))) foo(int);
int __attribute__((target("arch=ivybridge"))) foo(int i) {return 1;}
Expand All @@ -16,17 +17,31 @@ int bar() {
return Free(1) + Free(2);
}

// CHECK: @foo.ifunc = ifunc i32 (i32), i32 (i32)* ()* @foo.resolver
// CHECK: define i32 @foo.sse4.2(
// CHECK: ret i32 0
// CHECK: define i32 @foo.arch_ivybridge(
// CHECK: ret i32 1
// CHECK: define i32 @foo(
// CHECK: ret i32 2
// LINUX: @foo.ifunc = ifunc i32 (i32), i32 (i32)* ()* @foo.resolver
// LINUX: define i32 @foo.sse4.2(
// LINUX: ret i32 0
// LINUX: define i32 @foo.arch_ivybridge(
// LINUX: ret i32 1
// LINUX: define i32 @foo(
// LINUX: ret i32 2

// CHECK: define i32 @bar()
// CHECK: call void @func(i32 (i32)* @foo.ifunc)
// CHECK: store i32 (i32)* @foo.ifunc
// CHECK: store i32 (i32)* @foo.ifunc
// WINDOWS: define dso_local i32 @foo.sse4.2(
// WINDOWS: ret i32 0
// WINDOWS: define dso_local i32 @foo.arch_ivybridge(
// WINDOWS: ret i32 1
// WINDOWS: define dso_local i32 @foo(
// WINDOWS: ret i32 2

// CHECK: declare i32 @foo.arch_sandybridge(
// LINUX: define i32 @bar()
// LINUX: call void @func(i32 (i32)* @foo.ifunc)
// LINUX: store i32 (i32)* @foo.ifunc
// LINUX: store i32 (i32)* @foo.ifunc

// WINDOWS: define dso_local i32 @bar()
// WINDOWS: call void @func(i32 (i32)* @foo.resolver)
// WINDOWS: store i32 (i32)* @foo.resolver
// WINDOWS: store i32 (i32)* @foo.resolver

// LINUX: declare i32 @foo.arch_sandybridge(

// WINDOWS: declare dso_local i32 @foo.arch_sandybridge(
53 changes: 36 additions & 17 deletions clang/test/CodeGen/attr-target-mv-va-args.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
int __attribute__((target("sse4.2"))) foo(int i, ...) { return 0; }
int __attribute__((target("arch=sandybridge"))) foo(int i, ...);
int __attribute__((target("arch=ivybridge"))) foo(int i, ...) {return 1;}
Expand All @@ -8,19 +9,37 @@ int bar() {
return foo(1, 'a', 1.1) + foo(2, 2.2, "asdf");
}

// CHECK: @foo.ifunc = ifunc i32 (i32, ...), i32 (i32, ...)* ()* @foo.resolver
// CHECK: define i32 @foo.sse4.2(i32 %i, ...)
// CHECK: ret i32 0
// CHECK: define i32 @foo.arch_ivybridge(i32 %i, ...)
// CHECK: ret i32 1
// CHECK: define i32 @foo(i32 %i, ...)
// CHECK: ret i32 2
// CHECK: define i32 @bar()
// CHECK: call i32 (i32, ...) @foo.ifunc(i32 1, i32 97, double
// CHECK: call i32 (i32, ...) @foo.ifunc(i32 2, double 2.2{{[0-9Ee+]+}}, i8* getelementptr inbounds
// CHECK: define i32 (i32, ...)* @foo.resolver() comdat
// CHECK: ret i32 (i32, ...)* @foo.arch_sandybridge
// CHECK: ret i32 (i32, ...)* @foo.arch_ivybridge
// CHECK: ret i32 (i32, ...)* @foo.sse4.2
// CHECK: ret i32 (i32, ...)* @foo
// CHECK: declare i32 @foo.arch_sandybridge(i32, ...)
// LINUX: @foo.ifunc = ifunc i32 (i32, ...), i32 (i32, ...)* ()* @foo.resolver
// LINUX: define i32 @foo.sse4.2(i32 %i, ...)
// LINUX: ret i32 0
// LINUX: define i32 @foo.arch_ivybridge(i32 %i, ...)
// LINUX: ret i32 1
// LINUX: define i32 @foo(i32 %i, ...)
// LINUX: ret i32 2
// LINUX: define i32 @bar()
// LINUX: call i32 (i32, ...) @foo.ifunc(i32 1, i32 97, double
// LINUX: call i32 (i32, ...) @foo.ifunc(i32 2, double 2.2{{[0-9Ee+]+}}, i8* getelementptr inbounds

// LINUX: define i32 (i32, ...)* @foo.resolver() comdat
// LINUX: ret i32 (i32, ...)* @foo.arch_sandybridge
// LINUX: ret i32 (i32, ...)* @foo.arch_ivybridge
// LINUX: ret i32 (i32, ...)* @foo.sse4.2
// LINUX: ret i32 (i32, ...)* @foo
// LINUX: declare i32 @foo.arch_sandybridge(i32, ...)

// WINDOWS: define dso_local i32 @foo.sse4.2(i32 %i, ...)
// WINDOWS: ret i32 0
// WINDOWS: define dso_local i32 @foo.arch_ivybridge(i32 %i, ...)
// WINDOWS: ret i32 1
// WINDOWS: define dso_local i32 @foo(i32 %i, ...)
// WINDOWS: ret i32 2
// WINDOWS: define dso_local i32 @bar()
// WINDOWS: call i32 (i32, ...) @foo.resolver(i32 1, i32 97, double
// WINDOWS: call i32 (i32, ...) @foo.resolver(i32 2, double 2.2{{[0-9Ee+]+}}, i8* getelementptr inbounds

// WINDOWS: define dso_local i32 @foo.resolver(i32, ...) comdat
// WINDOWS: musttail call i32 (i32, ...) @foo.arch_sandybridge
// WINDOWS: musttail call i32 (i32, ...) @foo.arch_ivybridge
// WINDOWS: musttail call i32 (i32, ...) @foo.sse4.2
// WINDOWS: musttail call i32 (i32, ...) @foo
// WINDOWS: declare dso_local i32 @foo.arch_sandybridge(i32, ...)
229 changes: 152 additions & 77 deletions clang/test/CodeGen/attr-target-mv.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS

int __attribute__((target("sse4.2"))) foo(void) { return 0; }
int __attribute__((target("arch=sandybridge"))) foo(void);
int __attribute__((target("arch=ivybridge"))) foo(void) {return 1;}
Expand All @@ -25,82 +27,155 @@ void bar3() {
inline __attribute__((target("default"))) void foo_decls(void) {}
inline __attribute__((target("sse4.2"))) void foo_decls(void) {}

inline __attribute__((target("default"))) void foo_multi(void) {}
inline __attribute__((target("avx,sse4.2"))) void foo_multi(void) {}
inline __attribute__((target("sse4.2,fma4"))) void foo_multi(void) {}
inline __attribute__((target("arch=ivybridge,fma4,sse4.2"))) void foo_multi(void) {}
inline __attribute__((target("default"))) void foo_multi(int i, double d) {}
inline __attribute__((target("avx,sse4.2"))) void foo_multi(int i, double d) {}
inline __attribute__((target("sse4.2,fma4"))) void foo_multi(int i, double d) {}
inline __attribute__((target("arch=ivybridge,fma4,sse4.2"))) void foo_multi(int i, double d) {}
void bar4() {
foo_multi();
foo_multi(1, 5.0);
}

// CHECK: @foo.ifunc = ifunc i32 (), i32 ()* ()* @foo.resolver
// CHECK: @foo_inline.ifunc = ifunc i32 (), i32 ()* ()* @foo_inline.resolver
// CHECK: @foo_decls.ifunc = ifunc void (), void ()* ()* @foo_decls.resolver

// CHECK: define i32 @foo.sse4.2()
// CHECK: ret i32 0
// CHECK: define i32 @foo.arch_ivybridge()
// CHECK: ret i32 1
// CHECK: define i32 @foo()
// CHECK: ret i32 2
// CHECK: define i32 @bar()
// CHECK: call i32 @foo.ifunc()

// CHECK: define i32 ()* @foo.resolver() comdat
// CHECK: call void @__cpu_indicator_init()
// CHECK: ret i32 ()* @foo.arch_sandybridge
// CHECK: ret i32 ()* @foo.arch_ivybridge
// CHECK: ret i32 ()* @foo.sse4.2
// CHECK: ret i32 ()* @foo

// CHECK: define i32 @bar2()
// CHECK: call i32 @foo_inline.ifunc()

// CHECK: define i32 ()* @foo_inline.resolver() comdat
// CHECK: call void @__cpu_indicator_init()
// CHECK: ret i32 ()* @foo_inline.arch_sandybridge
// CHECK: ret i32 ()* @foo_inline.arch_ivybridge
// CHECK: ret i32 ()* @foo_inline.sse4.2
// CHECK: ret i32 ()* @foo_inline

// CHECK: define void @bar3()
// CHECK: call void @foo_decls.ifunc()

// CHECK: define void ()* @foo_decls.resolver() comdat
// CHECK: ret void ()* @foo_decls.sse4.2
// CHECK: ret void ()* @foo_decls

// CHECK: define void @bar4()
// CHECK: call void @foo_multi.ifunc()

// CHECK: define void ()* @foo_multi.resolver() comdat
// CHECK: and i32 %{{.*}}, 4352
// CHECK: icmp eq i32 %{{.*}}, 4352
// CHECK: ret void ()* @foo_multi.fma4_sse4.2
// CHECK: icmp eq i32 %{{.*}}, 12
// CHECK: and i32 %{{.*}}, 4352
// CHECK: icmp eq i32 %{{.*}}, 4352
// CHECK: ret void ()* @foo_multi.arch_ivybridge_fma4_sse4.2
// CHECK: and i32 %{{.*}}, 768
// CHECK: icmp eq i32 %{{.*}}, 768
// CHECK: ret void ()* @foo_multi.avx_sse4.2
// CHECK: ret void ()* @foo_multi

// CHECK: declare i32 @foo.arch_sandybridge()

// CHECK: define linkonce i32 @foo_inline.sse4.2()
// CHECK: ret i32 0

// CHECK: declare i32 @foo_inline.arch_sandybridge()
//
// CHECK: define linkonce i32 @foo_inline.arch_ivybridge()
// CHECK: ret i32 1
// CHECK: define linkonce i32 @foo_inline()
// CHECK: ret i32 2

// CHECK: define linkonce void @foo_decls()
// CHECK: define linkonce void @foo_decls.sse4.2()

// CHECK: define linkonce void @foo_multi.avx_sse4.2()
// CHECK: define linkonce void @foo_multi.fma4_sse4.2()
// CHECK: define linkonce void @foo_multi.arch_ivybridge_fma4_sse4.2()
// LINUX: @foo.ifunc = ifunc i32 (), i32 ()* ()* @foo.resolver
// LINUX: @foo_inline.ifunc = ifunc i32 (), i32 ()* ()* @foo_inline.resolver
// LINUX: @foo_decls.ifunc = ifunc void (), void ()* ()* @foo_decls.resolver
// LINUX: @foo_multi.ifunc = ifunc void (i32, double), void (i32, double)* ()* @foo_multi.resolver

// LINUX: define i32 @foo.sse4.2()
// LINUX: ret i32 0
// LINUX: define i32 @foo.arch_ivybridge()
// LINUX: ret i32 1
// LINUX: define i32 @foo()
// LINUX: ret i32 2
// LINUX: define i32 @bar()
// LINUX: call i32 @foo.ifunc()

// WINDOWS: define dso_local i32 @foo.sse4.2()
// WINDOWS: ret i32 0
// WINDOWS: define dso_local i32 @foo.arch_ivybridge()
// WINDOWS: ret i32 1
// WINDOWS: define dso_local i32 @foo()
// WINDOWS: ret i32 2
// WINDOWS: define dso_local i32 @bar()
// WINDOWS: call i32 @foo.resolver()

// LINUX: define i32 ()* @foo.resolver() comdat
// LINUX: call void @__cpu_indicator_init()
// LINUX: ret i32 ()* @foo.arch_sandybridge
// LINUX: ret i32 ()* @foo.arch_ivybridge
// LINUX: ret i32 ()* @foo.sse4.2
// LINUX: ret i32 ()* @foo

// WINDOWS: define dso_local i32 @foo.resolver() comdat
// WINDOWS: call void @__cpu_indicator_init()
// WINDOWS: call i32 @foo.arch_sandybridge
// WINDOWS: call i32 @foo.arch_ivybridge
// WINDOWS: call i32 @foo.sse4.2
// WINDOWS: call i32 @foo

// LINUX: define i32 @bar2()
// LINUX: call i32 @foo_inline.ifunc()

// WINDOWS: define dso_local i32 @bar2()
// WINDOWS: call i32 @foo_inline.resolver()

// LINUX: define i32 ()* @foo_inline.resolver() comdat
// LINUX: call void @__cpu_indicator_init()
// LINUX: ret i32 ()* @foo_inline.arch_sandybridge
// LINUX: ret i32 ()* @foo_inline.arch_ivybridge
// LINUX: ret i32 ()* @foo_inline.sse4.2
// LINUX: ret i32 ()* @foo_inline

// WINDOWS: define dso_local i32 @foo_inline.resolver() comdat
// WINDOWS: call void @__cpu_indicator_init()
// WINDOWS: call i32 @foo_inline.arch_sandybridge
// WINDOWS: call i32 @foo_inline.arch_ivybridge
// WINDOWS: call i32 @foo_inline.sse4.2
// WINDOWS: call i32 @foo_inline

// LINUX: define void @bar3()
// LINUX: call void @foo_decls.ifunc()

// WINDOWS: define dso_local void @bar3()
// WINDOWS: call void @foo_decls.resolver()

// LINUX: define void ()* @foo_decls.resolver() comdat
// LINUX: ret void ()* @foo_decls.sse4.2
// LINUX: ret void ()* @foo_decls

// WINDOWS: define dso_local void @foo_decls.resolver() comdat
// WINDOWS: call void @foo_decls.sse4.2
// Windows: call void @foo_decls

// LINUX: define void @bar4()
// LINUX: call void @foo_multi.ifunc(i32 1, double 5.{{[0+e]*}})

// WINDOWS: define dso_local void @bar4()
// WINDOWS: call void @foo_multi.resolver(i32 1, double 5.{{[0+e]*}})

// LINUX: define void (i32, double)* @foo_multi.resolver() comdat
// LINUX: and i32 %{{.*}}, 4352
// LINUX: icmp eq i32 %{{.*}}, 4352
// LINUX: ret void (i32, double)* @foo_multi.fma4_sse4.2
// LINUX: icmp eq i32 %{{.*}}, 12
// LINUX: and i32 %{{.*}}, 4352
// LINUX: icmp eq i32 %{{.*}}, 4352
// LINUX: ret void (i32, double)* @foo_multi.arch_ivybridge_fma4_sse4.2
// LINUX: and i32 %{{.*}}, 768
// LINUX: icmp eq i32 %{{.*}}, 768
// LINUX: ret void (i32, double)* @foo_multi.avx_sse4.2
// LINUX: ret void (i32, double)* @foo_multi

// WINDOWS: define dso_local void @foo_multi.resolver(i32, double) comdat
// WINDOWS: and i32 %{{.*}}, 4352
// WINDOWS: icmp eq i32 %{{.*}}, 4352
// WINDOWS: call void @foo_multi.fma4_sse4.2(i32 %0, double %1)
// WINDOWS-NEXT: ret void
// WINDOWS: icmp eq i32 %{{.*}}, 12
// WINDOWS: and i32 %{{.*}}, 4352
// WINDOWS: icmp eq i32 %{{.*}}, 4352
// WINDOWS: call void @foo_multi.arch_ivybridge_fma4_sse4.2(i32 %0, double %1)
// WINDOWS-NEXT: ret void
// WINDOWS: and i32 %{{.*}}, 768
// WINDOWS: icmp eq i32 %{{.*}}, 768
// WINDOWS: call void @foo_multi.avx_sse4.2(i32 %0, double %1)
// WINDOWS-NEXT: ret void
// WINDOWS: call void @foo_multi(i32 %0, double %1)
// WINDOWS-NEXT: ret void

// LINUX: declare i32 @foo.arch_sandybridge()

// WINDOWS: declare dso_local i32 @foo.arch_sandybridge()

// LINUX: define linkonce i32 @foo_inline.sse4.2()
// LINUX: ret i32 0

// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.sse4.2()
// WINDOWS: ret i32 0

// LINUX: declare i32 @foo_inline.arch_sandybridge()

// WINDOWS: declare dso_local i32 @foo_inline.arch_sandybridge()

// LINUX: define linkonce i32 @foo_inline.arch_ivybridge()
// LINUX: ret i32 1
// LINUX: define linkonce i32 @foo_inline()
// LINUX: ret i32 2

// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.arch_ivybridge()
// WINDOWS: ret i32 1
// WINDOWS: define linkonce_odr dso_local i32 @foo_inline()
// WINDOWS: ret i32 2

// LINUX: define linkonce void @foo_decls()
// LINUX: define linkonce void @foo_decls.sse4.2()

// WINDOWS: define linkonce_odr dso_local void @foo_decls()
// WINDOWS: define linkonce_odr dso_local void @foo_decls.sse4.2()

// LINUX: define linkonce void @foo_multi.avx_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}})
// LINUX: define linkonce void @foo_multi.fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}})
// LINUX: define linkonce void @foo_multi.arch_ivybridge_fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}})

// WINDOWS: define linkonce_odr dso_local void @foo_multi.avx_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}})
// WINDOWS: define linkonce_odr dso_local void @foo_multi.fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}})
// WINDOWS: define linkonce_odr dso_local void @foo_multi.arch_ivybridge_fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}})
108 changes: 71 additions & 37 deletions clang/test/CodeGenCXX/attr-target-mv-diff-ns.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
// Test ensures that this properly differentiates between types in different
// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
// Test ensures that this properly differentiates between types in different
// namespaces.
int __attribute__((target("sse4.2"))) foo(int) { return 0; }
int __attribute__((target("arch=sandybridge"))) foo(int);
Expand All @@ -17,38 +18,71 @@ int bar() {
return foo(1) + ns::foo(2);
}

// CHECK: @_Z3fooi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z3fooi.resolver
// CHECK: @_ZN2ns3fooEi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_ZN2ns3fooEi.resolver

// CHECK: define i32 @_Z3fooi.sse4.2(i32)
// CHECK: ret i32 0
// CHECK: define i32 @_Z3fooi.arch_ivybridge(i32)
// CHECK: ret i32 1
// CHECK: define i32 @_Z3fooi(i32)
// CHECK: ret i32 2

// CHECK: define i32 @_ZN2ns3fooEi.sse4.2(i32)
// CHECK: ret i32 0
// CHECK: define i32 @_ZN2ns3fooEi.arch_ivybridge(i32)
// CHECK: ret i32 1
// CHECK: define i32 @_ZN2ns3fooEi(i32)
// CHECK: ret i32 2

// CHECK: define i32 @_Z3barv()
// CHECK: call i32 @_Z3fooi.ifunc(i32 1)
// CHECK: call i32 @_ZN2ns3fooEi.ifunc(i32 2)

// CHECK: define i32 (i32)* @_Z3fooi.resolver() comdat
// CHECK: ret i32 (i32)* @_Z3fooi.arch_sandybridge
// CHECK: ret i32 (i32)* @_Z3fooi.arch_ivybridge
// CHECK: ret i32 (i32)* @_Z3fooi.sse4.2
// CHECK: ret i32 (i32)* @_Z3fooi
//
// CHECK: define i32 (i32)* @_ZN2ns3fooEi.resolver() comdat
// CHECK: ret i32 (i32)* @_ZN2ns3fooEi.arch_sandybridge
// CHECK: ret i32 (i32)* @_ZN2ns3fooEi.arch_ivybridge
// CHECK: ret i32 (i32)* @_ZN2ns3fooEi.sse4.2
// CHECK: ret i32 (i32)* @_ZN2ns3fooEi

// CHECK: declare i32 @_Z3fooi.arch_sandybridge(i32)
// CHECK: declare i32 @_ZN2ns3fooEi.arch_sandybridge(i32)
// LINUX: @_Z3fooi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z3fooi.resolver
// LINUX: @_ZN2ns3fooEi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_ZN2ns3fooEi.resolver

// LINUX: define i32 @_Z3fooi.sse4.2(i32)
// LINUX: ret i32 0
// LINUX: define i32 @_Z3fooi.arch_ivybridge(i32)
// LINUX: ret i32 1
// LINUX: define i32 @_Z3fooi(i32)
// LINUX: ret i32 2

// WINDOWS: define dso_local i32 @"?foo@@YAHH@Z.sse4.2"(i32)
// WINDOWS: ret i32 0
// WINDOWS: define dso_local i32 @"?foo@@YAHH@Z.arch_ivybridge"(i32)
// WINDOWS: ret i32 1
// WINDOWS: define dso_local i32 @"?foo@@YAHH@Z"(i32)
// WINDOWS: ret i32 2

// LINUX: define i32 @_ZN2ns3fooEi.sse4.2(i32)
// LINUX: ret i32 0
// LINUX: define i32 @_ZN2ns3fooEi.arch_ivybridge(i32)
// LINUX: ret i32 1
// LINUX: define i32 @_ZN2ns3fooEi(i32)
// LINUX: ret i32 2

// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z.sse4.2"(i32)
// WINDOWS: ret i32 0
// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z.arch_ivybridge"(i32)
// WINDOWS: ret i32 1
// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z"(i32)
// WINDOWS: ret i32 2

// LINUX: define i32 @_Z3barv()
// LINUX: call i32 @_Z3fooi.ifunc(i32 1)
// LINUX: call i32 @_ZN2ns3fooEi.ifunc(i32 2)

// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"()
// WINDOWS: call i32 @"?foo@@YAHH@Z.resolver"(i32 1)
// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.resolver"(i32 2)

// LINUX: define i32 (i32)* @_Z3fooi.resolver() comdat
// LINUX: ret i32 (i32)* @_Z3fooi.arch_sandybridge
// LINUX: ret i32 (i32)* @_Z3fooi.arch_ivybridge
// LINUX: ret i32 (i32)* @_Z3fooi.sse4.2
// LINUX: ret i32 (i32)* @_Z3fooi

// WINDOWS: define dso_local i32 @"?foo@@YAHH@Z.resolver"(i32) comdat
// WINDOWS: call i32 @"?foo@@YAHH@Z.arch_sandybridge"(i32 %0)
// WINDOWS: call i32 @"?foo@@YAHH@Z.arch_ivybridge"(i32 %0)
// WINDOWS: call i32 @"?foo@@YAHH@Z.sse4.2"(i32 %0)
// WINDOWS: call i32 @"?foo@@YAHH@Z"(i32 %0)

// LINUX: define i32 (i32)* @_ZN2ns3fooEi.resolver() comdat
// LINUX: ret i32 (i32)* @_ZN2ns3fooEi.arch_sandybridge
// LINUX: ret i32 (i32)* @_ZN2ns3fooEi.arch_ivybridge
// LINUX: ret i32 (i32)* @_ZN2ns3fooEi.sse4.2
// LINUX: ret i32 (i32)* @_ZN2ns3fooEi

// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z.resolver"(i32) comdat
// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.arch_sandybridge"(i32 %0)
// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.arch_ivybridge"(i32 %0)
// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.sse4.2"(i32 %0)
// WINDOWS: call i32 @"?foo@ns@@YAHH@Z"(i32 %0)

// LINUX: declare i32 @_Z3fooi.arch_sandybridge(i32)
// LINUX: declare i32 @_ZN2ns3fooEi.arch_sandybridge(i32)

// WINDOWS: declare dso_local i32 @"?foo@@YAHH@Z.arch_sandybridge"(i32)
// WINDOWS: declare dso_local i32 @"?foo@ns@@YAHH@Z.arch_sandybridge"(i32)
25 changes: 17 additions & 8 deletions clang/test/CodeGenCXX/attr-target-mv-func-ptrs.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
void temp();
void temp(int);
using FP = void(*)(int);
Expand Down Expand Up @@ -31,15 +32,23 @@ int bar() {
return Free(1) + (s.*Member)(2);
}

// LINUX: @_Z3fooi.ifunc
// LINUX: @_ZN1S3fooEi.ifunc

// CHECK: @_Z3fooi.ifunc
// CHECK: @_ZN1S3fooEi.ifunc

// CHECK: define i32 @_Z3barv()
// LINUX: define i32 @_Z3barv()
// Store to Free of ifunc
// CHECK: store i32 (i32)* @_Z3fooi.ifunc
// LINUX: store i32 (i32)* @_Z3fooi.ifunc
// Store to Member of ifunc
// CHECK: store { i64, i64 } { i64 ptrtoint (i32 (%struct.S*, i32)* @_ZN1S3fooEi.ifunc to i64), i64 0 }, { i64, i64 }* [[MEMBER:%[a-z]+]]
// LINUX: store { i64, i64 } { i64 ptrtoint (i32 (%struct.S*, i32)* @_ZN1S3fooEi.ifunc to i64), i64 0 }, { i64, i64 }* [[MEMBER:%[a-z]+]]

// Call to 'f' with the ifunc
// CHECK: call void @_Z1fPFiiEM1SFiiE(i32 (i32)* @_Z3fooi.ifunc
// LINUX: call void @_Z1fPFiiEM1SFiiE(i32 (i32)* @_Z3fooi.ifunc

// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"()
// Store to Free
// WINDOWS: store i32 (i32)* @"?foo@@YAHH@Z.resolver", i32 (i32)**
// Store to Member
// WINDOWS: store i8* bitcast (i32 (%struct.S*, i32)* @"?foo@S@@QEAAHH@Z.resolver" to i8*), i8**

// Call to 'f'
// WINDOWS: call void @"?f@@YAXP6AHH@ZP8S@@EAAHH@Z@Z"(i32 (i32)* @"?foo@@YAHH@Z.resolver", i8* bitcast (i32 (%struct.S*, i32)* @"?foo@S@@QEAAHH@Z.resolver" to i8*))
81 changes: 81 additions & 0 deletions clang/test/CodeGenCXX/attr-target-mv-inalloca.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// RUN: %clang_cc1 -std=c++11 -triple i686-windows-msvc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-msvc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS64

struct Foo {
Foo();
Foo(const Foo &o);
~Foo();
int x;
};
int __attribute__((target("default"))) bar(Foo o) { return o.x; }
int __attribute__((target("sse4.2"))) bar(Foo o) { return o.x + 1; }
int __attribute__((target("arch=ivybridge"))) bar(Foo o) { return o.x + 2; }

void usage() {
Foo f;
bar(f);
}

// WINDOWS: define dso_local i32 @"?bar@@YAHUFoo@@@Z"(<{ %struct.Foo }>* inalloca)
// WINDOWS: %[[O:[0-9a-zA-Z]+]] = getelementptr inbounds <{ %struct.Foo }>, <{ %struct.Foo }>* %0, i32 0, i32 0
// WINDOWS: %[[X:[0-9a-zA-Z]+]] = getelementptr inbounds %struct.Foo, %struct.Foo* %[[O]], i32 0, i32 0
// WINDOWS: %[[LOAD:[0-9a-zA-Z]+]] = load i32, i32* %[[X]]
// WINDOWS: ret i32 %[[LOAD]]

// WINDOWS: define dso_local i32 @"?bar@@YAHUFoo@@@Z.sse4.2"(<{ %struct.Foo }>* inalloca)
// WINDOWS: %[[O:[0-9a-zA-Z]+]] = getelementptr inbounds <{ %struct.Foo }>, <{ %struct.Foo }>* %0, i32 0, i32 0
// WINDOWS: %[[X:[0-9a-zA-Z]+]] = getelementptr inbounds %struct.Foo, %struct.Foo* %[[O]], i32 0, i32 0
// WINDOWS: %[[LOAD:[0-9a-zA-Z]+]] = load i32, i32* %[[X]]
// WINDOWS: %[[ADD:[0-9a-zA-Z]+]] = add nsw i32 %[[LOAD]], 1
// WINDOWS: ret i32 %[[ADD]]

// WINDOWS: define dso_local i32 @"?bar@@YAHUFoo@@@Z.arch_ivybridge"(<{ %struct.Foo }>* inalloca)
// WINDOWS: %[[O:[0-9a-zA-Z]+]] = getelementptr inbounds <{ %struct.Foo }>, <{ %struct.Foo }>* %0, i32 0, i32 0
// WINDOWS: %[[X:[0-9a-zA-Z]+]] = getelementptr inbounds %struct.Foo, %struct.Foo* %[[O]], i32 0, i32 0
// WINDOWS: %[[LOAD:[0-9a-zA-Z]+]] = load i32, i32* %[[X]]
// WINDOWS: %[[ADD:[0-9a-zA-Z]+]] = add nsw i32 %[[LOAD]], 2
// WINDOWS: ret i32 %[[ADD]]

// WINDOWS: define dso_local void @"?usage@@YAXXZ"()
// WINDOWS: %[[F:[0-9a-zA-Z]+]] = alloca %struct.Foo
// WINDOWS: %[[ARGMEM:[0-9a-zA-Z]+]] = alloca inalloca <{ %struct.Foo }>
// WINDOWS: %[[CALL:[0-9a-zA-Z]+]] = call i32 @"?bar@@YAHUFoo@@@Z.resolver"(<{ %struct.Foo }>* inalloca %[[ARGMEM]])

// WINDOWS: define dso_local i32 @"?bar@@YAHUFoo@@@Z.resolver"(<{ %struct.Foo }>*)
// WINDOWS: %[[RET:[0-9a-zA-Z]+]] = musttail call i32 @"?bar@@YAHUFoo@@@Z.arch_ivybridge"(<{ %struct.Foo }>* %0)
// WINDOWS-NEXT: ret i32 %[[RET]]
// WINDOWS: %[[RET:[0-9a-zA-Z]+]] = musttail call i32 @"?bar@@YAHUFoo@@@Z.sse4.2"(<{ %struct.Foo }>* %0)
// WINDOWS-NEXT: ret i32 %[[RET]]
// WINDOWS: %[[RET:[0-9a-zA-Z]+]] = musttail call i32 @"?bar@@YAHUFoo@@@Z"(<{ %struct.Foo }>* %0)
// WINDOWS-NEXT: ret i32 %[[RET]]


// WINDOWS64: define dso_local i32 @"?bar@@YAHUFoo@@@Z"(%struct.Foo* %[[O:[0-9a-zA-Z]+]])
// WINDOWS64: %[[X:[0-9a-zA-Z]+]] = getelementptr inbounds %struct.Foo, %struct.Foo* %[[O]], i32 0, i32 0
// WINDOWS64: %[[LOAD:[0-9a-zA-Z]+]] = load i32, i32* %[[X]]
// WINDOWS64: ret i32 %[[LOAD]]

// WINDOWS64: define dso_local i32 @"?bar@@YAHUFoo@@@Z.sse4.2"(%struct.Foo* %[[O:[0-9a-zA-Z]+]])
// WINDOWS64: %[[X:[0-9a-zA-Z]+]] = getelementptr inbounds %struct.Foo, %struct.Foo* %[[O]], i32 0, i32 0
// WINDOWS64: %[[LOAD:[0-9a-zA-Z]+]] = load i32, i32* %[[X]]
// WINDOWS64: %[[ADD:[0-9a-zA-Z]+]] = add nsw i32 %[[LOAD]], 1
// WINDOWS64: ret i32 %[[ADD]]

// WINDOWS64: define dso_local i32 @"?bar@@YAHUFoo@@@Z.arch_ivybridge"(%struct.Foo* %[[O:[0-9a-zA-Z]+]])
// WINDOWS64: %[[X:[0-9a-zA-Z]+]] = getelementptr inbounds %struct.Foo, %struct.Foo* %[[O]], i32 0, i32 0
// WINDOWS64: %[[LOAD:[0-9a-zA-Z]+]] = load i32, i32* %[[X]]
// WINDOWS64: %[[ADD:[0-9a-zA-Z]+]] = add nsw i32 %[[LOAD]], 2
// WINDOWS64: ret i32 %[[ADD]]

// WINDOWS64: define dso_local void @"?usage@@YAXXZ"()
// WINDOWS64: %[[F:[0-9a-zA-Z]+]] = alloca %struct.Foo
// WINDOWS64: %[[ARG:[0-9a-zA-Z.]+]] = alloca %struct.Foo
// WINDOWS64: %[[CALL:[0-9a-zA-Z]+]] = call i32 @"?bar@@YAHUFoo@@@Z.resolver"(%struct.Foo* %[[ARG]])

// WINDOWS64: define dso_local i32 @"?bar@@YAHUFoo@@@Z.resolver"(%struct.Foo*)
// WINDOWS64: %[[RET:[0-9a-zA-Z]+]] = musttail call i32 @"?bar@@YAHUFoo@@@Z.arch_ivybridge"(%struct.Foo* %0)
// WINDOWS64-NEXT: ret i32 %[[RET]]
// WINDOWS64: %[[RET:[0-9a-zA-Z]+]] = musttail call i32 @"?bar@@YAHUFoo@@@Z.sse4.2"(%struct.Foo* %0)
// WINDOWS64-NEXT: ret i32 %[[RET]]
// WINDOWS64: %[[RET:[0-9a-zA-Z]+]] = musttail call i32 @"?bar@@YAHUFoo@@@Z"(%struct.Foo* %0)
// WINDOWS64-NEXT: ret i32 %[[RET]]
233 changes: 154 additions & 79 deletions clang/test/CodeGenCXX/attr-target-mv-member-funcs.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS

struct S {
int __attribute__((target("sse4.2"))) foo(int) { return 0; }
Expand Down Expand Up @@ -64,82 +65,156 @@ int templ_use() {
return a.foo(1) + b.foo(2);
}

// CHECK: @_ZN1SaSERKS_.ifunc = ifunc %struct.S* (%struct.S*, %struct.S*), %struct.S* (%struct.S*, %struct.S*)* ()* @_ZN1SaSERKS_.resolver
// CHECK: @_ZNK9ConvertTocv1SEv.ifunc = ifunc void (%struct.ConvertTo*), void (%struct.ConvertTo*)* ()* @_ZNK9ConvertTocv1SEv.resolver
// CHECK: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver
// CHECK: @_ZN2S23fooEi.ifunc = ifunc i32 (%struct.S2*, i32), i32 (%struct.S2*, i32)* ()* @_ZN2S23fooEi.resolver
// LINUX: @_ZN1SaSERKS_.ifunc = ifunc %struct.S* (%struct.S*, %struct.S*), %struct.S* (%struct.S*, %struct.S*)* ()* @_ZN1SaSERKS_.resolver
// LINUX: @_ZNK9ConvertTocv1SEv.ifunc = ifunc void (%struct.ConvertTo*), void (%struct.ConvertTo*)* ()* @_ZNK9ConvertTocv1SEv.resolver
// LINUX: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver
// LINUX: @_ZN2S23fooEi.ifunc = ifunc i32 (%struct.S2*, i32), i32 (%struct.S2*, i32)* ()* @_ZN2S23fooEi.resolver
// Templates:
// CHECK: @_ZN5templIiE3fooEi.ifunc = ifunc i32 (%struct.templ*, i32), i32 (%struct.templ*, i32)* ()* @_ZN5templIiE3fooEi.resolver
// CHECK: @_ZN5templIdE3fooEi.ifunc = ifunc i32 (%struct.templ.0*, i32), i32 (%struct.templ.0*, i32)* ()* @_ZN5templIdE3fooEi.resolver

// CHECK: define i32 @_Z3barv()
// CHECK: %s = alloca %struct.S, align 1
// CHECK: %s2 = alloca %struct.S, align 1
// CHECK: %C = alloca %struct.ConvertTo, align 1
// CHECK: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2
// CHECK: call void @_ZNK9ConvertTocv1SEv.ifunc(%struct.ConvertTo* %C)
// CHECK: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2
// CHECK: call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0)

// CHECK: define %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.resolver() comdat
// CHECK: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.arch_ivybridge
// CHECK: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_

// CHECK: define void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.resolver() comdat
// CHECK: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.arch_ivybridge
// CHECK: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv

// CHECK: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat
// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge
// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge
// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2
// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi

// CHECK: define i32 @_Z4bar2v()
// CHECK:call i32 @_ZN2S23fooEi.ifunc
// define i32 (%struct.S2*, i32)* @_ZN2S23fooEi.resolver() comdat
// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_sandybridge
// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_ivybridge
// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.sse4.2
// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi

// CHECK: define i32 @_ZN2S23fooEi.sse4.2(%struct.S2* %this, i32)
// CHECK: define i32 @_ZN2S23fooEi.arch_ivybridge(%struct.S2* %this, i32)
// CHECK: define i32 @_ZN2S23fooEi(%struct.S2* %this, i32)

// CHECK: define i32 @_Z9templ_usev()
// CHECK: call i32 @_ZN5templIiE3fooEi.ifunc
// CHECK: call i32 @_ZN5templIdE3fooEi.ifunc

// CHECK: define i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.resolver() comdat
// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_sandybridge
// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_ivybridge
// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.sse4.2
// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi

// CHECK: define i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.resolver() comdat
// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_sandybridge
// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_ivybridge
// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.sse4.2
// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi

// CHECK: define linkonce_odr i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32)
// CHECK: ret i32 0

// CHECK: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32)

// CHECK: define linkonce_odr i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32)
// CHECK: ret i32 1

// CHECK: define linkonce_odr i32 @_ZN1S3fooEi(%struct.S* %this, i32)
// CHECK: ret i32 2

// CHECK: define linkonce_odr i32 @_ZN5templIiE3fooEi.sse4.2
// CHECK: declare i32 @_ZN5templIiE3fooEi.arch_sandybridge
// CHECK: define linkonce_odr i32 @_ZN5templIiE3fooEi.arch_ivybridge
// CHECK: define linkonce_odr i32 @_ZN5templIiE3fooEi

// CHECK: define linkonce_odr i32 @_ZN5templIdE3fooEi.sse4.2
// CHECK: declare i32 @_ZN5templIdE3fooEi.arch_sandybridge
// CHECK: define linkonce_odr i32 @_ZN5templIdE3fooEi.arch_ivybridge
// CHECK: define linkonce_odr i32 @_ZN5templIdE3fooEi
// LINUX: @_ZN5templIiE3fooEi.ifunc = ifunc i32 (%struct.templ*, i32), i32 (%struct.templ*, i32)* ()* @_ZN5templIiE3fooEi.resolver
// LINUX: @_ZN5templIdE3fooEi.ifunc = ifunc i32 (%struct.templ.0*, i32), i32 (%struct.templ.0*, i32)* ()* @_ZN5templIdE3fooEi.resolver

// LINUX: define i32 @_Z3barv()
// LINUX: %s = alloca %struct.S, align 1
// LINUX: %s2 = alloca %struct.S, align 1
// LINUX: %C = alloca %struct.ConvertTo, align 1
// LINUX: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2
// LINUX: call void @_ZNK9ConvertTocv1SEv.ifunc(%struct.ConvertTo* %C)
// LINUX: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2
// LINUX: call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0)

// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"()
// WINDOWS: %s = alloca %struct.S, align 1
// WINDOWS: %s2 = alloca %struct.S, align 1
// WINDOWS: %C = alloca %struct.ConvertTo, align 1
// WINDOWS: call dereferenceable(1) %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.resolver"(%struct.S* %s2
// WINDOWS: call void @"??BConvertTo@@QEBA?AUS@@XZ.resolver"(%struct.ConvertTo* %C
// WINDOWS: call dereferenceable(1) %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.resolver"(%struct.S* %s2
// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S* %s, i32 0)

// LINUX: define %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.resolver() comdat
// LINUX: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.arch_ivybridge
// LINUX: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_

// WINDOWS: define dso_local %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.resolver"(%struct.S*, %struct.S*)
// WINDOWS: call %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.arch_ivybridge"
// WINDOWS: call %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z"

// LINUX: define void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.resolver() comdat
// LINUX: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.arch_ivybridge
// LINUX: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv

// WINDOWS: define dso_local void @"??BConvertTo@@QEBA?AUS@@XZ.resolver"(%struct.ConvertTo*, %struct.S*)
// WINDOWS: call void @"??BConvertTo@@QEBA?AUS@@XZ.arch_ivybridge"
// WINDOWS: call void @"??BConvertTo@@QEBA?AUS@@XZ"

// LINUX: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat
// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge
// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge
// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2
// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi

// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S*, i32)
// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge"
// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge"
// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.sse4.2"
// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z"

// LINUX: define i32 @_Z4bar2v()
// LINUX: call i32 @_ZN2S23fooEi.ifunc

// WINDOWS: define dso_local i32 @"?bar2@@YAHXZ"()
// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.resolver"

// LINUX: define i32 (%struct.S2*, i32)* @_ZN2S23fooEi.resolver() comdat
// LINUX: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_sandybridge
// LINUX: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_ivybridge
// LINUX: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.sse4.2
// LINUX: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi

// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z.resolver"(%struct.S2*, i32)
// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.arch_sandybridge"
// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.arch_ivybridge"
// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.sse4.2"
// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z"

// LINUX: define i32 @_ZN2S23fooEi.sse4.2(%struct.S2* %this, i32)
// LINUX: define i32 @_ZN2S23fooEi.arch_ivybridge(%struct.S2* %this, i32)
// LINUX: define i32 @_ZN2S23fooEi(%struct.S2* %this, i32)

// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z.sse4.2"(%struct.S2* %this, i32)
// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z.arch_ivybridge"(%struct.S2* %this, i32)
// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z"(%struct.S2* %this, i32)

// LINUX: define i32 @_Z9templ_usev()
// LINUX: call i32 @_ZN5templIiE3fooEi.ifunc
// LINUX: call i32 @_ZN5templIdE3fooEi.ifunc

// WINDOWS: define dso_local i32 @"?templ_use@@YAHXZ"()
// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.resolver"
// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.resolver"

// LINUX: define i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.resolver() comdat
// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_sandybridge
// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_ivybridge
// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.sse4.2
// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi

// WINDOWS: define dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.resolver"(%struct.templ*, i32)
// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_sandybridge"
// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_ivybridge"
// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.sse4.2"
// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z"

// LINUX: define i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.resolver() comdat
// LINUX: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_sandybridge
// LINUX: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_ivybridge
// LINUX: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.sse4.2
// LINUX: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi

// WINDOWS: define dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.resolver"(%struct.templ.0*, i32) comdat
// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_sandybridge"
// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_ivybridge"
// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.sse4.2"
// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z"

// LINUX: define linkonce_odr i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32)
// LINUX: ret i32 0

// WINDOWS: define linkonce_odr dso_local i32 @"?foo@S@@QEAAHH@Z.sse4.2"(%struct.S* %this, i32)
// WINDOWS: ret i32 0

// LINUX: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32)

// WINDOWS: declare dso_local i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge"(%struct.S*, i32)

// LINUX: define linkonce_odr i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32)
// LINUX: ret i32 1

// WINDOWS: define linkonce_odr dso_local i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge"(%struct.S* %this, i32)
// WINDOWS: ret i32 1

// LINUX: define linkonce_odr i32 @_ZN1S3fooEi(%struct.S* %this, i32)
// LINUX: ret i32 2

// WINDOWS: define linkonce_odr dso_local i32 @"?foo@S@@QEAAHH@Z"(%struct.S* %this, i32)
// WINDOWS: ret i32 2

// LINUX: define linkonce_odr i32 @_ZN5templIiE3fooEi.sse4.2
// LINUX: declare i32 @_ZN5templIiE3fooEi.arch_sandybridge
// LINUX: define linkonce_odr i32 @_ZN5templIiE3fooEi.arch_ivybridge
// LINUX: define linkonce_odr i32 @_ZN5templIiE3fooEi

// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.sse4.2"
// WINDOWS: declare dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_sandybridge"
// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_ivybridge"
// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z"

// LINUX: define linkonce_odr i32 @_ZN5templIdE3fooEi.sse4.2
// LINUX: declare i32 @_ZN5templIdE3fooEi.arch_sandybridge
// LINUX: define linkonce_odr i32 @_ZN5templIdE3fooEi.arch_ivybridge
// LINUX: define linkonce_odr i32 @_ZN5templIdE3fooEi

// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.sse4.2"
// WINDOWS: declare dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_sandybridge"
// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_ivybridge"
// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z"
56 changes: 39 additions & 17 deletions clang/test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
struct S {
int __attribute__((target("sse4.2"))) foo(int);
int __attribute__((target("arch=sandybridge"))) foo(int);
Expand All @@ -15,25 +16,46 @@ int bar() {
return s.foo(0);
}

// CHECK: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver
// LINUX: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver

// CHECK: define i32 @_ZN1S3fooEi(%struct.S* %this, i32)
// CHECK: ret i32 2
// LINUX: define i32 @_ZN1S3fooEi(%struct.S* %this, i32)
// LINUX: ret i32 2

// CHECK: define i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32)
// CHECK: ret i32 0
// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z"(%struct.S* %this, i32)
// WINDOWS: ret i32 2

// CHECK: define i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32)
// CHECK: ret i32 1
// LINUX: define i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32)
// LINUX: ret i32 0

// CHECK: define i32 @_Z3barv()
// CHECK: %s = alloca %struct.S, align 1
// CHECK: %call = call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0)
// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.sse4.2"(%struct.S* %this, i32)
// WINDOWS: ret i32 0

// CHECK: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat
// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge
// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge
// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2
// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi
// LINUX: define i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32)
// LINUX: ret i32 1

// CHECK: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32)
// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge"(%struct.S* %this, i32)
// WINDOWS: ret i32 1

// LINUX: define i32 @_Z3barv()
// LINUX: %s = alloca %struct.S, align 1
// LINUX: %call = call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0)

// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"()
// WINDOWS: %s = alloca %struct.S, align 1
// WINDOWS: %call = call i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S* %s, i32 0)

// LINUX: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat
// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge
// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge
// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2
// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi

// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S*, i32) comdat
// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge"(%struct.S* %0, i32 %1)
// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge"(%struct.S* %0, i32 %1)
// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.sse4.2"(%struct.S* %0, i32 %1)
// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z"(%struct.S* %0, i32 %1)

// LINUX: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32)

// WINDOWS: declare dso_local i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge"(%struct.S*, i32)
104 changes: 68 additions & 36 deletions clang/test/CodeGenCXX/attr-target-mv-overloads.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS

int __attribute__((target("sse4.2"))) foo_overload(int) { return 0; }
int __attribute__((target("arch=sandybridge"))) foo_overload(int);
Expand All @@ -13,38 +14,69 @@ int bar2() {
return foo_overload() + foo_overload(1);
}

// CHECK: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver
// CHECK: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver


// CHECK: define i32 @_Z12foo_overloadi.sse4.2(i32)
// CHECK: ret i32 0
// CHECK: define i32 @_Z12foo_overloadi.arch_ivybridge(i32)
// CHECK: ret i32 1
// CHECK: define i32 @_Z12foo_overloadi(i32)
// CHECK: ret i32 2
// CHECK: define i32 @_Z12foo_overloadv.sse4.2()
// CHECK: ret i32 0
// CHECK: define i32 @_Z12foo_overloadv.arch_ivybridge()
// CHECK: ret i32 1
// CHECK: define i32 @_Z12foo_overloadv()
// CHECK: ret i32 2

// CHECK: define i32 @_Z4bar2v()
// CHECK: call i32 @_Z12foo_overloadv.ifunc()
// CHECK: call i32 @_Z12foo_overloadi.ifunc(i32 1)

// CHECK: define i32 ()* @_Z12foo_overloadv.resolver() comdat
// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge
// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge
// CHECK: ret i32 ()* @_Z12foo_overloadv.sse4.2
// CHECK: ret i32 ()* @_Z12foo_overloadv

// CHECK: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat
// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge
// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge
// CHECK: ret i32 (i32)* @_Z12foo_overloadi.sse4.2
// CHECK: ret i32 (i32)* @_Z12foo_overloadi

// CHECK: declare i32 @_Z12foo_overloadv.arch_sandybridge()
// CHECK: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32)
// LINUX: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver
// LINUX: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver

// LINUX: define i32 @_Z12foo_overloadi.sse4.2(i32)
// LINUX: ret i32 0
// LINUX: define i32 @_Z12foo_overloadi.arch_ivybridge(i32)
// LINUX: ret i32 1
// LINUX: define i32 @_Z12foo_overloadi(i32)
// LINUX: ret i32 2
// LINUX: define i32 @_Z12foo_overloadv.sse4.2()
// LINUX: ret i32 0
// LINUX: define i32 @_Z12foo_overloadv.arch_ivybridge()
// LINUX: ret i32 1
// LINUX: define i32 @_Z12foo_overloadv()
// LINUX: ret i32 2

// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.sse4.2"(i32)
// WINDOWS: ret i32 0
// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.arch_ivybridge"(i32)
// WINDOWS: ret i32 1
// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z"(i32)
// WINDOWS: ret i32 2
// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.sse4.2"()
// WINDOWS: ret i32 0
// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.arch_ivybridge"()
// WINDOWS: ret i32 1
// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ"()
// WINDOWS: ret i32 2

// LINUX: define i32 @_Z4bar2v()
// LINUX: call i32 @_Z12foo_overloadv.ifunc()
// LINUX: call i32 @_Z12foo_overloadi.ifunc(i32 1)

// WINDOWS: define dso_local i32 @"?bar2@@YAHXZ"()
// WINDOWS: call i32 @"?foo_overload@@YAHXZ.resolver"()
// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.resolver"(i32 1)

// LINUX: define i32 ()* @_Z12foo_overloadv.resolver() comdat
// LINUX: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge
// LINUX: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge
// LINUX: ret i32 ()* @_Z12foo_overloadv.sse4.2
// LINUX: ret i32 ()* @_Z12foo_overloadv

// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.resolver"() comdat
// WINDOWS: call i32 @"?foo_overload@@YAHXZ.arch_sandybridge"
// WINDOWS: call i32 @"?foo_overload@@YAHXZ.arch_ivybridge"
// WINDOWS: call i32 @"?foo_overload@@YAHXZ.sse4.2"
// WINDOWS: call i32 @"?foo_overload@@YAHXZ"

// LINUX: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat
// LINUX: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge
// LINUX: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge
// LINUX: ret i32 (i32)* @_Z12foo_overloadi.sse4.2
// LINUX: ret i32 (i32)* @_Z12foo_overloadi

// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.resolver"(i32) comdat
// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_sandybridge"
// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_ivybridge"
// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.sse4.2"
// WINDOWS: call i32 @"?foo_overload@@YAHH@Z"

// LINUX: declare i32 @_Z12foo_overloadv.arch_sandybridge()
// LINUX: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32)

// WINDOWS: declare dso_local i32 @"?foo_overload@@YAHXZ.arch_sandybridge"()
// WINDOWS: declare dso_local i32 @"?foo_overload@@YAHH@Z.arch_sandybridge"(i32)
1 change: 0 additions & 1 deletion clang/test/Sema/attr-target-mv-bad-target.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
// RUN: %clang_cc1 -triple x86_64-windows-pc -fsyntax-only -verify %s
// RUN: %clang_cc1 -triple arm-none-eabi -fsyntax-only -verify %s

int __attribute__((target("sse4.2"))) redecl1(void) { return 1; }
Expand Down