Skip to content

Commit

Permalink
[clang] Make -masm=intel affect inline asm style
Browse files Browse the repository at this point in the history
With this,

  void f() {  __asm__("mov eax, ebx"); }

now compiles with clang with -masm=intel.

This matches gcc.

The flag is not accepted in clang-cl mode. It has no effect on
MSVC-style `__asm {}` blocks, which are unconditionally in intel
mode both before and after this change.

One difference to gcc is that in clang, inline asm strings are
"local" while they're "global" in gcc. Building the following with
-masm=intel works with clang, but not with gcc where the ".att_syntax"
from the 2nd __asm__() is in effect until file end (or until a
".intel_syntax" somewhere later in the file):

  __asm__("mov eax, ebx");
  __asm__(".att_syntax\nmovl %ebx, %eax");
  __asm__("mov eax, ebx");

This also updates clang's intrinsic headers to work both in
-masm=att (the default) and -masm=intel modes.
The official solution for this according to "Multiple assembler dialects in asm
templates" in gcc docs->Extensions->Inline Assembly->Extended Asm
is to write every inline asm snippet twice:

    bt{l %[Offset],%[Base] | %[Base],%[Offset]}

This works in LLVM after D113932 and D113894, so use that.

(Just putting `.att_syntax` at the start of the snippet works in some but not
all cases: When LLVM interpolates in parameters like `%0`, it uses at&t or
intel syntax according to the inline asm snippet's flavor, so the `.att_syntax`
within the snippet happens to late: The interpolated-in parameter is already
in intel style, and then won't parse in the switched `.att_syntax`.)

It might be nice to invent a `#pragma clang asm_dialect push "att"` /
`#pragma clang asm_dialect pop` to be able to force asm style per snippet,
so that the inline asm string doesn't contain the same code in two variants,
but let's leave that for a follow-up.

Fixes PR21401 and PR20241.

Differential Revision: https://reviews.llvm.org/D113707
  • Loading branch information
nico committed Nov 17, 2021
1 parent 68311f2 commit ae98182
Show file tree
Hide file tree
Showing 14 changed files with 163 additions and 36 deletions.
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/CodeGenOptions.def
Expand Up @@ -92,6 +92,8 @@ CODEGENOPT(EmulatedTLS , 1, 0) ///< Set by default or -f[no-]emulated-tls.
CODEGENOPT(ExplicitEmulatedTLS , 1, 0) ///< Set if -f[no-]emulated-tls is used.
/// Embed Bitcode mode (off/all/bitcode/marker).
ENUM_CODEGENOPT(EmbedBitcode, EmbedBitcodeKind, 2, Embed_Off)
/// Inline asm dialect, -masm=(att|intel)
ENUM_CODEGENOPT(InlineAsmDialect, InlineAsmDialectKind, 1, IAD_ATT)
CODEGENOPT(ForbidGuardVariables , 1, 0) ///< Issue errors if C++ guard variables
///< are required.
CODEGENOPT(FunctionSections , 1, 0) ///< Set when -ffunction-sections is enabled.
Expand Down
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/CodeGenOptions.h
Expand Up @@ -97,6 +97,11 @@ class CodeGenOptions : public CodeGenOptionsBase {
Embed_Marker // Embed a marker as a placeholder for bitcode.
};

enum InlineAsmDialectKind {
IAD_ATT,
IAD_Intel,
};

// This field stores one of the allowed values for the option
// -fbasic-block-sections=. The allowed values with this option are:
// {"labels", "all", "list=<file>", "none"}.
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Driver/Options.td
Expand Up @@ -3131,6 +3131,7 @@ def mwatchos_simulator_version_min_EQ : Joined<["-"], "mwatchos-simulator-versio
def mwatchsimulator_version_min_EQ : Joined<["-"], "mwatchsimulator-version-min=">, Alias<mwatchos_simulator_version_min_EQ>;
def march_EQ : Joined<["-"], "march=">, Group<m_Group>, Flags<[CoreOption]>;
def masm_EQ : Joined<["-"], "masm=">, Group<m_Group>, Flags<[NoXarchOption]>;
def inline_asm_EQ : Joined<["-"], "inline-asm=">, Group<m_Group>, Flags<[CC1Option]>;
def mcmodel_EQ : Joined<["-"], "mcmodel=">, Group<m_Group>, Flags<[CC1Option]>,
MarshallingInfoString<TargetOpts<"CodeModel">, [{"default"}]>;
def mtls_size_EQ : Joined<["-"], "mtls-size=">, Group<m_Group>, Flags<[NoXarchOption, CC1Option]>,
Expand Down
8 changes: 7 additions & 1 deletion clang/lib/CodeGen/CGStmt.cpp
Expand Up @@ -2629,8 +2629,14 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
llvm::FunctionType::get(ResultType, ArgTypes, false);

bool HasSideEffect = S.isVolatile() || S.getNumOutputs() == 0;

llvm::InlineAsm::AsmDialect GnuAsmDialect =
CGM.getCodeGenOpts().getInlineAsmDialect() == CodeGenOptions::IAD_ATT
? llvm::InlineAsm::AD_ATT
: llvm::InlineAsm::AD_Intel;
llvm::InlineAsm::AsmDialect AsmDialect = isa<MSAsmStmt>(&S) ?
llvm::InlineAsm::AD_Intel : llvm::InlineAsm::AD_ATT;
llvm::InlineAsm::AD_Intel : GnuAsmDialect;

llvm::InlineAsm *IA = llvm::InlineAsm::get(
FTy, AsmString, Constraints, HasSideEffect,
/* IsAlignStack */ false, AsmDialect, HasUnwindClobber);
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Expand Up @@ -2200,6 +2200,7 @@ void Clang::AddX86TargetArgs(const ArgList &Args,
if (Value == "intel" || Value == "att") {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-x86-asm-syntax=" + Value));
CmdArgs.push_back(Args.MakeArgString("-inline-asm=" + Value));
} else {
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
Expand Down
12 changes: 12 additions & 0 deletions clang/lib/Frontend/CompilerInvocation.cpp
Expand Up @@ -1614,6 +1614,18 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
}
}

if (Arg *A = Args.getLastArg(options::OPT_inline_asm_EQ)) {
StringRef Value = A->getValue();
if (Value == "att") {
Opts.InlineAsmDialect = CodeGenOptions::IAD_ATT;
} else if (Value == "intel") {
Opts.InlineAsmDialect = CodeGenOptions::IAD_Intel;
} else {
Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
<< A->getValue();
}
}

// PIC defaults to -fno-direct-access-external-data while non-PIC defaults to
// -fdirect-access-external-data.
Opts.DirectAccessExternalData =
Expand Down
16 changes: 8 additions & 8 deletions clang/lib/Headers/immintrin.h
Expand Up @@ -543,27 +543,27 @@ extern "C" {
#if defined(__i386__) || defined(__x86_64__)
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
__asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
__asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
: "+r" (_Value), "+m" (*_Target) :: "memory");
return _Value;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
__asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
__asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
: "+r" (_Value), "+m" (*_Target) :: "memory");
return _Value;
}
#endif
#if defined(__x86_64__)
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
__asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
__asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
: "+r" (_Value), "+m" (*_Target) :: "memory");
return _Value;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
__asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
__asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
: "+r" (_Value), "+m" (*_Target) :: "memory");
return _Value;
}
Expand All @@ -575,15 +575,15 @@ _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
long _Exchange, long _Comparand) {
__asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
__asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
: "+a" (_Comparand), "+m" (*_Destination)
: "r" (_Exchange) : "memory");
return _Comparand;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
long _Exchange, long _Comparand) {
__asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
__asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
: "+a" (_Comparand), "+m" (*_Destination)
: "r" (_Exchange) : "memory");
return _Comparand;
Expand All @@ -593,15 +593,15 @@ _InterlockedCompareExchange_HLERelease(long volatile *_Destination,
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand) {
__asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
__asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
: "+a" (_Comparand), "+m" (*_Destination)
: "r" (_Exchange) : "memory");
return _Comparand;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand) {
__asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
__asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
: "+a" (_Comparand), "+m" (*_Destination)
: "r" (_Exchange) : "memory");
return _Comparand;
Expand Down
28 changes: 19 additions & 9 deletions clang/lib/Headers/intrin.h
Expand Up @@ -455,7 +455,9 @@ static __inline__ void __DEFAULT_FN_ATTRS __movsb(unsigned char *__dst,
:
: "memory");
#else
__asm__ __volatile__("xchg %%esi, %1\nrep movsb\nxchg %%esi, %1"
__asm__ __volatile__("xchg {%%esi, %1|%1, esi}\n"
"rep movsb\n"
"xchg {%%esi, %1|%1, esi}"
: "+D"(__dst), "+r"(__src), "+c"(__n)
:
: "memory");
Expand All @@ -465,12 +467,14 @@ static __inline__ void __DEFAULT_FN_ATTRS __movsd(unsigned long *__dst,
unsigned long const *__src,
size_t __n) {
#if defined(__x86_64__)
__asm__ __volatile__("rep movsl"
__asm__ __volatile__("rep movs{l|d}"
: "+D"(__dst), "+S"(__src), "+c"(__n)
:
: "memory");
#else
__asm__ __volatile__("xchg %%esi, %1\nrep movsl\nxchg %%esi, %1"
__asm__ __volatile__("xchg {%%esi, %1|%1, esi}\n"
"rep movs{l|d}\n"
"xchg {%%esi, %1|%1, esi}"
: "+D"(__dst), "+r"(__src), "+c"(__n)
:
: "memory");
Expand All @@ -485,7 +489,9 @@ static __inline__ void __DEFAULT_FN_ATTRS __movsw(unsigned short *__dst,
:
: "memory");
#else
__asm__ __volatile__("xchg %%esi, %1\nrep movsw\nxchg %%esi, %1"
__asm__ __volatile__("xchg {%%esi, %1|%1, esi}\n"
"rep movsw\n"
"xchg {%%esi, %1|%1, esi}"
: "+D"(__dst), "+r"(__src), "+c"(__n)
:
: "memory");
Expand All @@ -494,7 +500,7 @@ static __inline__ void __DEFAULT_FN_ATTRS __movsw(unsigned short *__dst,
static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst,
unsigned long __x,
size_t __n) {
__asm__ __volatile__("rep stosl"
__asm__ __volatile__("rep stos{l|d}"
: "+D"(__dst), "+c"(__n)
: "a"(__x)
: "memory");
Expand Down Expand Up @@ -536,9 +542,9 @@ static __inline__ void __DEFAULT_FN_ATTRS __stosq(unsigned __int64 *__dst,
#else
/* x86-64 uses %rbx as the base register, so preserve it. */
#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \
__asm("xchgq %%rbx,%q1\n" \
__asm("xchg{q} {%%rbx, %q1|%q1, rbx}\n" \
"cpuid\n" \
"xchgq %%rbx,%q1" \
"xchg{q} {%%rbx, %q1|%q1, rbx}" \
: "=a"(__eax), "=r"(__ebx), "=c"(__ecx), "=d"(__edx) \
: "0"(__leaf), "2"(__count))
#endif
Expand Down Expand Up @@ -598,13 +604,17 @@ __readmsr(unsigned long __register) {

static __inline__ unsigned __LPTRINT_TYPE__ __DEFAULT_FN_ATTRS __readcr3(void) {
unsigned __LPTRINT_TYPE__ __cr3_val;
__asm__ __volatile__ ("mov %%cr3, %0" : "=r"(__cr3_val) : : "memory");
__asm__ __volatile__(
"mov {%%cr3, %0|%0, cr3}"
: "=r"(__cr3_val)
:
: "memory");
return __cr3_val;
}

static __inline__ void __DEFAULT_FN_ATTRS
__writecr3(unsigned __INTPTR_TYPE__ __cr3_val) {
__asm__ ("mov %0, %%cr3" : : "r"(__cr3_val) : "memory");
__asm__ ("mov {%0, %%cr3|cr3, %0}" : : "r"(__cr3_val) : "memory");
}

#ifdef __cplusplus
Expand Down
6 changes: 4 additions & 2 deletions clang/lib/Headers/x86gprintrin.h
Expand Up @@ -26,8 +26,10 @@
#endif

#define __SSC_MARK(Tag) \
__asm__ __volatile__("movl %%ebx, %%eax; movl %0, %%ebx; .byte 0x64, 0x67, " \
"0x90; movl %%eax, %%ebx;" ::"i"(Tag) \
__asm__ __volatile__("mov{l} {%%ebx, %%eax|eax, ebx}; " \
"mov{l} {%0, %%ebx|ebx, %0}; " \
".byte 0x64, 0x67, 0x90; " \
"mov{l} {%%eax, %%ebx|ebx, eax};" ::"i"(Tag) \
: "%eax");

#endif /* __X86GPRINTRIN_H */
82 changes: 82 additions & 0 deletions clang/test/CodeGen/inline-asm-intel.c
@@ -0,0 +1,82 @@
// REQUIRES: x86-registered-target

/// Accept intel inline asm but write it out as att:
// RUN: %clang_cc1 -Werror -target-feature +hreset -target-feature +pconfig -target-feature +sgx -ffreestanding -triple i386-unknown-unknown -mllvm -x86-asm-syntax=att -inline-asm=intel -O0 -S %s -o - | FileCheck --check-prefix=ATT %s
// RUN: %clang_cc1 -Werror -target-feature +hreset -target-feature +pconfig -target-feature +sgx -ffreestanding -triple x86_64-unknown-unknown -mllvm -x86-asm-syntax=att -inline-asm=intel -O0 -S %s -o - | FileCheck --check-prefix=ATT %s

/// Accept intel inline asm and write it out as intel:
// RUN: %clang_cc1 -Werror -target-feature +hreset -target-feature +pconfig -target-feature +sgx -ffreestanding -triple i386-unknown-unknown -mllvm -x86-asm-syntax=intel -inline-asm=intel -O0 -S %s -o - | FileCheck --check-prefix=INTEL %s
// RUN: %clang_cc1 -Werror -target-feature +hreset -target-feature +pconfig -target-feature +sgx -ffreestanding -triple x86_64-unknown-unknown -mllvm -x86-asm-syntax=intel -inline-asm=intel -O0 -S %s -o - | FileCheck --check-prefix=INTEL %s

// RUN: %clang_cc1 -Werror -target-feature +hreset -target-feature +pconfig -target-feature +sgx -ffreestanding -triple i386-pc-win32 -mllvm -x86-asm-syntax=intel -inline-asm=intel -O0 -S %s -o - -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 | FileCheck --check-prefix=INTEL %s
// RUN: %clang_cc1 -Werror -target-feature +hreset -target-feature +pconfig -target-feature +sgx -ffreestanding -triple x86_64-pc-win32 -mllvm -x86-asm-syntax=intel -inline-asm=intel -O0 -S %s -o - -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 | FileCheck --check-prefix=INTEL %s

// Test that intrinsics headers still work with -masm=intel.
#ifdef _MSC_VER
#include <intrin.h>
#else
#include <x86intrin.h>
#endif

void f() {
// Intrinsic headers contain macros and inline functions.
// Inline assembly in both are checked only when they are
// referenced, so reference a few intrinsics here.
__SSC_MARK(4);
int a;
_hreset(a);
_pconfig_u32(0, (void*)0);

_encls_u32(0, (void*)0);
_enclu_u32(0, (void*)0);
_enclv_u32(0, (void*)0);
#ifdef _MSC_VER
__movsb((void*)0, (void*)0, 0);
__movsd((void*)0, (void*)0, 0);
__movsw((void*)0, (void*)0, 0);
__stosb((void*)0, 0, 0);
__stosd((void*)0, 0, 0);
__stosw((void*)0, 0, 0);
#ifdef __x86_64__
__movsq((void*)0, (void*)0, 0);
__stosq((void*)0, 0, 0);
#endif
__cpuid((void*)0, 0);
__cpuidex((void*)0, 0, 0);
__halt();
__nop();
__readmsr(0);
__readcr3();
__writecr3(0);

_InterlockedExchange_HLEAcquire((void*)0, 0);
_InterlockedExchange_HLERelease((void*)0, 0);
_InterlockedCompareExchange_HLEAcquire((void*)0, 0, 0);
_InterlockedCompareExchange_HLERelease((void*)0, 0, 0);
#ifdef __x86_64__
_InterlockedExchange64_HLEAcquire((void*)0, 0);
_InterlockedExchange64_HLERelease((void*)0, 0);
_InterlockedCompareExchange64_HLEAcquire((void*)0, 0, 0);
_InterlockedCompareExchange64_HLERelease((void*)0, 0, 0);
#endif
#endif


__asm__("mov eax, ebx");
// ATT: movl %ebx, %eax
// INTEL: mov eax, ebx

// Explicitly overriding asm style per block works:
__asm__(".att_syntax\nmovl %ebx, %eax");
// ATT: movl %ebx, %eax
// INTEL: mov eax, ebx

// The .att_syntax was only scoped to the previous statement.
// (This is different from gcc, where `.att_syntax` is in
// effect from that point on, so portable code would want an
// explicit `.intel_syntax noprefix\n` at the start of this string).
__asm__("mov eax, ebx");
// ATT: movl %ebx, %eax
// INTEL: mov eax, ebx
}

5 changes: 4 additions & 1 deletion clang/test/CodeGen/inline-asm-mixed-style.c
@@ -1,6 +1,9 @@
// RUN: %clang_cc1 -triple i386-unknown-unknown -fasm-blocks -O0 -emit-llvm -S %s -o - | FileCheck %s
// RUN: %clang_cc1 -ffreestanding -triple i386-unknown-unknown -fasm-blocks -O0 -emit-llvm -S %s -o - | FileCheck %s
// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-unknown -fasm-blocks -O0 -emit-llvm -S %s -o - | FileCheck %s
// REQUIRES: x86-registered-target

#include <immintrin.h>

void f() {
__asm mov eax, ebx
__asm mov ebx, ecx
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGen/ms-intrinsics-cpuid.c
Expand Up @@ -18,6 +18,6 @@ void test__cpuid(int *info, int level) {
// X86-SAME: (i32 %{{.*}}, i32 0)

// X64-LABEL: define {{.*}} @test__cpuid(i32* %{{.*}}, i32 %{{.*}})
// X64: call { i32, i32, i32, i32 } asm "xchgq %rbx{{.*}}cpuid{{.*}}xchgq %rbx{{.*}}",
// X64: call { i32, i32, i32, i32 } asm "xchg$(q$) $(%rbx{{.*}}$){{.*}}cpuid{{.*}}xchg$(q$) $(%rbx{{.*}}$)",
// X64-SAME: "={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"
// X64-SAME: (i32 %{{.*}}, i32 0)

0 comments on commit ae98182

Please sign in to comment.