Skip to content

Commit

Permalink
x86: Add indirect branch tracking support (#540)
Browse files Browse the repository at this point in the history
Intel Control-flow Enforcement Technology (CET):

https://software.intel.com/en-us/articles/intel-sdm

contains shadow stack (SHSTK) and indirect branch tracking (IBT).  When
CET is enabled, ELF object files must be marked with a .note.gnu.property
section.  When Intel CET is enabled, include <cet.h> in assembly code
to mark Intel CET support.

Also, when IBT is enabled, all indirect branch targets must start with
an ENDBR instruction, and the notrack prefix can be used to disable IBT
on an indirect branch.  <cet.h> defines _CET_ENDBR, which can be used in
assembly code for the ENDBR instruction.  If <cet.h> isn't included,
define _CET_ENDBR as empty so that _CET_ENDBR can still be used in
assembly code.

The trampoline must be enlarged to add the ENDBR instruction
unconditionally; ENDBR is a NOP on non-CET processors.  This is required
regardless of whether libffi is built with CET enabled, since libffi.so
will be marked in the legacy bitmap, but the trampoline won't.  Update
the library version for the larger FFI_TRAMPOLINE_SIZE.

This fixes:

#474

Tested with

$ CC="gcc -Wl,-z,cet-report=error -fcf-protection" CXX="g++ -Wl,-z,cet-report=error -fcf-protection" .../configure

on Linux CET machines in i686, x32 and x86-64 modes.
  • Loading branch information
hjl-tools committed Feb 22, 2020
1 parent 4d6d286 commit 7855656
Show file tree
Hide file tree
Showing 9 changed files with 120 additions and 31 deletions.
3 changes: 2 additions & 1 deletion Makefile.am
Expand Up @@ -144,7 +144,8 @@ endif
libffi_version_info = -version-info `grep -v '^\#' $(srcdir)/libtool-version`

libffi.map: $(top_srcdir)/libffi.map.in
$(COMPILE) -D$(TARGET) -E -x assembler-with-cpp -o $@ $<
$(COMPILE) -D$(TARGET) -DGENERATE_LIBFFI_MAP \
-E -x assembler-with-cpp -o $@ $<

libffi_la_LDFLAGS = -no-undefined $(libffi_version_info) $(libffi_version_script) $(LTLDFLAGS) $(AM_LTLDFLAGS)
libffi_la_DEPENDENCIES = $(libffi_la_LIBADD) $(libffi_version_dep)
Expand Down
2 changes: 1 addition & 1 deletion libtool-version
Expand Up @@ -26,4 +26,4 @@
# release, then set age to 0.
#
# CURRENT:REVISION:AGE
8:0:1
9:0:1
11 changes: 7 additions & 4 deletions src/x86/ffi.c
Expand Up @@ -557,13 +557,16 @@ ffi_prep_closure_loc (ffi_closure* closure,
return FFI_BAD_ABI;
}

/* endbr32. */
*(UINT32 *) tramp = 0xfb1e0ff3;

/* movl or pushl immediate. */
tramp[0] = op;
*(void **)(tramp + 1) = codeloc;
tramp[4] = op;
*(void **)(tramp + 5) = codeloc;

/* jmp dest */
tramp[5] = 0xe9;
*(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10);
tramp[9] = 0xe9;
/* The rel32 operand is relative to the address of the instruction that
   follows the jmp.  With the 4-byte endbr32 prefix the trampoline code is
   4 (endbr32) + 5 (movl/pushl imm32) + 5 (jmp rel32) = 14 bytes long, so
   the displacement must be taken from codeloc + 14, not codeloc + 10.  */
*(unsigned *)(tramp + 10) = (unsigned)dest - ((unsigned)codeloc + 14);

closure->cif = cif;
closure->fun = fun;
Expand Down
18 changes: 10 additions & 8 deletions src/x86/ffi64.c
Expand Up @@ -728,13 +728,15 @@ ffi_prep_closure_loc (ffi_closure* closure,
void *user_data,
void *codeloc)
{
static const unsigned char trampoline[16] = {
/* leaq -0x7(%rip),%r10 # 0x0 */
0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
/* jmpq *0x3(%rip) # 0x10 */
0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
/* nopl (%rax) */
0x0f, 0x1f, 0x00
static const unsigned char trampoline[24] = {
/* endbr64 */
0xf3, 0x0f, 0x1e, 0xfa,
/* leaq -0xb(%rip),%r10 # 0x0 */
0x4c, 0x8d, 0x15, 0xf5, 0xff, 0xff, 0xff,
/* jmpq *0x7(%rip) # 0x18 */
0xff, 0x25, 0x07, 0x00, 0x00, 0x00,
/* nopl 0(%rax) */
0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
};
void (*dest)(void);
char *tramp = closure->tramp;
Expand All @@ -752,7 +754,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
dest = ffi_closure_unix64;

memcpy (tramp, trampoline, sizeof(trampoline));
*(UINT64 *)(tramp + 16) = (uintptr_t)dest;
*(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)dest;

closure->cif = cif;
closure->fun = fun;
Expand Down
17 changes: 15 additions & 2 deletions src/x86/ffitarget.h
Expand Up @@ -136,12 +136,25 @@ typedef enum ffi_abi {

#if defined (X86_64) || defined(X86_WIN64) \
|| (defined (__x86_64__) && defined (X86_DARWIN))
# define FFI_TRAMPOLINE_SIZE 24
/* 4 bytes of ENDBR64 + 7 bytes of LEA + 6 bytes of JMP + 7 bytes of NOP
+ 8 bytes of pointer. */
# define FFI_TRAMPOLINE_SIZE 32
# define FFI_NATIVE_RAW_API 0
#else
# define FFI_TRAMPOLINE_SIZE 12
/* 4 bytes of ENDBR32 + 5 bytes of MOV + 5 bytes of JMP + 2 unused
bytes. */
# define FFI_TRAMPOLINE_SIZE 16
# define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */
#endif

#if !defined(GENERATE_LIBFFI_MAP) && defined(__ASSEMBLER__) \
&& defined(__CET__)
# include <cet.h>
# define _CET_NOTRACK notrack
#else
# define _CET_ENDBR
# define _CET_NOTRACK
#endif

#endif

18 changes: 10 additions & 8 deletions src/x86/ffiw64.c
Expand Up @@ -196,13 +196,15 @@ EFI64(ffi_prep_closure_loc)(ffi_closure* closure,
void *user_data,
void *codeloc)
{
static const unsigned char trampoline[16] = {
/* leaq -0x7(%rip),%r10 # 0x0 */
0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
/* jmpq *0x3(%rip) # 0x10 */
0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
/* nopl (%rax) */
0x0f, 0x1f, 0x00
static const unsigned char trampoline[FFI_TRAMPOLINE_SIZE - 8] = {
/* endbr64 */
0xf3, 0x0f, 0x1e, 0xfa,
/* leaq -0xb(%rip),%r10 # 0x0 */
0x4c, 0x8d, 0x15, 0xf5, 0xff, 0xff, 0xff,
/* jmpq *0x7(%rip) # 0x18 */
0xff, 0x25, 0x07, 0x00, 0x00, 0x00,
/* nopl 0(%rax) */
0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
};
char *tramp = closure->tramp;

Expand All @@ -216,7 +218,7 @@ EFI64(ffi_prep_closure_loc)(ffi_closure* closure,
}

memcpy (tramp, trampoline, sizeof(trampoline));
*(UINT64 *)(tramp + 16) = (uintptr_t)ffi_closure_win64;
*(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)ffi_closure_win64;

closure->cif = cif;
closure->fun = fun;
Expand Down
17 changes: 13 additions & 4 deletions src/x86/sysv.S
Expand Up @@ -92,6 +92,7 @@
ffi_call_i386:
L(UW0):
# cfi_startproc
_CET_ENDBR
#if !HAVE_FASTCALL
movl 4(%esp), %ecx
movl 8(%esp), %edx
Expand Down Expand Up @@ -133,7 +134,7 @@ L(pc1):
leal L(store_table)(,%ecx, 8), %ebx
#endif
movl 16(%ebp), %ecx /* load result address */
jmp *%ebx
_CET_NOTRACK jmp *%ebx

.balign 8
L(store_table):
Expand Down Expand Up @@ -256,7 +257,7 @@ ENDF(ffi_call_i386)
andl $X86_RET_TYPE_MASK, %eax; \
leal L(C1(load_table,N))(, %eax, 8), %edx; \
movl closure_CF(%esp), %eax; /* optimiztic load */ \
jmp *%edx
_CET_NOTRACK jmp *%edx

#ifdef __PIC__
# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
Expand All @@ -267,7 +268,7 @@ ENDF(ffi_call_i386)
L(C1(pc,N)): \
leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx; \
movl closure_CF(%esp), %eax; /* optimiztic load */ \
jmp *%edx
_CET_NOTRACK jmp *%edx
# else
# define FFI_CLOSURE_CALL_INNER_SAVE_EBX
# undef FFI_CLOSURE_CALL_INNER
Expand All @@ -286,7 +287,7 @@ L(C1(UW,UWN)): \
L(C1(UW,UWN)): \
/* cfi_restore(%ebx); */ \
movl closure_CF(%esp), %eax; /* optimiztic load */ \
jmp *%edx
_CET_NOTRACK jmp *%edx
# endif /* DARWIN || HIDDEN */
#endif /* __PIC__ */

Expand All @@ -296,6 +297,7 @@ L(C1(UW,UWN)): \
C(ffi_go_closure_EAX):
L(UW6):
# cfi_startproc
_CET_ENDBR
subl $closure_FS, %esp
L(UW7):
# cfi_def_cfa_offset(closure_FS + 4)
Expand All @@ -316,6 +318,7 @@ ENDF(C(ffi_go_closure_EAX))
C(ffi_go_closure_ECX):
L(UW9):
# cfi_startproc
_CET_ENDBR
subl $closure_FS, %esp
L(UW10):
# cfi_def_cfa_offset(closure_FS + 4)
Expand All @@ -340,6 +343,7 @@ ENDF(C(ffi_go_closure_ECX))
C(ffi_closure_i386):
L(UW12):
# cfi_startproc
_CET_ENDBR
subl $closure_FS, %esp
L(UW13):
# cfi_def_cfa_offset(closure_FS + 4)
Expand Down Expand Up @@ -423,6 +427,7 @@ ENDF(C(ffi_closure_i386))
C(ffi_go_closure_STDCALL):
L(UW21):
# cfi_startproc
_CET_ENDBR
subl $closure_FS, %esp
L(UW22):
# cfi_def_cfa_offset(closure_FS + 4)
Expand All @@ -448,6 +453,7 @@ L(UW24):
# cfi_startproc
# cfi_def_cfa(%esp, 8)
# cfi_offset(%eip, -8)
_CET_ENDBR
subl $closure_FS-4, %esp
L(UW25):
# cfi_def_cfa_offset(closure_FS + 4)
Expand All @@ -470,6 +476,7 @@ ENDF(C(ffi_closure_REGISTER))
C(ffi_closure_STDCALL):
L(UW27):
# cfi_startproc
_CET_ENDBR
subl $closure_FS, %esp
L(UW28):
# cfi_def_cfa_offset(closure_FS + 4)
Expand Down Expand Up @@ -576,6 +583,7 @@ ENDF(C(ffi_closure_STDCALL))
C(ffi_closure_raw_SYSV):
L(UW32):
# cfi_startproc
_CET_ENDBR
subl $raw_closure_S_FS, %esp
L(UW33):
# cfi_def_cfa_offset(raw_closure_S_FS + 4)
Expand Down Expand Up @@ -679,6 +687,7 @@ ENDF(C(ffi_closure_raw_SYSV))
C(ffi_closure_raw_THISCALL):
L(UW41):
# cfi_startproc
_CET_ENDBR
/* Rearrange the stack such that %ecx is the first argument.
This means moving the return address. */
popl %edx
Expand Down

0 comments on commit 7855656

Please sign in to comment.