From 88065693392e2816f1f501bd2b2bb8edd24f4e12 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 31 Jan 2020 05:07:01 -0800 Subject: [PATCH 1/5] x86: Add endbranch to indirect branch targets for Intel CET To support Intel CET, all indirect branch targets must start with endbranch. Here is a patch to add endbranch to all function entries in x86 assembly codes which are indirect branch targets as discovered by running openssl testsuite on Intel CET machine and visual inspection. Since x86 cbc.pl uses indirect branch with a jump table, we also need to add endbranch to all jump targets. Reviewed-by: Richard Levitte Reviewed-by: Paul Dale (Merged from https://github.com/openssl/openssl/pull/10984) --- crypto/perlasm/cbc.pl | 7 +++++++ crypto/perlasm/x86gas.pl | 1 + 2 files changed, 8 insertions(+) diff --git a/crypto/perlasm/cbc.pl b/crypto/perlasm/cbc.pl index 01bafe457d680..17b01d22eb2b1 100644 --- a/crypto/perlasm/cbc.pl +++ b/crypto/perlasm/cbc.pl @@ -165,21 +165,28 @@ sub cbc &jmp_ptr($count); &set_label("ej7"); + &endbranch() &movb(&HB("edx"), &BP(6,$in,"",0)); &shl("edx",8); &set_label("ej6"); + &endbranch() &movb(&HB("edx"), &BP(5,$in,"",0)); &set_label("ej5"); + &endbranch() &movb(&LB("edx"), &BP(4,$in,"",0)); &set_label("ej4"); + &endbranch() &mov("ecx", &DWP(0,$in,"",0)); &jmp(&label("ejend")); &set_label("ej3"); + &endbranch() &movb(&HB("ecx"), &BP(2,$in,"",0)); &shl("ecx",8); &set_label("ej2"); + &endbranch() &movb(&HB("ecx"), &BP(1,$in,"",0)); &set_label("ej1"); + &endbranch() &movb(&LB("ecx"), &BP(0,$in,"",0)); &set_label("ejend"); diff --git a/crypto/perlasm/x86gas.pl b/crypto/perlasm/x86gas.pl index 5c7ea3880e4d3..22897f1d896b2 100644 --- a/crypto/perlasm/x86gas.pl +++ b/crypto/perlasm/x86gas.pl @@ -124,6 +124,7 @@ sub ::function_begin_B push(@out,".align\t$align\n"); push(@out,"$func:\n"); push(@out,"$begin:\n") if ($global); + &::endbranch(); $::stack=4; } From cc5d87cb22f213e478f6db064b186ecb1bfaf57b Mon Sep 17 00:00:00 2001 From: "H.J. 
Lu" Date: Fri, 13 Dec 2019 16:46:07 -0800 Subject: [PATCH 2/5] Use swapcontext for Intel CET When Intel CET is enabled, makecontext will create a different shadow stack for each context. async_fibre_swapcontext cannot use _longjmp. It must call swapcontext to swap shadow stack as well as normal stack. Reviewed-by: Paul Dale Reviewed-by: Matt Caswell (Merged from https://github.com/openssl/openssl/pull/10983) --- crypto/async/arch/async_posix.c | 2 ++ crypto/async/arch/async_posix.h | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/crypto/async/arch/async_posix.c b/crypto/async/arch/async_posix.c index 95678d4fa686a..dfc45bfa05400 100644 --- a/crypto/async/arch/async_posix.c +++ b/crypto/async/arch/async_posix.c @@ -34,7 +34,9 @@ void async_local_cleanup(void) int async_fibre_makecontext(async_fibre *fibre) { +#ifndef USE_SWAPCONTEXT fibre->env_init = 0; +#endif if (getcontext(&fibre->fibre) == 0) { fibre->fibre.uc_stack.ss_sp = OPENSSL_malloc(STACKSIZE); if (fibre->fibre.uc_stack.ss_sp != NULL) { diff --git a/crypto/async/arch/async_posix.h b/crypto/async/arch/async_posix.h index 873c0316ddf3f..db42a018806f8 100644 --- a/crypto/async/arch/async_posix.h +++ b/crypto/async/arch/async_posix.h @@ -25,17 +25,33 @@ # define ASYNC_POSIX # define ASYNC_ARCH +# ifdef __CET__ +/* + * When Intel CET is enabled, makecontext will create a different + * shadow stack for each context. async_fibre_swapcontext cannot + * use _longjmp. It must call swapcontext to swap shadow stack as + * well as normal stack. 
+ */ +# define USE_SWAPCONTEXT +# endif # include <ucontext.h> -# include <setjmp.h> +# ifndef USE_SWAPCONTEXT +# include <setjmp.h> +# endif typedef struct async_fibre_st { ucontext_t fibre; +# ifndef USE_SWAPCONTEXT jmp_buf env; int env_init; +# endif } async_fibre; static ossl_inline int async_fibre_swapcontext(async_fibre *o, async_fibre *n, int r) { +# ifdef USE_SWAPCONTEXT + swapcontext(&o->fibre, &n->fibre); +# else o->env_init = 1; if (!r || !_setjmp(o->env)) { @@ -44,6 +60,7 @@ static ossl_inline int async_fibre_swapcontext(async_fibre *o, async_fibre *n, i else setcontext(&n->fibre); } +# endif return 1; } From ace0bfffdf0e6259827c8ef1bef44e8684282479 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sat, 14 Dec 2019 09:48:18 -0800 Subject: [PATCH 3/5] x86: Always generate .note.gnu.property section for ELF outputs We should always generate .note.gnu.property section in x86 assembly codes for ELF outputs to mark Intel CET support since all input files must be marked with Intel CET support in order for linker to mark output with Intel CET support. 
Verified with $ CC="gcc -Wl,-z,cet-report=error" ./Configure shared linux-x86 -fcf-protection $ make $ make test Reviewed-by: Richard Levitte Reviewed-by: Paul Dale (Merged from https://github.com/openssl/openssl/pull/11044) --- crypto/perlasm/x86gas.pl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/crypto/perlasm/x86gas.pl b/crypto/perlasm/x86gas.pl index 22897f1d896b2..58ea922256ef4 100644 --- a/crypto/perlasm/x86gas.pl +++ b/crypto/perlasm/x86gas.pl @@ -173,6 +173,26 @@ sub ::file_end else { push (@out,"$tmp\n"); } } push(@out,$initseg) if ($initseg); + if ($::elf) { + push(@out," + .section \".note.gnu.property\", \"a\" + .p2align 2 + .long 1f - 0f + .long 4f - 1f + .long 5 +0: + .asciz \"GNU\" +1: + .p2align 2 + .long 0xc0000002 + .long 3f - 2f +2: + .long 3 +3: + .p2align 2 +4: +"); + } } sub ::data_byte { push(@out,".byte\t".join(',',@_)."\n"); } From 0e7236cb29770ffff17dc9544ccef384334badde Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 31 Jan 2020 09:13:27 -0800 Subject: [PATCH 4/5] x86_64: Always generate .note.gnu.property section for ELF outputs We should always generate .note.gnu.property section in x86_64 assembly codes for ELF outputs to mark Intel CET support since all input files must be marked with Intel CET support in order for linker to mark output with Intel CET support. Also .note.gnu.property section in x32 should be aligned to 4 bytes, not 8 bytes and .p2align should be used consistently. Verified with $ CC="gcc -Wl,-z,cet-report=error" ./Configure shared linux-x86_64 -fcf-protection $ make $ make test and $ CC="gcc -mx32 -Wl,-z,cet-report=error" ./Configure shared linux-x32 -fcf-protection $ make $ make test # <<< 90-test_sslapi.t failed because 8-byte pointer size. 
Fix #10896 Reviewed-by: Richard Levitte Reviewed-by: Paul Dale Reviewed-by: Tomas Mraz (Merged from https://github.com/openssl/openssl/pull/10985) --- crypto/perlasm/x86_64-xlate.pl | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl index 59af6df9c200d..2cd410b45102c 100755 --- a/crypto/perlasm/x86_64-xlate.pl +++ b/crypto/perlasm/x86_64-xlate.pl @@ -101,6 +101,33 @@ $decor="\$L\$"; } +my $cet_property; +if ($flavour =~ /elf/) { + # Always generate .note.gnu.property section for ELF outputs to + # mark Intel CET support since all input files must be marked + # with Intel CET support in order for linker to mark output with + # Intel CET support. + my $p2align=3; $p2align=2 if ($flavour eq "elf32"); + $cet_property = <<_____; + .section ".note.gnu.property", "a" + .p2align $p2align + .long 1f - 0f + .long 4f - 1f + .long 5 +0: + .asciz "GNU" +1: + .p2align $p2align + .long 0xc0000002 + .long 3f - 2f +2: + .long 3 +3: + .p2align $p2align +4: +_____ +} + my $current_segment; my $current_function; my %globals; @@ -1213,6 +1240,7 @@ sub rxb { print $line,"\n"; } +print "$cet_property" if ($cet_property); print "\n$current_segment\tENDS\n" if ($current_segment && $masm); print "END\n" if ($masm); From aa0d12b651ad8ff471d074afe653d70b73ecd0fd Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 31 Jan 2020 04:17:26 -0800 Subject: [PATCH 5/5] x86_64: Add endbranch at function entries for Intel CET To support Intel CET, all indirect branch targets must start with endbranch. Here is a patch to add endbranch to function entries in x86_64 assembly codes which are indirect branch targets as discovered by running openssl testsuite on Intel CET machine and visual inspection. 
Verified with $ CC="gcc -Wl,-z,cet-report=error" ./Configure shared linux-x86_64 -fcf-protection $ make $ make test and $ CC="gcc -mx32 -Wl,-z,cet-report=error" ./Configure shared linux-x32 -fcf-protection $ make $ make test # <<< passed with https://github.com/openssl/openssl/pull/10988 Reviewed-by: Tomas Mraz Reviewed-by: Richard Levitte (Merged from https://github.com/openssl/openssl/pull/10982) --- crypto/aes/asm/aesni-x86_64.pl | 11 +++++++++++ crypto/aes/asm/vpaes-x86_64.pl | 5 +++++ crypto/camellia/asm/cmll-x86_64.pl | 1 + crypto/modes/asm/ghash-x86_64.pl | 6 ++++++ crypto/poly1305/asm/poly1305-x86_64.pl | 2 ++ crypto/rc4/asm/rc4-x86_64.pl | 3 +++ crypto/x86_64cpuid.pl | 9 +++++++++ 7 files changed, 37 insertions(+) diff --git a/crypto/aes/asm/aesni-x86_64.pl b/crypto/aes/asm/aesni-x86_64.pl index f8c2e2393438c..25a806edd5a47 100644 --- a/crypto/aes/asm/aesni-x86_64.pl +++ b/crypto/aes/asm/aesni-x86_64.pl @@ -275,6 +275,7 @@ sub aesni_generate1 { .align 16 ${PREFIX}_encrypt: .cfi_startproc + endbranch movups ($inp),$inout0 # load input mov 240($key),$rounds # key->rounds ___ @@ -293,6 +294,7 @@ sub aesni_generate1 { .align 16 ${PREFIX}_decrypt: .cfi_startproc + endbranch movups ($inp),$inout0 # load input mov 240($key),$rounds # key->rounds ___ @@ -613,6 +615,7 @@ sub aesni_generate8 { .align 16 aesni_ecb_encrypt: .cfi_startproc + endbranch ___ $code.=<<___ if ($win64); lea -0x58(%rsp),%rsp @@ -985,6 +988,7 @@ sub aesni_generate8 { .align 16 aesni_ccm64_encrypt_blocks: .cfi_startproc + endbranch ___ $code.=<<___ if ($win64); lea -0x58(%rsp),%rsp @@ -1077,6 +1081,7 @@ sub aesni_generate8 { .align 16 aesni_ccm64_decrypt_blocks: .cfi_startproc + endbranch ___ $code.=<<___ if ($win64); lea -0x58(%rsp),%rsp @@ -1203,6 +1208,7 @@ sub aesni_generate8 { .align 16 aesni_ctr32_encrypt_blocks: .cfi_startproc + endbranch cmp \$1,$len jne .Lctr32_bulk @@ -1775,6 +1781,7 @@ sub aesni_generate8 { .align 16 aesni_xts_encrypt: .cfi_startproc + endbranch lea (%rsp),%r11 # 
frame pointer .cfi_def_cfa_register %r11 push %rbp @@ -2258,6 +2265,7 @@ sub aesni_generate8 { .align 16 aesni_xts_decrypt: .cfi_startproc + endbranch lea (%rsp),%r11 # frame pointer .cfi_def_cfa_register %r11 push %rbp @@ -2783,6 +2791,7 @@ sub aesni_generate8 { .align 32 aesni_ocb_encrypt: .cfi_startproc + endbranch lea (%rsp),%rax push %rbx .cfi_push %rbx @@ -3249,6 +3258,7 @@ sub aesni_generate8 { .align 32 aesni_ocb_decrypt: .cfi_startproc + endbranch lea (%rsp),%rax push %rbx .cfi_push %rbx @@ -3737,6 +3747,7 @@ sub aesni_generate8 { .align 16 ${PREFIX}_cbc_encrypt: .cfi_startproc + endbranch test $len,$len # check length jz .Lcbc_ret diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl index 099a686a4d5d7..d6f2d89eafc67 100644 --- a/crypto/aes/asm/vpaes-x86_64.pl +++ b/crypto/aes/asm/vpaes-x86_64.pl @@ -696,6 +696,7 @@ .align 16 ${PREFIX}_set_encrypt_key: .cfi_startproc + endbranch ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -746,6 +747,7 @@ .align 16 ${PREFIX}_set_decrypt_key: .cfi_startproc + endbranch ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -801,6 +803,7 @@ .align 16 ${PREFIX}_encrypt: .cfi_startproc + endbranch ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -846,6 +849,7 @@ .align 16 ${PREFIX}_decrypt: .cfi_startproc + endbranch ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -897,6 +901,7 @@ .align 16 ${PREFIX}_cbc_encrypt: .cfi_startproc + endbranch xchg $key,$len ___ ($len,$key)=($key,$len); diff --git a/crypto/camellia/asm/cmll-x86_64.pl b/crypto/camellia/asm/cmll-x86_64.pl index 59e1840160ff9..82b3d466b981a 100644 --- a/crypto/camellia/asm/cmll-x86_64.pl +++ b/crypto/camellia/asm/cmll-x86_64.pl @@ -685,6 +685,7 @@ sub _rotl128 { .align 16 Camellia_cbc_encrypt: .cfi_startproc + endbranch cmp \$0,%rdx je .Lcbc_abort push %rbx diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl index 0a0bfd575ceeb..3c49bdf859c46 100644 --- a/crypto/modes/asm/ghash-x86_64.pl +++ 
b/crypto/modes/asm/ghash-x86_64.pl @@ -239,6 +239,7 @@ () .align 16 gcm_gmult_4bit: .cfi_startproc + endbranch push %rbx .cfi_push %rbx push %rbp # %rbp and others are pushed exclusively in @@ -286,6 +287,7 @@ () .align 16 gcm_ghash_4bit: .cfi_startproc + endbranch push %rbx .cfi_push %rbx push %rbp @@ -612,6 +614,7 @@ sub reduction_alg9 { # 17/11 times faster than Intel version .align 16 gcm_gmult_clmul: .cfi_startproc + endbranch .L_gmult_clmul: movdqu ($Xip),$Xi movdqa .Lbswap_mask(%rip),$T3 @@ -663,6 +666,7 @@ sub reduction_alg9 { # 17/11 times faster than Intel version .align 32 gcm_ghash_clmul: .cfi_startproc + endbranch .L_ghash_clmul: ___ $code.=<<___ if ($win64); @@ -1166,6 +1170,7 @@ sub reduction_avx { .align 32 gcm_gmult_avx: .cfi_startproc + endbranch jmp .L_gmult_clmul .cfi_endproc .size gcm_gmult_avx,.-gcm_gmult_avx @@ -1177,6 +1182,7 @@ sub reduction_avx { .align 32 gcm_ghash_avx: .cfi_startproc + endbranch ___ if ($avx) { my ($Xip,$Htbl,$inp,$len)=@_4args; diff --git a/crypto/poly1305/asm/poly1305-x86_64.pl b/crypto/poly1305/asm/poly1305-x86_64.pl index c014be1ca9fae..75532553bb9b6 100755 --- a/crypto/poly1305/asm/poly1305-x86_64.pl +++ b/crypto/poly1305/asm/poly1305-x86_64.pl @@ -2806,6 +2806,7 @@ sub poly1305_iteration { .align 32 poly1305_blocks_vpmadd52: .cfi_startproc + endbranch shr \$4,$len jz .Lno_data_vpmadd52 # too short @@ -3739,6 +3740,7 @@ sub poly1305_iteration { .align 32 poly1305_emit_base2_44: .cfi_startproc + endbranch mov 0($ctx),%r8 # load hash value mov 8($ctx),%r9 mov 16($ctx),%r10 diff --git a/crypto/rc4/asm/rc4-x86_64.pl b/crypto/rc4/asm/rc4-x86_64.pl index 423eb5b4a9978..2f19ec9b4518b 100755 --- a/crypto/rc4/asm/rc4-x86_64.pl +++ b/crypto/rc4/asm/rc4-x86_64.pl @@ -140,6 +140,7 @@ .align 16 RC4: .cfi_startproc + endbranch or $len,$len jne .Lentry ret @@ -455,6 +456,7 @@ sub RC4_loop { .align 16 RC4_set_key: .cfi_startproc + endbranch lea 8($dat),$dat lea ($inp,$len),$inp neg $len @@ -529,6 +531,7 @@ sub RC4_loop { .align 16 
RC4_options: .cfi_startproc + endbranch lea .Lopts(%rip),%rax mov OPENSSL_ia32cap_P(%rip),%edx bt \$20,%edx diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl index d9536d65768f0..523f2678e6574 100644 --- a/crypto/x86_64cpuid.pl +++ b/crypto/x86_64cpuid.pl @@ -40,6 +40,7 @@ .align 16 OPENSSL_atomic_add: .cfi_startproc + endbranch movl ($arg1),%eax .Lspin: leaq ($arg2,%rax),%r8 .byte 0xf0 # lock @@ -56,6 +57,7 @@ .align 16 OPENSSL_rdtsc: .cfi_startproc + endbranch rdtsc shl \$32,%rdx or %rdx,%rax @@ -68,6 +70,7 @@ .align 16 OPENSSL_ia32_cpuid: .cfi_startproc + endbranch mov %rbx,%r8 # save %rbx .cfi_register %rbx,%r8 @@ -237,6 +240,7 @@ .align 16 OPENSSL_cleanse: .cfi_startproc + endbranch xor %rax,%rax cmp \$15,$arg2 jae .Lot @@ -274,6 +278,7 @@ .align 16 CRYPTO_memcmp: .cfi_startproc + endbranch xor %rax,%rax xor %r10,%r10 cmp \$0,$arg3 @@ -312,6 +317,7 @@ .align 16 OPENSSL_wipe_cpu: .cfi_startproc + endbranch pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 @@ -376,6 +382,7 @@ .align 16 OPENSSL_instrument_bus: .cfi_startproc + endbranch mov $arg1,$out # tribute to Win64 mov $arg2,$cnt mov $arg2,$max @@ -410,6 +417,7 @@ .align 16 OPENSSL_instrument_bus2: .cfi_startproc + endbranch mov $arg1,$out # tribute to Win64 mov $arg2,$cnt mov $arg3,$max @@ -465,6 +473,7 @@ sub gen_random { .align 16 OPENSSL_ia32_${rdop}_bytes: .cfi_startproc + endbranch xor %rax, %rax # return value cmp \$0,$arg2 je .Ldone_${rdop}_bytes