Skip to content

Commit

Permalink
CVE-2018-5407 fix: ECC ladder
Browse files Browse the repository at this point in the history
Reviewed-by: Matt Caswell <matt@openssl.org>
Reviewed-by: Paul Dale <paul.dale@oracle.com>
Reviewed-by: Nicola Tuveri <nic.tuv@gmail.com>
(Merged from #7593)
  • Loading branch information
bbbrumley authored and romen committed Nov 12, 2018
1 parent 59b9c67 commit b18162a
Show file tree
Hide file tree
Showing 3 changed files with 291 additions and 0 deletions.
13 changes: 13 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,19 @@

Changes between 1.0.2p and 1.0.2q [xx XXX xxxx]

*) Microarchitecture timing vulnerability in ECC scalar multiplication

OpenSSL ECC scalar multiplication, used in e.g. ECDSA and ECDH, has been
shown to be vulnerable to a microarchitecture timing side channel attack.
An attacker with sufficient access to mount local timing attacks during
ECDSA signature generation could recover the private key.

This issue was reported to OpenSSL on 26th October 2018 by Alejandro
Cabrera Aldaya, Billy Brumley, Sohaib ul Hassan, Cesar Pereida Garcia and
Nicola Tuveri.
(CVE-2018-5407)
[Billy Brumley]

*) Resolve a compatibility issue in EC_GROUP handling with the FIPS Object
Module, accidentally introduced while backporting security fixes from the
development branch and hindering the use of ECC in FIPS mode.
Expand Down
32 changes: 32 additions & 0 deletions crypto/bn/bn_lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -903,6 +903,38 @@ void BN_consttime_swap(BN_ULONG condition, BIGNUM *a, BIGNUM *b, int nwords)
a->top ^= t;
b->top ^= t;

t = (a->neg ^ b->neg) & condition;
a->neg ^= t;
b->neg ^= t;

/*-
* BN_FLG_STATIC_DATA: indicates that data may not be written to. Intention
* is actually to treat it as it's read-only data, and some (if not most)
* of it does reside in read-only segment. In other words observation of
* BN_FLG_STATIC_DATA in BN_consttime_swap should be treated as fatal
* condition. It would either cause SEGV or effectively cause data
* corruption.
*
* BN_FLG_MALLOCED: refers to BN structure itself, and hence must be
* preserved.
*
* BN_FLG_SECURE: must be preserved, because it determines how x->d was
* allocated and hence how to free it.
*
* BN_FLG_CONSTTIME: sufficient to mask and swap
*
* BN_FLG_FIXED_TOP: indicates that we haven't called bn_correct_top() on
* the data, so the d array may be padded with additional 0 values (i.e.
* top could be greater than the minimal value that it could be). We should
* be swapping it
*/

#define BN_CONSTTIME_SWAP_FLAGS (BN_FLG_CONSTTIME | BN_FLG_FIXED_TOP)

t = ((a->flags ^ b->flags) & BN_CONSTTIME_SWAP_FLAGS) & condition;
a->flags ^= t;
b->flags ^= t;

#define BN_CONSTTIME_SWAP(ind) \
do { \
t = (a->d[ind] ^ b->d[ind]) & condition; \
Expand Down
246 changes: 246 additions & 0 deletions crypto/ec/ec_mult.c
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,224 @@ static signed char *compute_wNAF(const BIGNUM *scalar, int w, size_t *ret_len)
return r;
}

#define EC_POINT_BN_set_flags(P, flags) do { \
BN_set_flags(&(P)->X, (flags)); \
BN_set_flags(&(P)->Y, (flags)); \
BN_set_flags(&(P)->Z, (flags)); \
} while(0)

/*-
* This functions computes (in constant time) a point multiplication over the
* EC group.
*
* At a high level, it is Montgomery ladder with conditional swaps.
*
* It performs either a fixed scalar point multiplication
* (scalar * generator)
* when point is NULL, or a generic scalar point multiplication
* (scalar * point)
* when point is not NULL.
*
* scalar should be in the range [0,n) otherwise all constant time bets are off.
*
* NB: This says nothing about EC_POINT_add and EC_POINT_dbl,
* which of course are not constant time themselves.
*
* The product is stored in r.
*
* Returns 1 on success, 0 otherwise.
*/
static int ec_mul_consttime(const EC_GROUP *group, EC_POINT *r,
const BIGNUM *scalar, const EC_POINT *point,
BN_CTX *ctx)
{
int i, cardinality_bits, group_top, kbit, pbit, Z_is_one;
EC_POINT *s = NULL;
BIGNUM *k = NULL;
BIGNUM *lambda = NULL;
BIGNUM *cardinality = NULL;
BN_CTX *new_ctx = NULL;
int ret = 0;

if (ctx == NULL && (ctx = new_ctx = BN_CTX_new()) == NULL)
return 0;

BN_CTX_start(ctx);

s = EC_POINT_new(group);
if (s == NULL)
goto err;

if (point == NULL) {
if (!EC_POINT_copy(s, group->generator))
goto err;
} else {
if (!EC_POINT_copy(s, point))
goto err;
}

EC_POINT_BN_set_flags(s, BN_FLG_CONSTTIME);

cardinality = BN_CTX_get(ctx);
lambda = BN_CTX_get(ctx);
k = BN_CTX_get(ctx);
if (k == NULL || !BN_mul(cardinality, &group->order, &group->cofactor, ctx))
goto err;

/*
* Group cardinalities are often on a word boundary.
* So when we pad the scalar, some timing diff might
* pop if it needs to be expanded due to carries.
* So expand ahead of time.
*/
cardinality_bits = BN_num_bits(cardinality);
group_top = cardinality->top;
if ((bn_wexpand(k, group_top + 2) == NULL)
|| (bn_wexpand(lambda, group_top + 2) == NULL))
goto err;

if (!BN_copy(k, scalar))
goto err;

BN_set_flags(k, BN_FLG_CONSTTIME);

if ((BN_num_bits(k) > cardinality_bits) || (BN_is_negative(k))) {
/*-
* this is an unusual input, and we don't guarantee
* constant-timeness
*/
if (!BN_nnmod(k, k, cardinality, ctx))
goto err;
}

if (!BN_add(lambda, k, cardinality))
goto err;
BN_set_flags(lambda, BN_FLG_CONSTTIME);
if (!BN_add(k, lambda, cardinality))
goto err;
/*
* lambda := scalar + cardinality
* k := scalar + 2*cardinality
*/
kbit = BN_is_bit_set(lambda, cardinality_bits);
BN_consttime_swap(kbit, k, lambda, group_top + 2);

group_top = group->field.top;
if ((bn_wexpand(&s->X, group_top) == NULL)
|| (bn_wexpand(&s->Y, group_top) == NULL)
|| (bn_wexpand(&s->Z, group_top) == NULL)
|| (bn_wexpand(&r->X, group_top) == NULL)
|| (bn_wexpand(&r->Y, group_top) == NULL)
|| (bn_wexpand(&r->Z, group_top) == NULL))
goto err;

/* top bit is a 1, in a fixed pos */
if (!EC_POINT_copy(r, s))
goto err;

EC_POINT_BN_set_flags(r, BN_FLG_CONSTTIME);

if (!EC_POINT_dbl(group, s, s, ctx))
goto err;

pbit = 0;

#define EC_POINT_CSWAP(c, a, b, w, t) do { \
BN_consttime_swap(c, &(a)->X, &(b)->X, w); \
BN_consttime_swap(c, &(a)->Y, &(b)->Y, w); \
BN_consttime_swap(c, &(a)->Z, &(b)->Z, w); \
t = ((a)->Z_is_one ^ (b)->Z_is_one) & (c); \
(a)->Z_is_one ^= (t); \
(b)->Z_is_one ^= (t); \
} while(0)

/*-
* The ladder step, with branches, is
*
* k[i] == 0: S = add(R, S), R = dbl(R)
* k[i] == 1: R = add(S, R), S = dbl(S)
*
* Swapping R, S conditionally on k[i] leaves you with state
*
* k[i] == 0: T, U = R, S
* k[i] == 1: T, U = S, R
*
* Then perform the ECC ops.
*
* U = add(T, U)
* T = dbl(T)
*
* Which leaves you with state
*
* k[i] == 0: U = add(R, S), T = dbl(R)
* k[i] == 1: U = add(S, R), T = dbl(S)
*
* Swapping T, U conditionally on k[i] leaves you with state
*
* k[i] == 0: R, S = T, U
* k[i] == 1: R, S = U, T
*
* Which leaves you with state
*
* k[i] == 0: S = add(R, S), R = dbl(R)
* k[i] == 1: R = add(S, R), S = dbl(S)
*
* So we get the same logic, but instead of a branch it's a
* conditional swap, followed by ECC ops, then another conditional swap.
*
* Optimization: The end of iteration i and start of i-1 looks like
*
* ...
* CSWAP(k[i], R, S)
* ECC
* CSWAP(k[i], R, S)
* (next iteration)
* CSWAP(k[i-1], R, S)
* ECC
* CSWAP(k[i-1], R, S)
* ...
*
* So instead of two contiguous swaps, you can merge the condition
* bits and do a single swap.
*
* k[i] k[i-1] Outcome
* 0 0 No Swap
* 0 1 Swap
* 1 0 Swap
* 1 1 No Swap
*
* This is XOR. pbit tracks the previous bit of k.
*/

for (i = cardinality_bits - 1; i >= 0; i--) {
kbit = BN_is_bit_set(k, i) ^ pbit;
EC_POINT_CSWAP(kbit, r, s, group_top, Z_is_one);
if (!EC_POINT_add(group, s, r, s, ctx))
goto err;
if (!EC_POINT_dbl(group, r, r, ctx))
goto err;
/*
* pbit logic merges this cswap with that of the
* next iteration
*/
pbit ^= kbit;
}
/* one final cswap to move the right value into r */
EC_POINT_CSWAP(pbit, r, s, group_top, Z_is_one);
#undef EC_POINT_CSWAP

ret = 1;

err:
EC_POINT_free(s);
BN_CTX_end(ctx);
BN_CTX_free(new_ctx);

return ret;
}

#undef EC_POINT_BN_set_flags

/*
* TODO: table should be optimised for the wNAF-based implementation,
* sometimes smaller windows will give better performance (thus the
Expand Down Expand Up @@ -369,6 +587,34 @@ int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar,
return EC_POINT_set_to_infinity(group, r);
}

if (!BN_is_zero(&group->order) && !BN_is_zero(&group->cofactor)) {
/*-
* Handle the common cases where the scalar is secret, enforcing a constant
* time scalar multiplication algorithm.
*/
if ((scalar != NULL) && (num == 0)) {
/*-
* In this case we want to compute scalar * GeneratorPoint: this
* codepath is reached most prominently by (ephemeral) key generation
* of EC cryptosystems (i.e. ECDSA keygen and sign setup, ECDH
* keygen/first half), where the scalar is always secret. This is why
* we ignore if BN_FLG_CONSTTIME is actually set and we always call the
* constant time version.
*/
return ec_mul_consttime(group, r, scalar, NULL, ctx);
}
if ((scalar == NULL) && (num == 1)) {
/*-
* In this case we want to compute scalar * GenericPoint: this codepath
* is reached most prominently by the second half of ECDH, where the
* secret scalar is multiplied by the peer's public point. To protect
* the secret scalar, we ignore if BN_FLG_CONSTTIME is actually set and
* we always call the constant time version.
*/
return ec_mul_consttime(group, r, scalars[0], points[0], ctx);
}
}

for (i = 0; i < num; i++) {
if (group->meth != points[i]->meth) {
ECerr(EC_F_EC_WNAF_MUL, EC_R_INCOMPATIBLE_OBJECTS);
Expand Down

0 comments on commit b18162a

Please sign in to comment.