From 8a65e7a529020b50716f08acc82816b95765914b Mon Sep 17 00:00:00 2001 From: Hugo Landau Date: Tue, 23 May 2023 12:23:06 +0100 Subject: [PATCH] QUIC CHANNEL: Handle key updates correctly Reviewed-by: Tomas Mraz Reviewed-by: Matt Caswell Reviewed-by: Paul Dale (Merged from https://github.com/openssl/openssl/pull/21029) --- ssl/quic/quic_channel.c | 233 ++++++++++++++++++++++++++++++++++ ssl/quic/quic_channel_local.h | 67 ++++++++++ 2 files changed, 300 insertions(+) diff --git a/ssl/quic/quic_channel.c b/ssl/quic/quic_channel.c index 596dc2c36ea6e..b8f6121b4b99a 100644 --- a/ssl/quic/quic_channel.c +++ b/ssl/quic/quic_channel.c @@ -66,6 +66,7 @@ static int ch_on_crypto_send(const unsigned char *buf, size_t buf_len, static OSSL_TIME get_time(void *arg); static uint64_t get_stream_limit(int uni, void *arg); static int rx_early_validate(QUIC_PN pn, int pn_space, void *arg); +static void rxku_detected(QUIC_PN pn, void *arg); static int ch_retry(QUIC_CHANNEL *ch, const unsigned char *retry_token, size_t retry_token_len, @@ -85,6 +86,8 @@ static void ch_default_packet_handler(QUIC_URXE *e, void *arg); static int ch_server_on_new_conn(QUIC_CHANNEL *ch, const BIO_ADDR *peer, const QUIC_CONN_ID *peer_scid, const QUIC_CONN_ID *peer_dcid); +static void ch_on_txp_ack_tx(const OSSL_QUIC_FRAME_ACK *ack, uint32_t pn_space, + void *arg); static int gen_rand_conn_id(OSSL_LIB_CTX *libctx, size_t len, QUIC_CONN_ID *cid) { @@ -184,6 +187,7 @@ static int ch_init(QUIC_CHANNEL *ch) ch->cc_method, ch->cc_data)) == NULL) goto err; + if (!ossl_quic_stream_map_init(&ch->qsm, get_stream_limit, ch, &ch->max_streams_bidi_rxfc, &ch->max_streams_uni_rxfc, @@ -221,6 +225,8 @@ static int ch_init(QUIC_CHANNEL *ch) if (ch->txp == NULL) goto err; + ossl_quic_tx_packetiser_set_ack_tx_cb(ch->txp, ch_on_txp_ack_tx, ch); + if ((ch->demux = ossl_quic_demux_new(/*BIO=*/NULL, /*Short CID Len=*/rx_short_cid_len, get_time, ch)) == NULL) @@ -249,6 +255,11 @@ static int ch_init(QUIC_CHANNEL *ch) ch)) goto err; + if (!ossl_qrx_set_key_update_cb(ch->qrx, + rxku_detected, + ch)) + goto err; + if (!ch->is_server && !ossl_qrx_add_dst_conn_id(ch->qrx, &txp_args.cur_scid)) goto err; @@ -512,6 +523,213 @@ static int rx_early_validate(QUIC_PN pn, int pn_space, void *arg) return 1; } +/* + * Triggers a TXKU (whether spontaneous or solicited). Does not check whether + * spontaneous TXKU is currently allowed. + */ +QUIC_NEEDS_LOCK +static void ch_trigger_txku(QUIC_CHANNEL *ch) +{ + uint64_t next_pn + = ossl_quic_tx_packetiser_get_next_pn(ch->txp, QUIC_PN_SPACE_APP); + + if (!ossl_quic_pn_valid(next_pn) + || !ossl_qtx_trigger_key_update(ch->qtx)) { + ossl_quic_channel_raise_protocol_error(ch, QUIC_ERR_INTERNAL_ERROR, 0, + "key update"); + return; + } + + ch->txku_in_progress = 1; + ch->txku_pn = next_pn; + ch->rxku_expected = ch->ku_locally_initiated; +} + +QUIC_NEEDS_LOCK +static int txku_in_progress(QUIC_CHANNEL *ch) +{ + if (ch->txku_in_progress + && ossl_ackm_get_largest_acked(ch->ackm, QUIC_PN_SPACE_APP) >= ch->txku_pn) { + OSSL_TIME pto = ossl_ackm_get_pto_duration(ch->ackm); + + /* + * RFC 9001 s. 6.5: Endpoints SHOULD wait three times the PTO before + * initiating a key update after receiving an acknowledgment that + * confirms that the previous key update was received. + * + * Note that by the above wording, this period starts from when we get + * the ack for a TXKU-triggering packet, not when the TXKU is initiated. + * So we defer TXKU cooldown deadline calculation to this point. + */ + ch->txku_in_progress = 0; + ch->txku_cooldown_deadline = ossl_time_add(get_time(ch), + ossl_time_multiply(pto, 3)); + } + + return ch->txku_in_progress; +} + +QUIC_NEEDS_LOCK +static int txku_allowed(QUIC_CHANNEL *ch) +{ + return ch->tx_enc_level == QUIC_ENC_LEVEL_1RTT /* Sanity check. */ + /* Strict RFC 9001 criterion for TXKU. */ + && ch->handshake_confirmed + && !txku_in_progress(ch); +} + +QUIC_NEEDS_LOCK +static int txku_recommendable(QUIC_CHANNEL *ch) +{ + if (!txku_allowed(ch)) + return 0; + + return + /* Recommended RFC 9001 criterion for TXKU. */ + ossl_time_compare(get_time(ch), ch->txku_cooldown_deadline) >= 0 + /* Some additional sensible criteria. */ + && !ch->rxku_in_progress + && !ch->rxku_pending_confirm; +} + +QUIC_NEEDS_LOCK +static int txku_desirable(QUIC_CHANNEL *ch) +{ + uint64_t cur_pkt_count, max_pkt_count; + const uint32_t enc_level = QUIC_ENC_LEVEL_1RTT; + + /* Check AEAD limit to determine if we should perform a spontaneous TXKU. */ + cur_pkt_count = ossl_qtx_get_cur_epoch_pkt_count(ch->qtx, enc_level); + max_pkt_count = ossl_qtx_get_max_epoch_pkt_count(ch->qtx, enc_level); + + return cur_pkt_count >= max_pkt_count / 2; +} + +QUIC_NEEDS_LOCK +static void ch_maybe_trigger_spontaneous_txku(QUIC_CHANNEL *ch) +{ + if (!txku_recommendable(ch) || !txku_desirable(ch)) + return; + + ch->ku_locally_initiated = 1; + ch_trigger_txku(ch); +} + +QUIC_NEEDS_LOCK +static int rxku_allowed(QUIC_CHANNEL *ch) +{ + /* + * RFC 9001 s. 6.1: An endpoint MUST NOT initiate a key update prior to + * having confirmed the handshake (Section 4.1.2). + * + * RFC 9001 s. 6.1: An endpoint MUST NOT initiate a subsequent key update + * unless it has received an acknowledgment for a packet that was sent + * protected with keys from the current key phase. + * + * RFC 9001 s. 6.2: If an endpoint detects a second update before it has + * sent any packets with updated keys containing an acknowledgment for the + * packet that initiated the key update, it indicates that its peer has + * updated keys twice without awaiting confirmation. An endpoint MAY treat + * such consecutive key updates as a connection error of type + * KEY_UPDATE_ERROR. + */ + return ch->handshake_confirmed && !ch->rxku_pending_confirm; +} + +/* + * Called when the QRX detects a new RX key update event. + */ +enum rxku_decision { + DECISION_RXKU_ONLY, + DECISION_PROTOCOL_VIOLATION, + DECISION_SOLICITED_TXKU +}; + +/* Called when the QRX detects a key update has occurred. */ +QUIC_NEEDS_LOCK +static void rxku_detected(QUIC_PN pn, void *arg) +{ + QUIC_CHANNEL *ch = arg; + enum rxku_decision decision; + OSSL_TIME pto; + + /* + * Note: rxku_in_progress is always 0 here as an RXKU cannot be detected + * when we are still in UPDATING or COOLDOWN (see quic_record_rx.h). + */ + assert(!ch->rxku_in_progress); + + if (!rxku_allowed(ch)) + /* Is RXKU even allowed at this time? */ + decision = DECISION_PROTOCOL_VIOLATION; + + else if (ch->ku_locally_initiated) + /* + * If this key update was locally initiated (meaning that this detected + * RXKU event is a result of our own spontaneous TXKU), we do not + * trigger another TXKU; after all, to do so would result in an infinite + * ping-pong of key updates. We still process it as an RXKU. + */ + decision = DECISION_RXKU_ONLY; + + else + /* + * Otherwise, a peer triggering a KU means we have to trigger a KU also. + */ + decision = DECISION_SOLICITED_TXKU; + + if (decision == DECISION_PROTOCOL_VIOLATION) { + ossl_quic_channel_raise_protocol_error(ch, QUIC_ERR_KEY_UPDATE_ERROR, + 0, "RX key update again too soon"); + return; + } + + pto = ossl_ackm_get_pto_duration(ch->ackm); + + ch->ku_locally_initiated = 0; + ch->rxku_in_progress = 1; + ch->rxku_pending_confirm = 1; + ch->rxku_trigger_pn = pn; + ch->rxku_update_end_deadline = ossl_time_add(get_time(ch), pto); + + if (decision == DECISION_SOLICITED_TXKU) + /* NOT gated by usual txku_allowed() */ + ch_trigger_txku(ch); +} + +/* Called per tick to handle RXKU timer events. */ +QUIC_NEEDS_LOCK +static void ch_rxku_tick(QUIC_CHANNEL *ch) +{ + if (!ch->rxku_in_progress + || ossl_time_compare(get_time(ch), ch->rxku_update_end_deadline) < 0) + return; + + ch->rxku_update_end_deadline = ossl_time_infinite(); + ch->rxku_in_progress = 0; + + if (!ossl_qrx_key_update_timeout(ch->qrx, /*normal=*/1)) + ossl_quic_channel_raise_protocol_error(ch, QUIC_ERR_INTERNAL_ERROR, 0, + "RXKU cooldown internal error"); +} + +QUIC_NEEDS_LOCK +static void ch_on_txp_ack_tx(const OSSL_QUIC_FRAME_ACK *ack, uint32_t pn_space, + void *arg) +{ + QUIC_CHANNEL *ch = arg; + + if (pn_space != QUIC_PN_SPACE_APP || !ch->rxku_pending_confirm + || !ossl_quic_frame_ack_contains_pn(ack, ch->rxku_trigger_pn)) + return; + + /* + * Defer clearing rxku_pending_confirm until TXP generate call returns + * successfully. + */ + ch->rxku_pending_confirm_done = 1; +} + /* * QUIC Channel: Handshake Layer Event Handling * ============================================ @@ -1317,6 +1535,9 @@ static void ch_tick(QUIC_TICK_RESULT *res, void *arg, uint32_t flags) } } + /* Handle RXKU timeouts. */ + ch_rxku_tick(ch); + /* Handle any incoming data from network. */ ch_rx_pre(ch); @@ -1655,6 +1876,11 @@ static int ch_tx(QUIC_CHANNEL *ch) ch->conn_close_queued = 0; } + /* Do TXKU if we need to. */ + ch_maybe_trigger_spontaneous_txku(ch); + + ch->rxku_pending_confirm_done = 0; + /* * Send a packet, if we need to. Best effort. The TXP consults the CC and * applies any limitations imposed by it, so we don't need to do it here. @@ -1678,6 +1904,9 @@ static int ch_tx(QUIC_CHANNEL *ch) ch->have_sent_ack_eliciting_since_rx = 1; } + if (ch->rxku_pending_confirm_done) + ch->rxku_pending_confirm = 0; + ch_update_ping_deadline(ch); break; @@ -1755,6 +1984,10 @@ static OSSL_TIME ch_determine_next_tick_deadline(QUIC_CHANNEL *ch) deadline = ossl_time_min(deadline, ch->ping_deadline); + /* When does the RXKU process complete? */ + if (ch->rxku_in_progress) + deadline = ossl_time_min(deadline, ch->rxku_update_end_deadline); + return deadline; } diff --git a/ssl/quic/quic_channel_local.h b/ssl/quic/quic_channel_local.h index 3854567b361f4..c4be7d792247b 100644 --- a/ssl/quic/quic_channel_local.h +++ b/ssl/quic/quic_channel_local.h @@ -214,6 +214,34 @@ struct quic_channel_st { */ OSSL_TIME ping_deadline; + /* + * The deadline at which the period in which it is RECOMMENDED that we not + * initiate any spontaneous TXKU ends. This is zero if no such deadline + * applies. + */ + OSSL_TIME txku_cooldown_deadline; + + /* + * The deadline at which we take the QRX out of UPDATING and back to NORMAL. + * Valid if rxku_in_progress in 1. + */ + OSSL_TIME rxku_update_end_deadline; + + /* + * The first (application space) PN sent with a new key phase. Valid if + * txku_in_progress. Once a packet we sent with a PN p (p >= txku_pn) is + * ACKed, the TXKU is considered completed and txku_in_progress becomes 0. + * For sanity's sake, such a PN p should also be <= the highest PN we have + * ever sent, of course. + */ + QUIC_PN txku_pn; + + /* + * The (application space) PN which triggered RXKU detection. Valid if + * rxku_pending_confirm. + */ + QUIC_PN rxku_trigger_pn; + /* * State tracking. QUIC connection-level state is best represented based on * whether various things have happened yet or not, rather than as an @@ -319,6 +347,45 @@ struct quic_channel_st { /* Should incoming streams automatically be rejected? */ unsigned int incoming_stream_auto_reject : 1; + + /* + * 1 if a key update sequence was locally initiated, meaning we sent the + * TXKU first and the resultant RXKU shouldn't result in our triggering + * another TXKU. 0 if a key update sequence was initiated by the peer, + * meaning we detect a RXKU first and have to generate a TXKU in response. + */ + unsigned int ku_locally_initiated : 1; + + /* + * 1 if we have triggered TXKU (whether spontaneous or solicited) but are + * waiting for any PN using that new KP to be ACKed. While this is set, we + * are not allowed to trigger spontaneous TXKU (but solicited TXKU is + * potentially still possible). + */ + unsigned int txku_in_progress : 1; + + /* + * We have received an RXKU event and currently are going through + * UPDATING/COOLDOWN on the QRX. COOLDOWN is currently not used. Since RXKU + * cannot be detected in this state, this doesn't cause a protocol error or + * anything similar if a peer tries TXKU in this state. That traffic would + * simply be dropped. It's only used to track that our UPDATING timer is + * active so we know when to take the QRX out of UPDATING and back to + * NORMAL. + */ + unsigned int rxku_in_progress : 1; + + /* + * We have received an RXKU but have yet to send an ACK for it, which means + * no further RXKUs are allowed yet. Note that we cannot detect further + * RXKUs anyway while the QRX remains in the UPDATING/COOLDOWN states, so + * this restriction comes into play if we take more than PTO time to send + * an ACK for it (not likely). + */ + unsigned int rxku_pending_confirm : 1; + + /* Temporary variable indicating rxku_pending_confirm is to become 0. */ + unsigned int rxku_pending_confirm_done : 1; }; # endif