@@ -33,6 +33,18 @@ pub(crate) fn poly_compress(r: &mut [u8], a: Poly) {
3333 let mut k = 0usize ;
3434 let mut u: i16 ;
3535
36+ // Compress_q(x, d) = ⌈(2ᵈ/q)x⌋ mod⁺ 2ᵈ
37+ // = ⌊(2ᵈ/q)x+½⌋ mod⁺ 2ᵈ
38+ // = ⌊((x << d) + q/2) / q⌋ mod⁺ 2ᵈ
39+ // = DIV((x << d) + q/2, q) & ((1<<d) - 1)
40+ //
41+ // We approximate DIV(x, q) by computing (x*a)>>e, where a/(2^e) ≈ 1/q.
42+ // For d in {10,11} we use 20,642,678/2^36, which computes ⌊x/q⌋
43+ // correctly for 0 ≤ x < 41,522,616; the largest input, (q << 11) + q/2, is comfortably within that range.
44+ // For d in {4,5} we use 315/2^20, which doesn't compute ⌊x/q⌋
45+ // correctly for all inputs, but it's close enough that the end result
46+ // of the compression is correct. The advantage is that we do not need
47+ // to use a 64-bit intermediate value.
3648 match KYBER_POLY_COMPRESSED_BYTES {
3749 128 => {
3850 #[ allow( clippy:: needless_range_loop) ]
@@ -41,9 +53,11 @@ pub(crate) fn poly_compress(r: &mut [u8], a: Poly) {
4153 // map to positive standard representatives
4254 u = a. coeffs [ 8 * i + j] ;
4355 u += ( u >> 15 ) & KYBER_Q as i16 ;
44- t[ j] = ( ( ( ( ( u as u16 ) << 4 ) + KYBER_Q as u16 / 2 )
45- / KYBER_Q as u16 )
46- & 15 ) as u8 ;
56+ let mut tmp: u32 =
57+ ( ( ( u as u16 ) << 4 ) + KYBER_Q as u16 / 2 ) as u32 ;
58+ tmp *= 315 ;
59+ tmp >>= 20 ;
60+ t[ j] = ( ( tmp as u16 ) & 15 ) as u8 ;
4761 }
4862 r[ k] = t[ 0 ] | ( t[ 1 ] << 4 ) ;
4963 r[ k + 1 ] = t[ 2 ] | ( t[ 3 ] << 4 ) ;
@@ -59,9 +73,11 @@ pub(crate) fn poly_compress(r: &mut [u8], a: Poly) {
5973 // map to positive standard representatives
6074 u = a. coeffs [ 8 * i + j] ;
6175 u += ( u >> 15 ) & KYBER_Q as i16 ;
62- t[ j] = ( ( ( ( ( u as u32 ) << 5 ) + KYBER_Q as u32 / 2 )
63- / KYBER_Q as u32 )
64- & 31 ) as u8 ;
76+ let mut tmp: u32 =
77+ ( ( u as u32 ) << 5 ) + KYBER_Q as u32 / 2 ;
78+ tmp *= 315 ;
79+ tmp >>= 20 ;
80+ t[ j] = ( ( tmp as u16 ) & 31 ) as u8 ;
6581 }
6682 r[ k] = t[ 0 ] | ( t[ 1 ] << 5 ) ;
6783 r[ k + 1 ] = ( t[ 1 ] >> 3 ) | ( t[ 2 ] << 2 ) | ( t[ 3 ] << 7 ) ;
@@ -324,14 +340,19 @@ pub(crate) fn poly_frommsg(r: &mut Poly, msg: &[u8]) {
324340/// Arguments: - [u8] msg: output message
325341/// - const poly *a: input polynomial
326342pub ( crate ) fn poly_tomsg ( msg : & mut [ u8 ] , a : Poly ) {
327- let mut t;
343+ let mut t: u32 ;
328344 #[ allow( clippy:: needless_range_loop) ]
329345 for i in 0 ..KYBER_N / 8 {
330346 msg[ i] = 0 ;
331347 for j in 0 ..8 {
332- t = a. coeffs [ 8 * i + j] ;
333- t += ( t >> 15 ) & KYBER_Q as i16 ;
334- t = ( ( ( t << 1 ) + KYBER_Q as i16 / 2 ) / KYBER_Q as i16 ) & 1 ;
348+ t = a. coeffs [ 8 * i + j] as u32 ;
349+
350+ t <<= 1 ;
351+ t = t. wrapping_add ( 1665 ) ;
352+ t = t. wrapping_mul ( 80635 ) ;
353+ t >>= 28 ;
354+ t &= 1 ;
355+
335356 msg[ i] |= ( t << j) as u8 ;
336357 }
337358 }
0 commit comments