Skip to content

Commit

Permalink
Merge tag 'pull-crypto-20230915' of https://gitlab.com/rth7680/qemu into staging
Browse files Browse the repository at this point in the history

Unify implementation of carry-less multiply.
Accelerate carry-less multiply for 64x64->128.

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmUEiPodHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV/akgf/XkiIeErWJr1YXSbS
# YPQtCsDAfIrqn3RiyQ2uwSn2eeuwVqTFFPGER04YegRDK8dyO874JBfvOwmBT70J
# I/aU8Z4BbRyNu9nfaCtFMlXQH9KArAKcAds1PnshfcnI5T2yBloZ1sAU97IuJFZk
# Uuz96H60+ohc4wzaUiPqPhXQStgZeSYwwAJB0s25DhCckdea0udRCAJ1tQTVpxkM
# wIFef1SHPoM6DtMzFKHLLUH6VivSlHjqx8GqFusa7pVqfQyDzNBfwvDl1F/bkE07
# yTocQEkV3QnZvIplhqUxAaZXIFZr9BNk7bDimMjHW6z3pNPN3T8zRn4trNjxbgPV
# jqzAtg==
# =8nnk
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 15 Sep 2023 12:40:26 EDT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* tag 'pull-crypto-20230915' of https://gitlab.com/rth7680/qemu:
  host/include/aarch64: Implement clmul.h
  host/include/i386: Implement clmul.h
  target/ppc: Use clmul_64
  target/s390x: Use clmul_64
  target/i386: Use clmul_64
  target/arm: Use clmul_64
  crypto: Add generic 64-bit carry-less multiply routine
  target/ppc: Use clmul_32* routines
  target/s390x: Use clmul_32* routines
  target/arm: Use clmul_32* routines
  crypto: Add generic 32-bit carry-less multiply routines
  target/ppc: Use clmul_16* routines
  target/s390x: Use clmul_16* routines
  target/arm: Use clmul_16* routines
  crypto: Add generic 16-bit carry-less multiply routines
  target/ppc: Use clmul_8* routines
  target/s390x: Use clmul_8* routines
  target/arm: Use clmul_8* routines
  crypto: Add generic 8-bit carry-less multiply routines

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
  • Loading branch information
stefanhaRH committed Sep 18, 2023
2 parents 005ad32 + 055c990 commit 13d6b16
Show file tree
Hide file tree
Showing 18 changed files with 434 additions and 286 deletions.
111 changes: 111 additions & 0 deletions crypto/clmul.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* Carry-less multiply operations.
* SPDX-License-Identifier: GPL-2.0-or-later
*
* Copyright (C) 2023 Linaro, Ltd.
*/

#include "qemu/osdep.h"
#include "crypto/clmul.h"

/*
 * Eight independent 8x8->8 carry-less multiplies, one per byte lane.
 * Only the low 8 bits of each 16-bit product are kept, so the byte
 * lanes never interact.
 */
uint64_t clmul_8x8_low(uint64_t n, uint64_t m)
{
    const uint64_t lane_lsb = 0x0101010101010101ull;
    uint64_t acc = 0;

    for (int bit = 0; bit < 8; ++bit) {
        /* Broadcast bit 0 of each byte of n into a full-byte mask. */
        acc ^= m & ((n & lane_lsb) * 0xff);
        /* Shift each byte of m left; ~lane_lsb clears cross-lane carries. */
        m = (m << 1) & ~lane_lsb;
        n >>= 1;
    }
    return acc;
}

/*
 * Worker for the 8x4 multiplies: four 8x8->16 carry-less multiplies,
 * one per 16-bit lane.  Callers must have already confined the
 * operands to the low byte of each 16-bit lane.
 */
static uint64_t clmul_8x4_even_int(uint64_t n, uint64_t m)
{
    const uint64_t lane_lsb = 0x0001000100010001ull;
    uint64_t acc = 0;

    for (int bit = 8; bit-- > 0; ) {
        /* Expand bit 0 of each 16-bit lane of n into a full-lane mask. */
        acc ^= m & ((n & lane_lsb) * 0xffff);
        n >>= 1;
        m <<= 1;
    }
    return acc;
}

/*
 * Four 8x8->16 carry-less multiplies on the even-indexed bytes;
 * the odd bytes of both operands are ignored.
 */
uint64_t clmul_8x4_even(uint64_t n, uint64_t m)
{
    const uint64_t even_bytes = 0x00ff00ff00ff00ffull;

    return clmul_8x4_even_int(n & even_bytes, m & even_bytes);
}

/*
 * Four 8x8->16 carry-less multiplies on the odd-indexed bytes;
 * implemented by shifting the odd bytes into the even positions.
 */
uint64_t clmul_8x4_odd(uint64_t n, uint64_t m)
{
    uint64_t n_lo = n >> 8;
    uint64_t m_lo = m >> 8;

    return clmul_8x4_even(n_lo, m_lo);
}

/*
 * Zero-extend each byte of the low 32 bits of x into a 16-bit lane,
 * yielding the layout expected by clmul_8x4_even_int.
 */
static uint64_t unpack_8_to_16(uint64_t x)
{
    uint64_t r = 0;

    for (int i = 0; i < 4; ++i) {
        r |= ((x >> (8 * i)) & 0xff) << (16 * i);
    }
    return r;
}

/*
 * Four 8x8->16 carry-less multiplies of the packed bytes of n and m,
 * spreading each byte into its own 16-bit lane first.
 */
uint64_t clmul_8x4_packed(uint32_t n, uint32_t m)
{
    uint64_t wide_n = unpack_8_to_16(n);
    uint64_t wide_m = unpack_8_to_16(m);

    return clmul_8x4_even_int(wide_n, wide_m);
}

/*
 * Two 16x16->32 carry-less multiplies, one per 32-bit lane;
 * the odd 16-bit words of both operands are ignored.
 */
uint64_t clmul_16x2_even(uint64_t n, uint64_t m)
{
    const uint64_t lane_lsb = 0x0000000100000001ull;
    uint64_t a = n & 0x0000ffff0000ffffull;
    uint64_t b = m & 0x0000ffff0000ffffull;
    uint64_t acc = 0;

    for (int bit = 0; bit < 16; ++bit) {
        /* Expand bit 0 of each 32-bit lane of a into a full-lane mask. */
        acc ^= b & ((a & lane_lsb) * 0xffffffffull);
        a >>= 1;
        b <<= 1;
    }
    return acc;
}

/*
 * Two 16x16->32 carry-less multiplies of the odd 16-bit words;
 * implemented by shifting the odd words into the even positions.
 */
uint64_t clmul_16x2_odd(uint64_t n, uint64_t m)
{
    uint64_t n_lo = n >> 16;
    uint64_t m_lo = m >> 16;

    return clmul_16x2_even(n_lo, m_lo);
}

/*
 * One 32x32->64 carry-less multiply.
 */
uint64_t clmul_32(uint32_t n, uint32_t m32)
{
    uint64_t acc = 0;
    uint64_t mult = m32;  /* widened so the left shifts don't overflow */

    for (int i = 0; i < 32; ++i, mult <<= 1) {
        if ((n >> i) & 1) {
            acc ^= mult;
        }
    }
    return acc;
}

/*
 * Generic (non-accelerated) 64x64->128 carry-less multiply.
 */
Int128 clmul_64_gen(uint64_t n, uint64_t m)
{
    /* Bit 0 contributes only to the low half: m << 0 has no high part. */
    uint64_t lo = (n & 1) ? m : 0;
    uint64_t hi = 0;

    for (int i = 1; i < 64; ++i) {
        if ((n >> i) & 1) {
            lo ^= m << i;
            hi ^= m >> (64 - i);
        }
    }
    return int128_make128(lo, hi);
}
9 changes: 6 additions & 3 deletions crypto/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,12 @@ if have_afalg
endif
crypto_ss.add(when: gnutls, if_true: files('tls-cipher-suites.c'))

util_ss.add(files('sm4.c'))
util_ss.add(files('aes.c'))
util_ss.add(files('init.c'))
util_ss.add(files(
'aes.c',
'clmul.c',
'init.c',
'sm4.c',
))
if gnutls.found()
util_ss.add(gnutls)
endif
Expand Down
1 change: 1 addition & 0 deletions host/include/aarch64/host/cpuinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define CPUINFO_LSE (1u << 1)
#define CPUINFO_LSE2 (1u << 2)
#define CPUINFO_AES (1u << 3)
#define CPUINFO_PMULL (1u << 4)

/* Initialized with a constructor. */
extern unsigned cpuinfo;
Expand Down
41 changes: 41 additions & 0 deletions host/include/aarch64/host/crypto/clmul.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
 * AArch64 specific clmul acceleration.
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#ifndef AARCH64_HOST_CRYPTO_CLMUL_H
#define AARCH64_HOST_CRYPTO_CLMUL_H

#include "host/cpuinfo.h"
#include <arm_neon.h>

/*
 * 64x64->128 pmull is available with FEAT_PMULL.
 * Both FEAT_AES and FEAT_PMULL are covered under the same macro.
 */
#ifdef __ARM_FEATURE_AES
/* Compiled with the crypto extension enabled: always usable. */
# define HAVE_CLMUL_ACCEL true
#else
/* Otherwise probe at runtime via the cpuinfo constructor. */
# define HAVE_CLMUL_ACCEL likely(cpuinfo & CPUINFO_PMULL)
#endif
/*
 * When the compiler supports per-function targets but the translation
 * unit is not built with +aes, enable the extension just for the
 * accelerated helper below; otherwise fall back to inline asm.
 */
#if !defined(__ARM_FEATURE_AES) && defined(CONFIG_ARM_AES_BUILTIN)
# define ATTR_CLMUL_ACCEL __attribute__((target("+crypto")))
#else
# define ATTR_CLMUL_ACCEL
#endif

/*
 * 64x64->128 carry-less multiply via PMULL.
 * Callers must check HAVE_CLMUL_ACCEL before calling.
 */
static inline Int128 ATTR_CLMUL_ACCEL
clmul_64_accel(uint64_t n, uint64_t m)
{
/* Type-pun the poly128_t result to Int128 through a union. */
union { poly128_t v; Int128 s; } u;

#ifdef CONFIG_ARM_AES_BUILTIN
u.v = vmull_p64((poly64_t)n, (poly64_t)m);
#else
/* No intrinsic available: emit the instruction directly. */
asm(".arch_extension aes\n\t"
"pmull %0.1q, %1.1d, %2.1d" : "=w"(u.v) : "w"(n), "w"(m));
#endif
return u.s;
}

#endif /* AARCH64_HOST_CRYPTO_CLMUL_H */
15 changes: 15 additions & 0 deletions host/include/generic/host/crypto/clmul.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/*
 * No host specific carry-less multiply acceleration.
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#ifndef GENERIC_HOST_CRYPTO_CLMUL_H
#define GENERIC_HOST_CRYPTO_CLMUL_H

/* HAVE_CLMUL_ACCEL is constant false, so clmul_64_accel is never reached. */
#define HAVE_CLMUL_ACCEL false
#define ATTR_CLMUL_ACCEL

/*
 * QEMU_ERROR turns any surviving call into a compile-time error,
 * guaranteeing callers guarded the call with HAVE_CLMUL_ACCEL.
 */
Int128 clmul_64_accel(uint64_t, uint64_t)
QEMU_ERROR("unsupported accel");

#endif /* GENERIC_HOST_CRYPTO_CLMUL_H */
1 change: 1 addition & 0 deletions host/include/i386/host/cpuinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#define CPUINFO_ATOMIC_VMOVDQA (1u << 16)
#define CPUINFO_ATOMIC_VMOVDQU (1u << 17)
#define CPUINFO_AES (1u << 18)
#define CPUINFO_PCLMUL (1u << 19)

/* Initialized with a constructor. */
extern unsigned cpuinfo;
Expand Down
29 changes: 29 additions & 0 deletions host/include/i386/host/crypto/clmul.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
 * x86 specific clmul acceleration.
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#ifndef X86_HOST_CRYPTO_CLMUL_H
#define X86_HOST_CRYPTO_CLMUL_H

#include "host/cpuinfo.h"
#include <immintrin.h>

#if defined(__PCLMUL__)
/* Compiled with -mpclmul: the instruction is unconditionally usable. */
# define HAVE_CLMUL_ACCEL true
# define ATTR_CLMUL_ACCEL
#else
/* Otherwise probe CPUID at runtime and compile the helper with pclmul. */
# define HAVE_CLMUL_ACCEL likely(cpuinfo & CPUINFO_PCLMUL)
# define ATTR_CLMUL_ACCEL __attribute__((target("pclmul")))
#endif

/*
 * 64x64->128 carry-less multiply via PCLMULQDQ.
 * Callers must check HAVE_CLMUL_ACCEL before calling.
 */
static inline Int128 ATTR_CLMUL_ACCEL
clmul_64_accel(uint64_t n, uint64_t m)
{
/* Type-pun the __m128i result to Int128 through a union. */
union { __m128i v; Int128 s; } u;

/* imm8 == 0 selects the low quadword of each operand. */
u.v = _mm_clmulepi64_si128(_mm_set_epi64x(0, n), _mm_set_epi64x(0, m), 0);
return u.s;
}

#endif /* X86_HOST_CRYPTO_CLMUL_H */
1 change: 1 addition & 0 deletions host/include/x86_64/host/crypto/clmul.h
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#include "host/include/i386/host/crypto/clmul.h"
83 changes: 83 additions & 0 deletions include/crypto/clmul.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
 * Carry-less multiply operations.
 * SPDX-License-Identifier: GPL-2.0-or-later
 *
 * Copyright (C) 2023 Linaro, Ltd.
 */

#ifndef CRYPTO_CLMUL_H
#define CRYPTO_CLMUL_H

#include "qemu/int128.h"
#include "host/crypto/clmul.h"

/**
 * clmul_8x8_low:
 *
 * Perform eight 8x8->8 carry-less multiplies, one per byte lane,
 * keeping only the low 8 bits of each product.
 */
uint64_t clmul_8x8_low(uint64_t, uint64_t);

/**
 * clmul_8x4_even:
 *
 * Perform four 8x8->16 carry-less multiplies.
 * The odd bytes of the inputs are ignored.
 */
uint64_t clmul_8x4_even(uint64_t, uint64_t);

/**
 * clmul_8x4_odd:
 *
 * Perform four 8x8->16 carry-less multiplies.
 * The even bytes of the inputs are ignored.
 */
uint64_t clmul_8x4_odd(uint64_t, uint64_t);

/**
 * clmul_8x4_packed:
 *
 * Perform four 8x8->16 carry-less multiplies on the packed bytes
 * of the 32-bit inputs.
 */
uint64_t clmul_8x4_packed(uint32_t, uint32_t);

/**
 * clmul_16x2_even:
 *
 * Perform two 16x16->32 carry-less multiplies.
 * The odd words of the inputs are ignored.
 */
uint64_t clmul_16x2_even(uint64_t, uint64_t);

/**
 * clmul_16x2_odd:
 *
 * Perform two 16x16->32 carry-less multiplies.
 * The even words of the inputs are ignored.
 */
uint64_t clmul_16x2_odd(uint64_t, uint64_t);

/**
 * clmul_32:
 *
 * Perform a 32x32->64 carry-less multiply.
 */
uint64_t clmul_32(uint32_t, uint32_t);

/**
 * clmul_64_gen:
 *
 * Perform a 64x64->128 carry-less multiply with the generic
 * (non-accelerated) implementation.  Most callers should use
 * clmul_64() instead, which dispatches to host acceleration
 * when available.
 */
Int128 clmul_64_gen(uint64_t, uint64_t);

/**
 * clmul_64:
 *
 * Perform a 64x64->128 carry-less multiply, using the host
 * accelerated implementation when HAVE_CLMUL_ACCEL says so.
 */
static inline Int128 clmul_64(uint64_t a, uint64_t b)
{
if (HAVE_CLMUL_ACCEL) {
return clmul_64_accel(a, b);
} else {
return clmul_64_gen(a, b);
}
}

#endif /* CRYPTO_CLMUL_H */
3 changes: 3 additions & 0 deletions include/qemu/cpuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
#endif

/* Leaf 1, %ecx */
#ifndef bit_PCLMUL
#define bit_PCLMUL (1 << 1)
#endif
#ifndef bit_SSE4_1
#define bit_SSE4_1 (1 << 19)
#endif
Expand Down
16 changes: 5 additions & 11 deletions target/arm/tcg/mve_helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "fpu/softfloat.h"
#include "crypto/clmul.h"

static uint16_t mve_eci_mask(CPUARMState *env)
{
Expand Down Expand Up @@ -984,17 +985,10 @@ DO_2OP_L(vmulltuw, 1, 4, uint32_t, 8, uint64_t, DO_MUL)
* Polynomial multiply. We can always do this generating 64 bits
* of the result at a time, so we don't need to use DO_2OP_L.
*/
#define VMULLPH_MASK 0x00ff00ff00ff00ffULL
#define VMULLPW_MASK 0x0000ffff0000ffffULL
#define DO_VMULLPBH(N, M) pmull_h((N) & VMULLPH_MASK, (M) & VMULLPH_MASK)
#define DO_VMULLPTH(N, M) DO_VMULLPBH((N) >> 8, (M) >> 8)
#define DO_VMULLPBW(N, M) pmull_w((N) & VMULLPW_MASK, (M) & VMULLPW_MASK)
#define DO_VMULLPTW(N, M) DO_VMULLPBW((N) >> 16, (M) >> 16)

DO_2OP(vmullpbh, 8, uint64_t, DO_VMULLPBH)
DO_2OP(vmullpth, 8, uint64_t, DO_VMULLPTH)
DO_2OP(vmullpbw, 8, uint64_t, DO_VMULLPBW)
DO_2OP(vmullptw, 8, uint64_t, DO_VMULLPTW)
DO_2OP(vmullpbh, 8, uint64_t, clmul_8x4_even)
DO_2OP(vmullpth, 8, uint64_t, clmul_8x4_odd)
DO_2OP(vmullpbw, 8, uint64_t, clmul_16x2_even)
DO_2OP(vmullptw, 8, uint64_t, clmul_16x2_odd)

/*
* Because the computation type is at least twice as large as required,
Expand Down

0 comments on commit 13d6b16

Please sign in to comment.