Skip to content

Commit

Permalink
runtests.sh checking sha256sums of test vectors, fixed namespacing fo…
Browse files Browse the repository at this point in the history
…r keccak4x
  • Loading branch information
gregorseiler committed Apr 3, 2020
1 parent f3d53f3 commit ae40695
Show file tree
Hide file tree
Showing 11 changed files with 174 additions and 114 deletions.
12 changes: 6 additions & 6 deletions .gitignore
@@ -1,6 +1,6 @@
*.dSYM
*.pyc
.DS_Store
*.req
*.rsp
*.o
tvecs1024
tvecs1024-90s
tvecs512
tvecs512-90s
tvecs768
tvecs768-90s
6 changes: 6 additions & 0 deletions SHA256SUMS
@@ -0,0 +1,6 @@
2d46d36da383c3ece6fc46512162dabb73bf73a92ee90f29fbf990873766c223 tvecs1024
761e10eda09e1f02204ca5484c2a9f908e8866ade289c1deabe9d0769233df67 tvecs1024-90s
005c74399ff14d2244905f089c6d408a6bb33828bc78d5b77a3a01b227affbca tvecs512
71099d65a79fc1881836dd7cf8802fcce5d81cd64acb35c80102090d800bf131 tvecs512-90s
cca4a24c53f7e8baddddd63bff32ad1e0b1590c742b545e0f682cb4202ac2051 tvecs768
0bf24733d4ee523b60e43a25e85b577eba7ccc6109ba57b798904dd8d08828c0 tvecs768-90s
1 change: 1 addition & 0 deletions avx2/.gitignore
87 changes: 58 additions & 29 deletions avx2/Makefile
@@ -1,9 +1,9 @@
CC = /usr/bin/cc
CFLAGS += -Wall -Wextra -Wpedantic -Wmissing-prototypes -Wredundant-decls \
-Wshadow -Wpointer-arith -march=native -mtune=native -O3 \
-fomit-frame-pointer -fstrict-aliasing -flto
-fomit-frame-pointer -flto
#CFLAGS += -DUSE_RDPMC
NISTFLAGS = -Wno-unused-result -O3 -fomit-frame-pointer -march=native -mtune=native
NISTFLAGS = -Wno-unused-result -O3 -march=native -mtune=native
RM = /bin/rm

SOURCES = kem.c indcpa.c polyvec.c poly.c fq.S shuffle.S ntt.S invntt.S \
Expand All @@ -16,6 +16,8 @@ HEADERS = params.h api.h indcpa.h polyvec.h poly.h reduce.h fq.inc shuffle.inc \
HEADERSKECCAK = $(HEADERS) fips202.h fips202x4.h
HEADERSNINETIES = $(HEADERS) aes256ctr.h

.PHONY: all shared clean

all: \
test_kyber512 \
test_kyber768 \
Expand Down Expand Up @@ -43,17 +45,48 @@ all: \
test_speed1024-90s \
PQCgenKAT_kem

kyber512.so: $(SOURCES) $(HEADERS) symmetric-shake.c
# Aggregate target: requesting `shared` builds the nine shared libraries
# listed below. It has no recipe of its own.
shared: \
  kyber512_avx2.so kyber768_avx2.so kyber1024_avx2.so \
  kyber512-90s_avx2.so kyber768-90s_avx2.so kyber1024-90s_avx2.so \
  pqcrystals_ref.so pqcrystals_avx2.so pqcrystals_avx2_aes256ctr.so

# Compiles keccak4x/KeccakP-1600-times4-SIMD256.c into an object file.
# The listed headers and the .macros file are prerequisites only, so that a
# change to any of them triggers a rebuild; $< is the .c file alone.
#
# -fPIC is required because this object is linked into pqcrystals_avx2.so
# (built with -shared -fPIC). Objects that go into a shared library must be
# compiled as position-independent code; without it the link only succeeds
# when the toolchain happens to regenerate the code at link time.
keccak4x/KeccakP-1600-times4-SIMD256.o: \
  keccak4x/KeccakP-1600-times4-SIMD256.c \
  keccak4x/KeccakP-1600-times4-SnP.h \
  keccak4x/KeccakP-1600-unrolling.macros \
  keccak4x/SIMD256-config.h \
  keccak4x/align.h \
  keccak4x/brg_endian.h
	$(CC) -fPIC $(CFLAGS) -c $< -o $@

# Shared library built from fips202.c only: $< is the first prerequisite,
# and fips202.h is listed so that header changes trigger a rebuild.
pqcrystals_ref.so: fips202.c fips202.h
	$(CC) -shared -fPIC $(CFLAGS) $< -o $@

# Shared library combining fips202x4.c with the keccak4x object file.
# The filter keeps the .c and .o prerequisites and drops fips202x4.h, so the
# compiler receives exactly the source file followed by the object file.
pqcrystals_avx2.so: fips202x4.c fips202x4.h \
  keccak4x/KeccakP-1600-times4-SIMD256.o
	$(CC) -shared -fPIC $(CFLAGS) -o $@ $(filter %.c %.o,$^)

# Shared library built from aes256ctr.c only: $< is the first prerequisite,
# and aes256ctr.h is listed so that header changes trigger a rebuild.
pqcrystals_avx2_aes256ctr.so: aes256ctr.c aes256ctr.h
	$(CC) -shared -fPIC $(CFLAGS) $< -o $@

kyber512_avx2.so: $(SOURCES) $(HEADERS) symmetric-shake.c
$(CC) -shared -fpic $(CFLAGS) -DKYBER_K=2 $(SOURCES) \
symmetric-shake.c -o kyber512.so
symmetric-shake.c -o kyber512_avx2.so

kyber768.so: $(SOURCES) $(HEADERS) symmetric-shake.c
kyber768_avx2.so: $(SOURCES) $(HEADERS) symmetric-shake.c
$(CC) -shared -fpic $(CFLAGS) -DKYBER_K=3 $(SOURCES) \
symmetric-shake.c -o kyber768.so
symmetric-shake.c -o kyber768_avx2.so

kyber1024.so: $(SOURCES) $(HEADERS) symmetric-shake.c
kyber1024_avx2.so: $(SOURCES) $(HEADERS) symmetric-shake.c
$(CC) -shared -fpic $(CFLAGS) -DKYBER_K=4 $(SOURCES) \
symmetric-shake.c -o kyber1024.so
symmetric-shake.c -o kyber1024_avx2.so

# Builds the test_kyber512 program with KYBER_K=2 from $(SOURCESKECCAK),
# randombytes.c and test_kyber.c. $@ expands to the target name.
test_kyber512: $(SOURCESKECCAK) $(HEADERSKECCAK) test_kyber.c randombytes.c
	$(CC) $(CFLAGS) -DKYBER_K=2 \
	  $(SOURCESKECCAK) randombytes.c test_kyber.c \
	  -o $@
Expand Down Expand Up @@ -82,7 +115,7 @@ test_vectors768: $(SOURCESKECCAK) $(HEADERSKECCAK) test_vectors.c
# Builds the test_vectors1024 program with KYBER_K=4 from $(SOURCESKECCAK)
# and test_vectors.c. $@ expands to the target name.
test_vectors1024: $(SOURCESKECCAK) $(HEADERSKECCAK) test_vectors.c
	$(CC) $(CFLAGS) -DKYBER_K=4 \
	  $(SOURCESKECCAK) test_vectors.c \
	  -o $@

test_speed512: $(SOURCESKECCAK) $(HEADERSKECCAK) cpucycles.h cpucycles.c test_speed.c speed_print.h speed_print.c randombytes.c kex.c kex.h
test_speed512: $(SOURCESKECCAK) $(HEADERSKECCAK) cpucycles.h cpucycles.c speed_print.h speed_print.c test_speed.c randombytes.c kex.c kex.h
$(CC) $(CFLAGS) -DKYBER_K=2 $(SOURCESKECCAK) randombytes.c cpucycles.c speed_print.c kex.c test_speed.c -o test_speed512

test_speed768: $(SOURCESKECCAK) $(HEADERSKECCAK) cpucycles.h cpucycles.c speed_print.h speed_print.c test_speed.c randombytes.c kex.c kex.h
Expand All @@ -91,17 +124,14 @@ test_speed768: $(SOURCESKECCAK) $(HEADERSKECCAK) cpucycles.h cpucycles.c speed_p
# Builds the test_speed1024 program with KYBER_K=4. The headers in the
# prerequisite list only trigger rebuilds; the recipe passes the .c sources
# explicitly. $@ expands to the target name.
test_speed1024: $(SOURCESKECCAK) $(HEADERSKECCAK) cpucycles.h cpucycles.c speed_print.h speed_print.c test_speed.c randombytes.c kex.c kex.h
	$(CC) $(CFLAGS) -DKYBER_K=4 \
	  $(SOURCESKECCAK) randombytes.c cpucycles.c speed_print.c kex.c test_speed.c \
	  -o $@

kyber512-90s.so: $(SOURCES) $(HEADERS)
$(CC) -shared -fpic $(CFLAGS) -DKYBER_K=2 -DKYBER_90S $(SOURCES) \
-o kyber512-90s.so
# Shared library built from $(SOURCES) with KYBER_K=2 and KYBER_90S defined.
# $@ expands to the target name.
kyber512-90s_avx2.so: $(SOURCES) $(HEADERS)
	$(CC) -shared -fpic $(CFLAGS) -DKYBER_K=2 -DKYBER_90S \
	  $(SOURCES) -o $@

kyber768-90s.so: $(SOURCES) $(HEADERS)
$(CC) -shared -fpic $(CFLAGS) -DKYBER_K=3 -DKYBER_90S $(SOURCES) \
-o kyber768-90s.so
# Shared library built from $(SOURCES) with KYBER_K=3 and KYBER_90S defined.
# $@ expands to the target name.
kyber768-90s_avx2.so: $(SOURCES) $(HEADERS)
	$(CC) -shared -fpic $(CFLAGS) -DKYBER_K=3 -DKYBER_90S \
	  $(SOURCES) -o $@

kyber1024-90s.so: $(SOURCES) $(HEADERS)
$(CC) -shared -fpic $(CFLAGS) -DKYBER_K=4 -DKYBER_90S $(SOURCES) \
-o kyber1024-90s.so
# Shared library built from $(SOURCES) with KYBER_K=4 and KYBER_90S defined.
# $@ expands to the target name.
kyber1024-90s_avx2.so: $(SOURCES) $(HEADERS)
	$(CC) -shared -fpic $(CFLAGS) -DKYBER_K=4 -DKYBER_90S \
	  $(SOURCES) -o $@

# Builds the test_kyber512-90s program with KYBER_90S and KYBER_K=2 defined,
# from $(SOURCESNINETIES), randombytes.c and test_kyber.c.
# -lcrypto stays last so the library is searched after the sources that
# reference it. $@ expands to the target name.
test_kyber512-90s: $(SOURCESNINETIES) $(HEADERSNINETIES) test_kyber.c randombytes.c
	$(CC) $(CFLAGS) -D KYBER_90S -DKYBER_K=2 \
	  $(SOURCESNINETIES) randombytes.c test_kyber.c \
	  -o $@ -lcrypto
Expand Down Expand Up @@ -142,16 +172,11 @@ test_speed1024-90s: $(SOURCESNINETIES) $(HEADERSNINETIES) cpucycles.h cpucycles.
# Builds the PQCgenKAT_kem program using $(NISTFLAGS) rather than $(CFLAGS).
# -I. adds the current directory to the include search path, and -lcrypto
# stays last so the library is searched after the sources that reference it.
PQCgenKAT_kem: $(SOURCESKECCAK) $(HEADERSKECCAK) PQCgenKAT_kem.c rng.c rng.h
	$(CC) $(NISTFLAGS) -o $@ \
	  $(SOURCESKECCAK) -I. rng.c PQCgenKAT_kem.c \
	  -lcrypto

test_ntt: $(SOURCESKECCAK) $(HEADERSKECCAK) test_ntt.c randombytes.c
$(CC) $(CFLAGS) -DKYBER_K=2 $(SOURCESKECCAK) randombytes.c test_ntt.c -o test_ntt

.PHONY: clean test

clean:
-$(RM) -rf *.o
-$(RM) -rf kyber512.so
-$(RM) -rf kyber768.so
-$(RM) -rf kyber1024.so
-$(RM) -rf kyber512_avx2.so
-$(RM) -rf kyber768_avx2.so
-$(RM) -rf kyber1024_avx2.so
-$(RM) -rf test_kyber512
-$(RM) -rf test_kyber768
-$(RM) -rf test_kyber1024
Expand All @@ -164,9 +189,9 @@ clean:
-$(RM) -rf test_speed512
-$(RM) -rf test_speed768
-$(RM) -rf test_speed1024
-$(RM) -rf kyber512-90s.so
-$(RM) -rf kyber768-90s.so
-$(RM) -rf kyber1024-90s.so
-$(RM) -rf kyber512-90s_avx2.so
-$(RM) -rf kyber768-90s_avx2.so
-$(RM) -rf kyber1024-90s_avx2.so
-$(RM) -rf test_kyber512-90s
-$(RM) -rf test_kyber768-90s
-$(RM) -rf test_kyber1024-90s
Expand All @@ -180,3 +205,7 @@ clean:
-$(RM) -rf test_speed768-90s
-$(RM) -rf test_speed1024-90s
-$(RM) -rf PQCgenKAT_kem
-$(RM) -rf pqcrystals_ref.so
-$(RM) -rf pqcrystals_avx2.so
-$(RM) -rf pqcrystals_avx2_aes256ctr.so
-$(RM) -rf keccak4x/KeccakP-1600-times4-SIMD256.o
4 changes: 2 additions & 2 deletions avx2/fips202x4.c
Expand Up @@ -6,8 +6,8 @@
#include "fips202x4.h"

/* Use implementation from the Keccak Code Package */
extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s);
#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds
extern void pqcrystals_avx2_KeccakP1600times4_PermuteAll_24rounds(__m256i *s);
#define KeccakF1600_StatePermute4x pqcrystals_avx2_KeccakP1600times4_PermuteAll_24rounds

static inline uint64_t load64(const uint8_t x[8]) {
unsigned int i;
Expand Down
2 changes: 2 additions & 0 deletions avx2/fips202x4.h
Expand Up @@ -41,6 +41,7 @@ void shake256x4_squeezeblocks(uint8_t *out0,
size_t nblocks,
keccakx4_state *state);

#define shake128x4 pqcrystals_avx2_shake128x4
void shake128x4(uint8_t *out0,
uint8_t *out1,
uint8_t *out2,
Expand All @@ -52,6 +53,7 @@ void shake128x4(uint8_t *out0,
const uint8_t *in3,
size_t inlen);

#define shake256x4 pqcrystals_avx2_shake256x4
void shake256x4(uint8_t *out0,
uint8_t *out1,
uint8_t *out2,
Expand Down
14 changes: 14 additions & 0 deletions avx2/keccak4x/KeccakP-1600-times4-SnP.h
Expand Up @@ -30,21 +30,35 @@ and related or neighboring rights to the source code in this file.
#include <stddef.h>

#define KeccakP1600times4_StaticInitialize()
#define KeccakP1600times4_InitializeAll pqcrystals_avx2_KeccakP1600times4_InitializeAll
void KeccakP1600times4_InitializeAll(void *states);
#define KeccakP1600times4_AddByte(states, instanceIndex, byte, offset) \
((unsigned char*)(states))[(instanceIndex)*8 + ((offset)/8)*4*8 + (offset)%8] ^= (byte)
#define KeccakP1600times4_AddBytes pqcrystals_avx2_KeccakP1600times4_AddBytes
void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
#define KeccakP1600times4_AddLanesAll pqcrystals_avx2_KeccakP1600times4_AddLanesAll
void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
#define KeccakP1600times4_OverwriteBytes pqcrystals_avx2_KeccakP1600times4_OverwriteBytes
void KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
#define KeccakP1600times4_OverwriteLanesAll pqcrystals_avx2_KeccakP1600times4_OverwriteLanesAll
void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
#define KeccakP1600times4_OverwriteWithZeroes pqcrystals_avx2_KeccakP1600times4_OverwriteWithZeroes
void KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount);
#define KeccakP1600times4_PermuteAll_12rounds pqcrystals_avx2_KeccakP1600times4_PermuteAll_12rounds
void KeccakP1600times4_PermuteAll_12rounds(void *states);
#define KeccakP1600times4_PermuteAll_24rounds pqcrystals_avx2_KeccakP1600times4_PermuteAll_24rounds
void KeccakP1600times4_PermuteAll_24rounds(void *states);
#define KeccakP1600times4_ExtractBytes pqcrystals_avx2_KeccakP1600times4_ExtractBytes
void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length);
#define KeccakP1600times4_ExtractLanesAll pqcrystals_avx2_KeccakP1600times4_ExtractLanesAll
void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
#define KeccakP1600times4_ExtractAndAddBytes pqcrystals_avx2_KeccakP1600times4_ExtractAndAddBytes
void KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
#define KeccakP1600times4_ExtractAndAddLanesAll pqcrystals_avx2_KeccakP1600times4_ExtractAndAddLanesAll
void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset);
#define KeccakF1600times4_FastLoop_Absorb pqcrystals_avx2_KeccakF1600times4_FastLoop_Absorb
size_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);
#define KeccakP1600times4_12rounds_FastLoop_Absorb pqcrystals_avx2_KeccakP1600times4_12rounds_FastLoop_Absorb
size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);

#endif
40 changes: 40 additions & 0 deletions ref/.gitignore
@@ -0,0 +1,40 @@
kyber1024-90s_ref.so
kyber1024_ref.so
kyber512-90s_ref.so
kyber512_ref.so
kyber768-90s_ref.so
kyber768_ref.so
pqcrystals_ref.so
test_kex1024
test_kex1024-90s
test_kex512
test_kex512-90s
test_kex768
test_kex768-90s
test_kyber1024
test_kyber1024-90s
test_kyber512
test_kyber512-90s
test_kyber768
test_kyber768-90s
test_speed1024
test_speed1024-90s
test_speed512
test_speed512-90s
test_speed768
test_speed768-90s
test_vectors1024
test_vectors1024-90s
test_vectors512
test_vectors512-90s
test_vectors768
test_vectors768-90s
PQCgenKAT_kem
kyber1024-90s_avx2.so
kyber1024_avx2.so
kyber512-90s_avx2.so
kyber512_avx2.so
kyber768-90s_avx2.so
kyber768_avx2.so
pqcrystals_avx2.so
pqcrystals_avx2_aes256ctr.so

0 comments on commit ae40695

Please sign in to comment.