Skip to content

Commit

Permalink
Added the 128-bit version.
Browse files Browse the repository at this point in the history
  • Loading branch information
mjosaarinen committed Jan 6, 2015
1 parent fe720a9 commit ceec7c5
Show file tree
Hide file tree
Showing 5 changed files with 679 additions and 24 deletions.
4 changes: 3 additions & 1 deletion Makefile
Expand Up @@ -3,7 +3,9 @@

BIN = xtest
OBJS = main.o \
kuznechik_simple.o
kuznechik_128bit.o
# kuznechik_8bit.o

DIST = kuznechik

CC = gcc
Expand Down
17 changes: 11 additions & 6 deletions kuznechik.h
Expand Up @@ -7,20 +7,25 @@
#include <stdint.h>

// my 128-bit datatype
typedef union {
typedef union {
uint64_t q[2];
uint8_t b[16];
} w128_t;

// cipher context
typedef struct {
w128_t k[10]; // round keys
} kuz_ctx;
} kuz_key_t;

// basic ECB interface
// init lookup tables
void kuz_init();

void kuz_key(kuz_ctx *kuz, const uint8_t key[32]); // key setup
void kuz_enc(kuz_ctx *kuz, w128_t *x); // encrypt a block
void kuz_dec(kuz_ctx *kuz, w128_t *x); // decrypt a block
// key setup
void kuz_set_encrypt_key(kuz_key_t *subkeys, const uint8_t key[32]);
void kuz_set_decrypt_key(kuz_key_t *subkeys, const uint8_t key[32]);

// single-block ecp ops
void kuz_encrypt_block(kuz_key_t *subkeys, void *x);
void kuz_decrypt_block(kuz_key_t *subkeys, void *x);

#endif
345 changes: 345 additions & 0 deletions kuznechik_128bit.c
@@ -0,0 +1,345 @@
// kuznechik_128bit.c
// 04-Jan-15 Markku-Juhani O. Saarinen <mjos@iki.fi>

// Basic 128-bit version without platform - specific optimizations

// Conforms to incuded doc/GOSTR-bsh.pdf file, which has an internal
// date of September 2, 2014.

#include "kuznechik.h"

#ifndef __SSE4_1__
#error "This version requires __SSE4_1__"
#endif

#include <mmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>

// The S-Box from section 5.1.1

uint8_t kuz_pi[0x100] = {
0xFC, 0xEE, 0xDD, 0x11, 0xCF, 0x6E, 0x31, 0x16, // 00..07
0xFB, 0xC4, 0xFA, 0xDA, 0x23, 0xC5, 0x04, 0x4D, // 08..0F
0xE9, 0x77, 0xF0, 0xDB, 0x93, 0x2E, 0x99, 0xBA, // 10..17
0x17, 0x36, 0xF1, 0xBB, 0x14, 0xCD, 0x5F, 0xC1, // 18..1F
0xF9, 0x18, 0x65, 0x5A, 0xE2, 0x5C, 0xEF, 0x21, // 20..27
0x81, 0x1C, 0x3C, 0x42, 0x8B, 0x01, 0x8E, 0x4F, // 28..2F
0x05, 0x84, 0x02, 0xAE, 0xE3, 0x6A, 0x8F, 0xA0, // 30..37
0x06, 0x0B, 0xED, 0x98, 0x7F, 0xD4, 0xD3, 0x1F, // 38..3F
0xEB, 0x34, 0x2C, 0x51, 0xEA, 0xC8, 0x48, 0xAB, // 40..47
0xF2, 0x2A, 0x68, 0xA2, 0xFD, 0x3A, 0xCE, 0xCC, // 48..4F
0xB5, 0x70, 0x0E, 0x56, 0x08, 0x0C, 0x76, 0x12, // 50..57
0xBF, 0x72, 0x13, 0x47, 0x9C, 0xB7, 0x5D, 0x87, // 58..5F
0x15, 0xA1, 0x96, 0x29, 0x10, 0x7B, 0x9A, 0xC7, // 60..67
0xF3, 0x91, 0x78, 0x6F, 0x9D, 0x9E, 0xB2, 0xB1, // 68..6F
0x32, 0x75, 0x19, 0x3D, 0xFF, 0x35, 0x8A, 0x7E, // 70..77
0x6D, 0x54, 0xC6, 0x80, 0xC3, 0xBD, 0x0D, 0x57, // 78..7F
0xDF, 0xF5, 0x24, 0xA9, 0x3E, 0xA8, 0x43, 0xC9, // 80..87
0xD7, 0x79, 0xD6, 0xF6, 0x7C, 0x22, 0xB9, 0x03, // 88..8F
0xE0, 0x0F, 0xEC, 0xDE, 0x7A, 0x94, 0xB0, 0xBC, // 90..97
0xDC, 0xE8, 0x28, 0x50, 0x4E, 0x33, 0x0A, 0x4A, // 98..9F
0xA7, 0x97, 0x60, 0x73, 0x1E, 0x00, 0x62, 0x44, // A0..A7
0x1A, 0xB8, 0x38, 0x82, 0x64, 0x9F, 0x26, 0x41, // A8..AF
0xAD, 0x45, 0x46, 0x92, 0x27, 0x5E, 0x55, 0x2F, // B0..B7
0x8C, 0xA3, 0xA5, 0x7D, 0x69, 0xD5, 0x95, 0x3B, // B8..BF
0x07, 0x58, 0xB3, 0x40, 0x86, 0xAC, 0x1D, 0xF7, // C0..C7
0x30, 0x37, 0x6B, 0xE4, 0x88, 0xD9, 0xE7, 0x89, // C8..CF
0xE1, 0x1B, 0x83, 0x49, 0x4C, 0x3F, 0xF8, 0xFE, // D0..D7
0x8D, 0x53, 0xAA, 0x90, 0xCA, 0xD8, 0x85, 0x61, // D8..DF
0x20, 0x71, 0x67, 0xA4, 0x2D, 0x2B, 0x09, 0x5B, // E0..E7
0xCB, 0x9B, 0x25, 0xD0, 0xBE, 0xE5, 0x6C, 0x52, // E8..EF
0x59, 0xA6, 0x74, 0xD2, 0xE6, 0xF4, 0xB4, 0xC0, // F0..F7
0xD1, 0x66, 0xAF, 0xC2, 0x39, 0x4B, 0x63, 0xB6, // F8..FF
};

// Inverse S-Box

const uint8_t kuz_pi_inv[0x100] = {
0xA5, 0x2D, 0x32, 0x8F, 0x0E, 0x30, 0x38, 0xC0, // 00..07
0x54, 0xE6, 0x9E, 0x39, 0x55, 0x7E, 0x52, 0x91, // 08..0F
0x64, 0x03, 0x57, 0x5A, 0x1C, 0x60, 0x07, 0x18, // 10..17
0x21, 0x72, 0xA8, 0xD1, 0x29, 0xC6, 0xA4, 0x3F, // 18..1F
0xE0, 0x27, 0x8D, 0x0C, 0x82, 0xEA, 0xAE, 0xB4, // 20..27
0x9A, 0x63, 0x49, 0xE5, 0x42, 0xE4, 0x15, 0xB7, // 28..2F
0xC8, 0x06, 0x70, 0x9D, 0x41, 0x75, 0x19, 0xC9, // 30..37
0xAA, 0xFC, 0x4D, 0xBF, 0x2A, 0x73, 0x84, 0xD5, // 38..3F
0xC3, 0xAF, 0x2B, 0x86, 0xA7, 0xB1, 0xB2, 0x5B, // 40..47
0x46, 0xD3, 0x9F, 0xFD, 0xD4, 0x0F, 0x9C, 0x2F, // 48..4F
0x9B, 0x43, 0xEF, 0xD9, 0x79, 0xB6, 0x53, 0x7F, // 50..57
0xC1, 0xF0, 0x23, 0xE7, 0x25, 0x5E, 0xB5, 0x1E, // 58..5F
0xA2, 0xDF, 0xA6, 0xFE, 0xAC, 0x22, 0xF9, 0xE2, // 60..67
0x4A, 0xBC, 0x35, 0xCA, 0xEE, 0x78, 0x05, 0x6B, // 68..6F
0x51, 0xE1, 0x59, 0xA3, 0xF2, 0x71, 0x56, 0x11, // 70..77
0x6A, 0x89, 0x94, 0x65, 0x8C, 0xBB, 0x77, 0x3C, // 78..7F
0x7B, 0x28, 0xAB, 0xD2, 0x31, 0xDE, 0xC4, 0x5F, // 80..87
0xCC, 0xCF, 0x76, 0x2C, 0xB8, 0xD8, 0x2E, 0x36, // 88..8F
0xDB, 0x69, 0xB3, 0x14, 0x95, 0xBE, 0x62, 0xA1, // 90..97
0x3B, 0x16, 0x66, 0xE9, 0x5C, 0x6C, 0x6D, 0xAD, // 98..9F
0x37, 0x61, 0x4B, 0xB9, 0xE3, 0xBA, 0xF1, 0xA0, // A0..A7
0x85, 0x83, 0xDA, 0x47, 0xC5, 0xB0, 0x33, 0xFA, // A8..AF
0x96, 0x6F, 0x6E, 0xC2, 0xF6, 0x50, 0xFF, 0x5D, // B0..B7
0xA9, 0x8E, 0x17, 0x1B, 0x97, 0x7D, 0xEC, 0x58, // B8..BF
0xF7, 0x1F, 0xFB, 0x7C, 0x09, 0x0D, 0x7A, 0x67, // C0..C7
0x45, 0x87, 0xDC, 0xE8, 0x4F, 0x1D, 0x4E, 0x04, // C8..CF
0xEB, 0xF8, 0xF3, 0x3E, 0x3D, 0xBD, 0x8A, 0x88, // D0..D7
0xDD, 0xCD, 0x0B, 0x13, 0x98, 0x02, 0x93, 0x80, // D8..DF
0x90, 0xD0, 0x24, 0x34, 0xCB, 0xED, 0xF4, 0xCE, // E0..E7
0x99, 0x10, 0x44, 0x40, 0x92, 0x3A, 0x01, 0x26, // E8..EF
0x12, 0x1A, 0x48, 0x68, 0xF5, 0x81, 0x8B, 0xC7, // F0..F7
0xD6, 0x20, 0x0A, 0x08, 0x00, 0x4C, 0xD7, 0x74 // F8..FF
};

// lookup tables initialized by kuz_init()

static int kuz_initialized = 0;
__m128i kuz_pil_enc128[16][256];
__m128i kuz_l_dec128[16][256];
__m128i kuz_pil_dec128[16][256];

// Linear vector from sect 5.1.2

const static uint8_t kuz_lvec[16] = {
0x94, 0x20, 0x85, 0x10, 0xC2, 0xC0, 0x01, 0xFB,
0x01, 0xC0, 0xC2, 0x10, 0x85, 0x20, 0x94, 0x01
};

// poly multiplication mod p(x) = x^8 + x^7 + x^6 + x + 1

static uint8_t kuz_mul_gf256(uint8_t x, uint8_t y)
{
uint8_t z;

z = 0;
while (y) {
if (y & 1)
z ^= x;
x = (x << 1) ^ (x & 0x80 ? 0xC3 : 0x00);
y >>= 1;
}

return z;
}

// (slow) linear operation l

static void kuz_l(w128_t *w)
{
int i, j;
uint8_t x;

// 16 rounds
for (j = 0; j < 16; j++) {

// An LFSR with 16 elements from GF(2^8)
x = w->b[15]; // since lvec[15] = 1

for (i = 14; i >= 0; i--) {
w->b[i + 1] = w->b[i];
x ^= kuz_mul_gf256(w->b[i], kuz_lvec[i]);
}
w->b[0] = x;
}
}

// inverse of linear operation l

static void kuz_l_inv(w128_t *w)
{
int i, j;
uint8_t x;

// 16 rounds
for (j = 0; j < 16; j++) {

x = w->b[0];
for (i = 0; i < 15; i++) {
w->b[i] = w->b[i + 1];
x ^= kuz_mul_gf256(w->b[i], kuz_lvec[i]);
}
w->b[15] = x;
}
}

// key setup

void kuz_set_encrypt_key(kuz_key_t *kuz, const uint8_t key[32])
{
int i, j;
w128_t c, x, y, z;

if (!kuz_initialized)
kuz_init();

for (i = 0; i < 16; i++) {
// this will be have to changed for little-endian systems
x.b[i] = key[i];
y.b[i] = key[i + 16];
}

kuz->k[0].q[0] = x.q[0];
kuz->k[0].q[1] = x.q[1];
kuz->k[1].q[0] = y.q[0];
kuz->k[1].q[1] = y.q[1];

for (i = 1; i <= 32; i++) {

// C Value
c.q[0] = 0;
c.q[1] = 0;
c.b[15] = i; // load round in lsb
kuz_l(&c);

z.q[0] = x.q[0] ^ c.q[0];
z.q[1] = x.q[1] ^ c.q[1];
for (j = 0; j < 16; j++)
z.b[j] = kuz_pi[z.b[j]];
kuz_l(&z);
z.q[0] ^= y.q[0];
z.q[1] ^= y.q[1];

y.q[0] = x.q[0];
y.q[1] = x.q[1];

x.q[0] = z.q[0];
x.q[1] = z.q[1];

if ((i & 7) == 0) {
kuz->k[(i >> 2)].q[0] = x.q[0];
kuz->k[(i >> 2)].q[1] = x.q[1];
kuz->k[(i >> 2) + 1].q[0] = y.q[0];
kuz->k[(i >> 2) + 1].q[1] = y.q[1];
}
}
}

// these two funcs are identical in the simple implementation

void kuz_set_decrypt_key(kuz_key_t *kuz, const uint8_t key[32])
{
int i;

kuz_set_encrypt_key(kuz, key);

for (i = 1; i < 10; i++)
kuz_l_inv(&kuz->k[i]);
}

// encrypt a block - 128 bit way

void kuz_encrypt_block(kuz_key_t *key, void *blk)
{
int i;
__m128i x;

x = *((__m128i *) blk);

for (i = 0; i < 9; i++) {
x ^= *((__m128i *) &key->k[i]);
x = kuz_pil_enc128[ 0][((uint8_t *) &x)[ 0]] ^
kuz_pil_enc128[ 1][((uint8_t *) &x)[ 1]] ^
kuz_pil_enc128[ 2][((uint8_t *) &x)[ 2]] ^
kuz_pil_enc128[ 3][((uint8_t *) &x)[ 3]] ^
kuz_pil_enc128[ 4][((uint8_t *) &x)[ 4]] ^
kuz_pil_enc128[ 5][((uint8_t *) &x)[ 5]] ^
kuz_pil_enc128[ 6][((uint8_t *) &x)[ 6]] ^
kuz_pil_enc128[ 7][((uint8_t *) &x)[ 7]] ^
kuz_pil_enc128[ 8][((uint8_t *) &x)[ 8]] ^
kuz_pil_enc128[ 9][((uint8_t *) &x)[ 9]] ^
kuz_pil_enc128[10][((uint8_t *) &x)[10]] ^
kuz_pil_enc128[11][((uint8_t *) &x)[11]] ^
kuz_pil_enc128[12][((uint8_t *) &x)[12]] ^
kuz_pil_enc128[13][((uint8_t *) &x)[13]] ^
kuz_pil_enc128[14][((uint8_t *) &x)[14]] ^
kuz_pil_enc128[15][((uint8_t *) &x)[15]];

}
x ^= *((__m128i *) &key->k[9]);

*((__m128i *) blk) = x;
}

// decrypt a block - 128 bit way

void kuz_decrypt_block(kuz_key_t *key, void *blk)
{
int i, j;
__m128i x;

x = *((__m128i *) blk);

x = kuz_l_dec128[ 0][((uint8_t *) &x)[ 0]] ^
kuz_l_dec128[ 1][((uint8_t *) &x)[ 1]] ^
kuz_l_dec128[ 2][((uint8_t *) &x)[ 2]] ^
kuz_l_dec128[ 3][((uint8_t *) &x)[ 3]] ^
kuz_l_dec128[ 4][((uint8_t *) &x)[ 4]] ^
kuz_l_dec128[ 5][((uint8_t *) &x)[ 5]] ^
kuz_l_dec128[ 6][((uint8_t *) &x)[ 6]] ^
kuz_l_dec128[ 7][((uint8_t *) &x)[ 7]] ^
kuz_l_dec128[ 8][((uint8_t *) &x)[ 8]] ^
kuz_l_dec128[ 9][((uint8_t *) &x)[ 9]] ^
kuz_l_dec128[10][((uint8_t *) &x)[10]] ^
kuz_l_dec128[11][((uint8_t *) &x)[11]] ^
kuz_l_dec128[12][((uint8_t *) &x)[12]] ^
kuz_l_dec128[13][((uint8_t *) &x)[13]] ^
kuz_l_dec128[14][((uint8_t *) &x)[14]] ^
kuz_l_dec128[15][((uint8_t *) &x)[15]];

for (i = 9; i > 1; i--) {
x ^= *((__m128i *) &key->k[i]);
x = kuz_pil_dec128[ 0][((uint8_t *) &x)[ 0]] ^
kuz_pil_dec128[ 1][((uint8_t *) &x)[ 1]] ^
kuz_pil_dec128[ 2][((uint8_t *) &x)[ 2]] ^
kuz_pil_dec128[ 3][((uint8_t *) &x)[ 3]] ^
kuz_pil_dec128[ 4][((uint8_t *) &x)[ 4]] ^
kuz_pil_dec128[ 5][((uint8_t *) &x)[ 5]] ^
kuz_pil_dec128[ 6][((uint8_t *) &x)[ 6]] ^
kuz_pil_dec128[ 7][((uint8_t *) &x)[ 7]] ^
kuz_pil_dec128[ 8][((uint8_t *) &x)[ 8]] ^
kuz_pil_dec128[ 9][((uint8_t *) &x)[ 9]] ^
kuz_pil_dec128[10][((uint8_t *) &x)[10]] ^
kuz_pil_dec128[11][((uint8_t *) &x)[11]] ^
kuz_pil_dec128[12][((uint8_t *) &x)[12]] ^
kuz_pil_dec128[13][((uint8_t *) &x)[13]] ^
kuz_pil_dec128[14][((uint8_t *) &x)[14]] ^
kuz_pil_dec128[15][((uint8_t *) &x)[15]];
}
x ^= *((__m128i *) &key->k[1]);
for (j = 0; j < 16; j++)
((uint8_t *) &x)[j] = kuz_pi_inv[((uint8_t *) &x)[j]];
x ^= *((__m128i *) &key->k[0]);

*((__m128i *) blk) = x;
}

// initalize lookup tables

void kuz_init()
{
int i, j;
w128_t x;

for (i = 0; i < 16; i++) {
for (j = 0; j < 256; j++) {
x.q[0] = 0;
x.q[1] = 0;
x.b[i] = kuz_pi[j];
kuz_l(&x);
kuz_pil_enc128[i][j] = *((__m128i *) &x);

x.q[0] = 0;
x.q[1] = 0;
x.b[i] = j; // kuz_pi[j];
kuz_l_inv(&x);
kuz_l_dec128[i][j] = *((__m128i *) &x);

x.q[0] = 0;
x.q[1] = 0;
x.b[i] = kuz_pi_inv[j];
kuz_l_inv(&x);
kuz_pil_dec128[i][j] = *((__m128i *) &x);
}
}
kuz_initialized = 1;
}

0 comments on commit ceec7c5

Please sign in to comment.