
Fix freeing jit memory.

nioroso-x3 committed May 13, 2019
1 parent d10b738 commit 23d65e45f37a45902abe4d9f7ad6b957123962db
Showing with 253 additions and 21 deletions.
+253 −21 src/crypto/slow-hash.c
@@ -35,6 +35,10 @@
#include <stdio.h>
#include <unistd.h>

#if defined(__PPC__) || defined(__PPC64__)
#include <endian.h>
#endif

#include "int-util.h"
#include "hash-ops.h"
#include "oaes_lib.h"
@@ -308,8 +312,7 @@ static inline int use_v4_jit(void)



-#if defined(__PPC__) || defined(__PPC64__) && (__BYTE_ORDER == __BIG_ENDIAN)
+#if (defined(__PPC__) || defined(__PPC64__)) && (__BYTE_ORDER == __BIG_ENDIAN)
#define VARIANT4_RANDOM_MATH(a, b, r, _b, _b1) \
do if (variant >= 4) \
{ \
@@ -330,9 +333,8 @@ static inline int use_v4_jit(void)
V4_REG_LOAD(r + 8, (uint64_t*)(_b1) + 1); \
\
if (jit){ \
-uint8_t* tmp = (uint8_t*)hp_jitfunc; \
-tmp += 4096 - sizeof(void*); \
-(*(v4_random_math_JIT_func)tmp)(r); \
+hp_jitfunc = (v4_random_math_JIT_func)((uint8_t*)hp_jitfunc + 4096 - sizeof(void*)); \
+hp_jitfunc(r); \
} \
else \
v4_random_math(code, r); \
@@ -348,6 +350,44 @@ static inline int use_v4_jit(void)
} \
memcpy(a, t, sizeof(uint64_t) * 2); \
} while (0)
#elif (defined(__PPC__) || defined(__PPC64__)) && (__BYTE_ORDER == __LITTLE_ENDIAN)
#define VARIANT4_RANDOM_MATH(a, b, r, _b, _b1) \
do if (variant >= 4) \
{ \
uint64_t t[2]; \
memcpy(t, b, sizeof(uint64_t)); \
\
if (sizeof(v4_reg) == sizeof(uint32_t)) \
t[0] ^= SWAP64LE((r[0] + r[1]) | ((uint64_t)(r[2] + r[3]) << 32)); \
else \
t[0] ^= SWAP64LE((r[0] + r[1]) ^ (r[2] + r[3])); \
\
memcpy(b, t, sizeof(uint64_t)); \
\
V4_REG_LOAD(r + 4, a); \
V4_REG_LOAD(r + 5, (uint64_t*)(a) + 1); \
V4_REG_LOAD(r + 6, _b); \
V4_REG_LOAD(r + 7, _b1); \
V4_REG_LOAD(r + 8, (uint64_t*)(_b1) + 1); \
\
if (jit){ \
hp_jitfunc(r); \
} \
else \
v4_random_math(code, r); \
\
memcpy(t, a, sizeof(uint64_t) * 2); \
\
if (sizeof(v4_reg) == sizeof(uint32_t)) { \
t[0] ^= SWAP64LE(r[2] | ((uint64_t)(r[3]) << 32)); \
t[1] ^= SWAP64LE(r[0] | ((uint64_t)(r[1]) << 32)); \
} else { \
t[0] ^= SWAP64LE(r[2] ^ r[3]); \
t[1] ^= SWAP64LE(r[0] ^ r[1]); \
} \
memcpy(a, t, sizeof(uint64_t) * 2); \
} while (0)

#else
#define VARIANT4_RANDOM_MATH(a, b, r, _b, _b1) \
do if (variant >= 4) \
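The big-endian branch above caches the callable entry by advancing hp_jitfunc itself: on PPC64 ELFv1 an indirect call goes through a function descriptor rather than the first instruction of the code, and this JIT appears to keep that entry in the last sizeof(void*) bytes of its 4 KiB page. Because re-running the macro re-applies the offset to an already adjusted pointer, a safer shape is to derive the entry from the mapping base on every call. A minimal sketch; jit_entry is a hypothetical helper, not part of the commit:

    #include <stdint.h>

    typedef void (*jit_func)(void *regs);

    /* Sketch only: always compute the callable entry from the mapping base,
       so repeated calls never re-offset a pointer that was already adjusted.
       Assumes, as the macro does, that the entry lives in the last
       sizeof(void*) bytes of the 4 KiB JIT page. */
    static inline jit_func jit_entry(uint8_t *page)
    {
        return (jit_func)(page + 4096 - sizeof(void *));
    }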
@@ -815,24 +855,23 @@ void slow_hash_allocate_state(void)
hp_state = (uint8_t *) malloc(MEMORY);
}


#if defined(_MSC_VER) || defined(__MINGW32__)
hp_jitfunc_memory = (uint8_t *) VirtualAlloc(hp_jitfunc_memory, 4096 + 4095,
MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
#else
#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || \
defined(__DragonFly__) || defined(__NetBSD__)
#ifdef __NetBSD__
#define RESERVED_FLAGS PROT_MPROTECT(PROT_EXEC)
#else
#define RESERVED_FLAGS 0
#endif
hp_jitfunc_memory = mmap(0, 4096 + 4096, PROT_READ | PROT_WRITE | RESERVED_FLAGS,
MAP_PRIVATE | MAP_ANON, -1, 0);
#else
hp_jitfunc_memory = mmap(0, 4096 + 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#endif
#endif
if(hp_jitfunc_memory == MAP_FAILED)
hp_jitfunc_memory = NULL;
#endif
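On the BSD family the JIT page is mapped read-write only, with NetBSD's PROT_MPROTECT(PROT_EXEC) reserving the right to add execute permission through a later mprotect(2); the generic Unix path still maps the page read-write-execute in one step and normalizes MAP_FAILED to NULL. A minimal W^X-style sketch of the two-step approach, assuming the generated code is emitted before the protection flip; alloc_jit_page is illustrative, not part of the commit:

    #include <stddef.h>
    #include <stdint.h>
    #include <sys/mman.h>

    /* Sketch: map a JIT page RW, emit code, then flip it to RX before use. */
    static uint8_t *alloc_jit_page(size_t len)
    {
        uint8_t *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANON, -1, 0);
        if (buf == MAP_FAILED)
            return NULL;
        /* ... write the generated instructions into buf here ... */
        if (mprotect(buf, len, PROT_READ | PROT_EXEC) != 0) {
            munmap(buf, len);
            return NULL;
        }
        return buf;
    }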
@@ -937,7 +976,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int

// this isn't supposed to happen, but guard against it for now.
if(hp_state == NULL)
slow_hash_allocate_state();

// locals to avoid constant TLS dereferencing
uint8_t *local_hp_state = hp_state;
@@ -954,7 +993,6 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
VARIANT1_INIT64();
VARIANT2_INIT64();
VARIANT4_RANDOM_MATH_INIT();

/* CryptoNight Step 2: Iteratively encrypt the results from Keccak to fill
* the 2MB large random access buffer.
*/
@@ -1623,20 +1661,214 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
}
#endif /* !aarch64 || !crypto */

#elif defined(__PPC__) || defined(__PPC64__)
#include <sys/mman.h>
//#include <SSE2ALTIVEC.h>
// The PPC64 implementation is just a copy of the portable one for now, plus the JIT.
v4_random_math_JIT_func hp_jitfunc = NULL;
uint8_t *hp_jitfunc_memory = NULL;

void slow_hash_allocate_state(void)
{
if(hp_jitfunc_memory != NULL) return;

hp_jitfunc_memory = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANON, -1, 0);
if(hp_jitfunc_memory == MAP_FAILED) hp_jitfunc_memory = NULL;
hp_jitfunc = (v4_random_math_JIT_func)hp_jitfunc_memory;
//printf("Allocated JIT mem %p\n",hp_jitfunc);
return;
}

void slow_hash_free_state(void)
{
if(hp_jitfunc_memory != NULL) munmap(hp_jitfunc_memory, 4096);
hp_jitfunc = NULL;
hp_jitfunc_memory = NULL;
return;
}

static void (*const extra_hashes[4])(const void *, size_t, char *) = {
hash_extra_blake, hash_extra_groestl, hash_extra_jh, hash_extra_skein
};

/* Map the first 8 bytes of a block to the index of one of the count
   scratchpad blocks (count must be a power of two). */
static size_t e2i(const uint8_t* a, size_t count) { return (SWAP64LE(*((uint64_t*)a)) / AES_BLOCK_SIZE) & (count - 1); }

static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) {
uint64_t a0, b0;
uint64_t hi, lo;

a0 = SWAP64LE(((uint64_t*)a)[0]);
b0 = SWAP64LE(((uint64_t*)b)[0]);
lo = mul128(a0, b0, &hi);
((uint64_t*)res)[0] = SWAP64LE(hi);
((uint64_t*)res)[1] = SWAP64LE(lo);
}

static void sum_half_blocks(uint8_t* a, const uint8_t* b) {
uint64_t a0, a1, b0, b1;

a0 = SWAP64LE(((uint64_t*)a)[0]);
a1 = SWAP64LE(((uint64_t*)a)[1]);
b0 = SWAP64LE(((uint64_t*)b)[0]);
b1 = SWAP64LE(((uint64_t*)b)[1]);
a0 += b0;
a1 += b1;
((uint64_t*)a)[0] = SWAP64LE(a0);
((uint64_t*)a)[1] = SWAP64LE(a1);
}
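/* Note: mul() and sum_half_blocks() round-trip through SWAP64LE, so this
   portable arithmetic matches the little-endian reference output on
   big-endian PowerPC hosts. */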
#define U64(x) ((uint64_t *) (x))

static void copy_block(uint8_t* dst, const uint8_t* src) {
memcpy(dst, src, AES_BLOCK_SIZE);
}

static void swap_blocks(uint8_t *a, uint8_t *b){
uint64_t t[2];
U64(t)[0] = U64(a)[0];
U64(t)[1] = U64(a)[1];
U64(a)[0] = U64(b)[0];
U64(a)[1] = U64(b)[1];
U64(b)[0] = U64(t)[0];
U64(b)[1] = U64(t)[1];
}

static void xor_blocks(uint8_t* a, const uint8_t* b) {
size_t i;
for (i = 0; i < AES_BLOCK_SIZE; i++) {
a[i] ^= b[i];
}
}

static void xor64(uint8_t* left, const uint8_t* right)
{
size_t i;
for (i = 0; i < 8; ++i)
{
left[i] ^= right[i];
}
}

#pragma pack(push, 1)
union cn_slow_hash_state {
union hash_state hs;
struct {
uint8_t k[64];
uint8_t init[INIT_SIZE_BYTE];
};
};
#pragma pack(pop)
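/* Two views of the 200-byte Keccak state: k supplies the XOR material for
   the a/b registers below, and init seeds the scratchpad text blocks. */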

void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int prehashed, uint64_t height) {
#ifndef FORCE_USE_HEAP
uint8_t long_state[MEMORY];
#else
// Fall back to a heap-allocated scratchpad when the stack cannot hold MEMORY bytes.
uint8_t *long_state = (uint8_t *)malloc(MEMORY);
#endif

union cn_slow_hash_state state;
uint8_t text[INIT_SIZE_BYTE];
uint8_t a[AES_BLOCK_SIZE];
uint8_t a1[AES_BLOCK_SIZE];
uint8_t b[AES_BLOCK_SIZE * 2];
uint8_t c1[AES_BLOCK_SIZE];
uint8_t c2[AES_BLOCK_SIZE];
uint8_t d[AES_BLOCK_SIZE];
size_t i, j;
uint8_t aes_key[AES_KEY_SIZE];
oaes_ctx *aes_ctx;

if (prehashed) {
memcpy(&state.hs, data, length);
} else {
hash_process(&state.hs, data, length);
}
memcpy(text, state.init, INIT_SIZE_BYTE);
memcpy(aes_key, state.hs.b, AES_KEY_SIZE);
aes_ctx = (oaes_ctx *) oaes_alloc();

slow_hash_allocate_state();

VARIANT1_PORTABLE_INIT();
VARIANT2_PORTABLE_INIT();
VARIANT4_RANDOM_MATH_INIT();

oaes_key_import_data(aes_ctx, aes_key, AES_KEY_SIZE);
for (i = 0; i < MEMORY / INIT_SIZE_BYTE; i++) {
for (j = 0; j < INIT_SIZE_BLK; j++) {
aesb_pseudo_round(&text[AES_BLOCK_SIZE * j], &text[AES_BLOCK_SIZE * j], aes_ctx->key->exp_data);
}
memcpy(&long_state[i * INIT_SIZE_BYTE], text, INIT_SIZE_BYTE);
}

for (i = 0; i < AES_BLOCK_SIZE; i++) {
a[i] = state.k[ i] ^ state.k[AES_BLOCK_SIZE * 2 + i];
b[i] = state.k[AES_BLOCK_SIZE + i] ^ state.k[AES_BLOCK_SIZE * 3 + i];
}

for (i = 0; i < ITER / 2; i++) {
/* Dependency chain: address -> read value ------+
* written value <-+ hard function (AES or MUL) <+
* next address <-+
*/
/* Iteration 1 */
j = e2i(a, MEMORY / AES_BLOCK_SIZE) * AES_BLOCK_SIZE;
copy_block(c1, &long_state[j]);
aesb_single_round(c1, c1, a);
VARIANT2_PORTABLE_SHUFFLE_ADD(c1, a, long_state, j);
copy_block(&long_state[j], c1);
xor_blocks(&long_state[j], b);
assert(j == e2i(a, MEMORY / AES_BLOCK_SIZE) * AES_BLOCK_SIZE);
VARIANT1_1(&long_state[j]);
/* Iteration 2 */
j = e2i(c1, MEMORY / AES_BLOCK_SIZE) * AES_BLOCK_SIZE;
copy_block(c2, &long_state[j]);
copy_block(a1, a);
VARIANT2_PORTABLE_INTEGER_MATH(c2, c1);
VARIANT4_RANDOM_MATH(a1, c2, r, b, b + AES_BLOCK_SIZE);
mul(c1, c2, d);
VARIANT2_2_PORTABLE();
VARIANT2_PORTABLE_SHUFFLE_ADD(c1, a, long_state, j);
sum_half_blocks(a1, d);
swap_blocks(a1, c2);
xor_blocks(a1, c2);
VARIANT1_2(c2 + 8);
copy_block(&long_state[j], c2);
if (variant >= 2) {
copy_block(b + AES_BLOCK_SIZE, b);
}
copy_block(b, c1);
copy_block(a, a1);
}

memcpy(text, state.init, INIT_SIZE_BYTE);
oaes_key_import_data(aes_ctx, &state.hs.b[32], AES_KEY_SIZE);
for (i = 0; i < MEMORY / INIT_SIZE_BYTE; i++) {
for (j = 0; j < INIT_SIZE_BLK; j++) {
xor_blocks(&text[j * AES_BLOCK_SIZE], &long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]);
aesb_pseudo_round(&text[AES_BLOCK_SIZE * j], &text[AES_BLOCK_SIZE * j], aes_ctx->key->exp_data);
}
}
memcpy(state.init, text, INIT_SIZE_BYTE);
hash_permutation(&state.hs);
/*memcpy(hash, &state, 32);*/
extra_hashes[state.hs.b[0] & 3](&state, 200, hash);
oaes_free((OAES_CTX **) &aes_ctx);

#ifdef FORCE_USE_HEAP
free(long_state);
#endif
}
#else
// Portable implementation as a fallback

void slow_hash_allocate_state(void)
{
// Do nothing, this is just to maintain compatibility with the upgraded slow-hash.c

return;
}

void slow_hash_free_state(void)
{
// As above
return;
}

@@ -1738,6 +1970,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
memcpy(text, state.init, INIT_SIZE_BYTE);
memcpy(aes_key, state.hs.b, AES_KEY_SIZE);
aes_ctx = (oaes_ctx *) oaes_alloc();


VARIANT1_PORTABLE_INIT();
VARIANT2_PORTABLE_INIT();
@@ -1809,5 +2042,4 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
free(long_state);
#endif
}

#endif
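With both branches defining the pair, every build now has a matching allocate/free for the JIT mapping. A minimal caller, sketched under the assumption of a 32-byte output buffer; hash_once is illustrative, not part of the tree:

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch of the intended lifecycle: allocate once per thread, hash as
       many times as needed, then release the JIT mapping. */
    static void hash_once(const void *data, size_t len, char out[32], uint64_t height)
    {
        slow_hash_allocate_state();               /* maps the JIT page */
        cn_slow_hash(data, len, out, 4 /*variant*/, 0 /*prehashed*/, height);
        slow_hash_free_state();                   /* munmap()s it again */
    }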
