Skip to content
Browse files

OpenCL kernels RAR, Office, NTLMv2, WPA-PSK: Drop all pragma unrolls.…

… The

compiler unrolls anyway where sensible.
  • Loading branch information...
1 parent ae49c17 commit d1f1bbe3ddba573d7e607fa0ce2ba651fa2db329 @magnumripper committed Nov 9, 2012
View
20 src/opencl/ntlmv2_kernel.cl
@@ -258,7 +258,6 @@ __kernel void ntlmv2_nthash(const __global uint *unicode_pw, __global MAYBE_VECT
/* Input buffer is prepared with 0x80, zero-padding and length << 3 */
md4_init(output);
-#pragma unroll
for (i = 0; i < 16; i++) {
#ifdef SCALAR
block[i] = *pw++;
@@ -271,7 +270,6 @@ __kernel void ntlmv2_nthash(const __global uint *unicode_pw, __global MAYBE_VECT
}
md4_block(block, output);
-#pragma unroll
for (i = 0; i < 4; i++)
nthash[gid * 4 + i] = output[i];
}
@@ -288,55 +286,44 @@ __kernel void ntlmv2_final(const __global MAYBE_VECTOR_UINT *nthash, MAYBE_CONST
/* 1st HMAC */
md5_init(output);
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = 0x36363636 ^ nthash[gid * 4 + i];
-#pragma unroll
for (i = 4; i < 16; i++)
block[i] = 0x36363636;
md5_block(block, output); /* md5_update(ipad, 64) */
/* Salt buffer is prepared with 0x80, zero-padding and length,
* ie. (saltlen + 64) << 3 in get_salt() */
-#pragma unroll
for (i = 0; i < 16; i++)
block[i] = *cp++;
md5_block(block, output); /* md5_update(salt, saltlen), md5_final() */
-#pragma unroll
for (i = 0; i < 4; i++)
hash[i] = output[i];
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = 0x5c5c5c5c ^ nthash[gid * 4 + i];
md5_init(output);
-#pragma unroll
for (i = 4; i < 16; i++)
block[i] = 0x5c5c5c5c;
md5_block(block, output); /* md5_update(opad, 64) */
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = hash[i];
block[4] = 0x80;
-#pragma unroll
for (i = 5; i < 14; i++)
block[i] = 0;
block[14] = (64 + 16) << 3;
block[15] = 0;
md5_block(block, output); /* md5_update(hash, 16), md5_final() */
/* 2nd HMAC */
-#pragma unroll
for (i = 0; i < 4; i++)
hash[i] = output[i];
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = 0x36363636 ^ output[i];
md5_init(output);
-#pragma unroll
for (i = 4; i < 16; i++)
block[i] = 0x36363636;
md5_block(block, output); /* md5_update(ipad, 64) */
@@ -348,37 +335,30 @@ __kernel void ntlmv2_final(const __global MAYBE_VECTOR_UINT *nthash, MAYBE_CONST
/* At least this will not diverge */
while (challenge_size--) {
-#pragma unroll
for (i = 0; i < 16; i++)
block[i] = *cp++;
md5_block(block, output); /* md5_update(challenge, len), md5_final() */
}
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = 0x5c5c5c5c ^ hash[i];
-#pragma unroll
for (i = 0; i < 4; i++)
hash[i] = output[i];
md5_init(output);
-#pragma unroll
for (i = 4; i < 16; i++)
block[i] = 0x5c5c5c5c;
md5_block(block, output); /* md5_update(opad, 64) */
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = hash[i];
block[4] = 0x80;
-#pragma unroll
for (i = 5; i < 14; i++)
block[i] = 0;
block[14] = (64 + 16) << 3;
block[15] = 0;
md5_block(block, output); /* md5_update(hash, 16), md5_final() */
-#pragma unroll
for (i = 0; i < 4; i++) {
#ifdef SCALAR
result[gid * 4 + i] = output[i];
View
17 src/opencl/office2007_kernel.cl
@@ -385,10 +385,8 @@ __kernel void GenerateSHA1pwhash(
/* Initial hash of salt + password */
/* The ending 0x80 is already in the buffer */
sha1_init_s(output);
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = SWAP32(salt[i]);
-#pragma unroll
for (i = 4; i < 16; i++)
block[i] = SWAP32(unicode_pw[gid * (UNICODE_LENGTH>>2) + i - 4]);
if (pw_len[gid] < 40) {
@@ -398,15 +396,13 @@ __kernel void GenerateSHA1pwhash(
sha1_block_s(block, output);
if (pw_len[gid] >= 40) {
-#pragma unroll
for (i = 0; i < 14; i++)
block[i] = SWAP32(unicode_pw[gid * (UNICODE_LENGTH>>2) + i + 12]);
block[14] = 0;
block[15] = (pw_len[gid] + 16) << 3;
sha1_block_s(block, output);
}
-#pragma unroll
for (i = 0; i < 5; i++)
#ifdef SCALAR
pwhash[gid * 6 + i] = output[i];
@@ -429,7 +425,6 @@ __kernel void HashLoop(__global MAYBE_VECTOR_UINT *pwhash)
uint base = pwhash[gid * 6 + 5].s0;
#endif
-#pragma unroll
for (i = 0; i < 5; i++)
output[i] = pwhash[gid * 6 + i];
@@ -438,18 +433,15 @@ __kernel void HashLoop(__global MAYBE_VECTOR_UINT *pwhash)
for (j = 0; j < HASH_LOOPS; j++)
{
block[0] = SWAP32(base + j);
-#pragma unroll
for (i = 1; i < 6; i++)
block[i] = output[i - 1];
sha1_init(output);
block[6] = 0x80000000;
-#pragma unroll
for (i = 7; i < 15; i++)
block[i] = 0;
block[15] = 24 << 3;
sha1_block(block, output);
}
-#pragma unroll
for (i = 0; i < 5; i++)
pwhash[gid * 6 + i] = output[i];
pwhash[gid * 6 + 5] += HASH_LOOPS;
@@ -464,58 +456,49 @@ __kernel void Generate2007key(
MAYBE_VECTOR_UINT output[5];
uint gid = get_global_id(0);
-#pragma unroll
for (i = 0; i < 5; i++)
output[i] = pwhash[gid * 6 + i];
/* Remainder of sha1(serial.last hash)
* We avoid byte-swapping back and forth */
for (j = 50000 - (50000 % HASH_LOOPS); j < 50000; j++)
{
block[0] = SWAP32(j);
-#pragma unroll
for (i = 1; i < 6; i++)
block[i] = output[i - 1];
sha1_init(output);
block[6] = 0x80000000;
-#pragma unroll
for (i = 7; i < 15; i++)
block[i] = 0;
block[15] = 24 << 3;
sha1_block(block, output);
}
/* Final hash */
-#pragma unroll
for (i = 0; i < 5; i++)
block[i] = output[i];
sha1_init(output);
block[5] = 0;
block[6] = 0x80000000;
-#pragma unroll
for (i = 7; i < 15; i++)
block[i] = 0;
block[15] = 24 << 3;
sha1_block(block, output);
/* DeriveKey */
-#pragma unroll
for (i = 0; i < 5; i++)
block[i] = output[i] ^ 0x36363636;
sha1_init(output);
-#pragma unroll
for (i = 5; i < 16; i++)
block[i] = 0x36363636;
sha1_block(block, output);
/* sha1_final (last block was 64 bytes) */
block[0] = 0x80000000;
-#pragma unroll
for (i = 1; i < 15; i++)
block[i] = 0;
block[15] = 64 << 3;
sha1_block(block, output);
/* Endian-swap to output (we only use 16 bytes) */
-#pragma unroll
for (i = 0; i < 4; i++) {
#ifdef SCALAR
key[gid * 4 + i] = SWAP32(output[i]);
View
17 src/opencl/office2010_kernel.cl
@@ -387,10 +387,8 @@ __kernel void GenerateSHA1pwhash(
/* Initial hash of salt + password */
/* The ending 0x80 is already in the buffer */
sha1_init_s(output);
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = SWAP32(salt[i]);
-#pragma unroll
for (i = 4; i < 16; i++)
block[i] = SWAP32(unicode_pw[gid * (UNICODE_LENGTH>>2) + i - 4]);
if (pw_len[gid] < 40) {
@@ -400,15 +398,13 @@ __kernel void GenerateSHA1pwhash(
sha1_block_s(block, output);
if (pw_len[gid] >= 40) {
-#pragma unroll
for (i = 0; i < 14; i++)
block[i] = SWAP32(unicode_pw[gid * (UNICODE_LENGTH>>2) + i + 12]);
block[14] = 0;
block[15] = (pw_len[gid] + 16) << 3;
sha1_block_s(block, output);
}
-#pragma unroll
for (i = 0; i < 5; i++)
#ifdef SCALAR
pwhash[gid * 6 + i] = output[i];
@@ -431,7 +427,6 @@ __kernel void HashLoop(__global MAYBE_VECTOR_UINT *pwhash)
uint base = pwhash[gid * 6 + 5].s0;
#endif
-#pragma unroll
for (i = 0; i < 5; i++)
output[i] = pwhash[gid * 6 + i];
@@ -440,18 +435,15 @@ __kernel void HashLoop(__global MAYBE_VECTOR_UINT *pwhash)
for (j = 0; j < HASH_LOOPS; j++)
{
block[0] = SWAP32(base + j);
-#pragma unroll
for (i = 1; i < 6; i++)
block[i] = output[i - 1];
sha1_init(output);
block[6] = 0x80000000;
-#pragma unroll
for (i = 7; i < 15; i++)
block[i] = 0;
block[15] = 24 << 3;
sha1_block(block, output);
}
-#pragma unroll
for (i = 0; i < 5; i++)
pwhash[gid * 6 + i] = output[i];
pwhash[gid * 6 + 5] += HASH_LOOPS;
@@ -473,28 +465,24 @@ __kernel void Generate2010key(
#endif
uint iterations = *spincount % HASH_LOOPS;
-#pragma unroll
for (i = 0; i < 5; i++)
output[i] = pwhash[gid * 6 + i];
/* Remainder of sha1(serial.last hash)
* We avoid byte-swapping back and forth */
for (j = 0; j < iterations; j++)
{
block[0] = SWAP32(base + j);
-#pragma unroll
for (i = 1; i < 6; i++)
block[i] = output[i - 1];
sha1_init(output);
block[6] = 0x80000000;
-#pragma unroll
for (i = 7; i < 15; i++)
block[i] = 0;
block[15] = 24 << 3;
sha1_block(block, output);
}
/* Our sha1 destroys input so we store it in temp[] */
-#pragma unroll
for (i = 0; i < 5; i++)
block[i] = temp[i] = output[i];
@@ -503,14 +491,12 @@ __kernel void Generate2010key(
block[5] = InputBlockKey[0];
block[6] = InputBlockKey[1];
block[7] = 0x80000000;
-#pragma unroll
for (i = 8; i < 15; i++)
block[i] = 0;
block[15] = 28 << 3;
sha1_block(block, output);
/* Endian-swap to output (we only use 16 bytes) */
-#pragma unroll
for (i = 0; i < 4; i++) {
#ifdef SCALAR
key[gid * 32/4 + i] = SWAP32(output[i]);
@@ -523,20 +509,17 @@ __kernel void Generate2010key(
}
/* Final hash 2 */
sha1_init(output);
-#pragma unroll
for (i = 0; i < 5; i++)
block[i] = temp[i];
block[5] = ValueBlockKey[0];
block[6] = ValueBlockKey[1];
block[7] = 0x80000000;
-#pragma unroll
for (i = 8; i < 15; i++)
block[i] = 0;
block[15] = 28 << 3;
sha1_block(block, output);
/* Endian-swap to output (we only use 16 bytes) */
-#pragma unroll
for (i = 0; i < 4; i++) {
#ifdef SCALAR
key[gid * 32/4 + 16/4 + i] = SWAP32(output[i]);
View
20 src/opencl/office2013_kernel.cl
@@ -103,7 +103,6 @@ inline void sha512_single_s(ulong *w, ulong *output) {
g = 0x1f83d9abfb41bd6bUL;
h = 0x5be0cd19137e2179UL;
-#pragma unroll
for (int i = 0; i < 16; i++) {
t1 = k[i] + w[i] + h + Sigma1(e) + Ch(e, f, g);
t2 = Maj(a, b, c) + Sigma0(a);
@@ -118,7 +117,6 @@ inline void sha512_single_s(ulong *w, ulong *output) {
a = t1 + t2;
}
-#pragma unroll
for (int i = 16; i < 80; i++) {
w[i & 15] = sigma1(w[(i - 2) & 15]) + sigma0(w[(i - 15) & 15]) + w[(i - 16) & 15] + w[(i - 7) & 15];
t1 = k[i] + w[i & 15] + h + Sigma1(e) + Ch(e, f, g);
@@ -159,7 +157,6 @@ inline void sha512_single(MAYBE_VECTOR_ULONG *w, MAYBE_VECTOR_ULONG *output) {
g = 0x1f83d9abfb41bd6bUL;
h = 0x5be0cd19137e2179UL;
-#pragma unroll
for (int i = 0; i < 16; i++) {
t1 = k[i] + w[i] + h + Sigma1(e) + Ch(e, f, g);
t2 = Maj(a, b, c) + Sigma0(a);
@@ -174,7 +171,6 @@ inline void sha512_single(MAYBE_VECTOR_ULONG *w, MAYBE_VECTOR_ULONG *output) {
a = t1 + t2;
}
-#pragma unroll
for (int i = 16; i < 80; i++) {
w[i & 15] = sigma1(w[(i - 2) & 15]) + sigma0(w[(i - 15) & 15]) + w[(i - 16) & 15] + w[(i - 7) & 15];
t1 = k[i] + w[i & 15] + h + Sigma1(e) + Ch(e, f, g);
@@ -213,17 +209,14 @@ __kernel void GenerateSHA512pwhash(
/* Initial hash of salt + password */
/* The ending 0x80 is already in the buffer */
-#pragma unroll
for (i = 0; i < 2; i++)
block[i] = SWAP64(salt[i]);
-#pragma unroll
for (i = 2; i < 14; i++)
block[i] = SWAP64(unicode_pw[gid * (UNICODE_LENGTH >> 3) + i - 2]);
block[14] = 0;
block[15] = (ulong)(pw_len[gid] + 16) << 3;
sha512_single_s(block, output);
-#pragma unroll
for (i = 0; i < 8; i++)
#ifdef SCALAR
pwhash[gid * 9 + i] = output[i];
@@ -246,7 +239,6 @@ __kernel void HashLoop(__global MAYBE_VECTOR_ULONG *pwhash)
uint base = pwhash[gid * 9 + 8].s0;
#endif
-#pragma unroll
for (i = 0; i < 8; i++)
output[i] = pwhash[gid * 9 + i];
@@ -255,17 +247,14 @@ __kernel void HashLoop(__global MAYBE_VECTOR_ULONG *pwhash)
for (j = 0; j < HASH_LOOPS; j++)
{
block[0] = ((ulong)SWAP32(base + j) << 32) | (output[0] >> 32);
-#pragma unroll
for (i = 1; i < 8; i++)
block[i] = (output[i - 1] << 32) | (output[i] >> 32);
block[8] = (output[7] << 32) | 0x80000000UL;
-#pragma unroll
for (i = 9; i < 15; i++)
block[i] = 0;
block[15] = 68 << 3;
sha512_single(block, output);
}
-#pragma unroll
for (i = 0; i < 8; i++)
pwhash[gid * 9 + i] = output[i];
pwhash[gid * 9 + 8] += HASH_LOOPS;
@@ -287,46 +276,39 @@ __kernel void Generate2013key(
#endif
uint iterations = *spincount % HASH_LOOPS;
-#pragma unroll
for (i = 0; i < 8; i++)
output[i] = pwhash[gid * 9 + i];
/* Remainder of iterations */
for (j = 0; j < iterations; j++)
{
block[0] = ((ulong)SWAP32(base + j) << 32) | (output[0] >> 32);
-#pragma unroll
for (i = 1; i < 8; i++)
block[i] = (output[i - 1] << 32) | (output[i] >> 32);
block[8] = (output[7] << 32) | 0x80000000UL;
-#pragma unroll
for (i = 9; i < 15; i++)
block[i] = 0;
block[15] = 68 << 3;
sha512_single(block, output);
}
/* Our sha512 destroys input so we store a needed portion in temp[] */
-#pragma unroll
for (i = 0; i < 8; i++)
block[i] = temp[i] = output[i];
/* Final hash 1 */
block[8] = InputBlockKey;
block[9] = 0x8000000000000000UL;
-#pragma unroll
for (i = 10; i < 15; i++)
block[i] = 0;
block[15] = 72 << 3;
sha512_single(block, output);
/* Prepare for final hash 2 */
-#pragma unroll
for (i = 0; i < 8; i++)
block[i] = temp[i];
/* Endian-swap to hash 1 output */
-#pragma unroll
for (i = 0; i < 8; i++) {
#ifdef SCALAR
key[gid * 128/8 + i] = SWAP64(output[i]);
@@ -341,14 +323,12 @@ __kernel void Generate2013key(
/* Final hash 2 */
block[8] = ValueBlockKey;
block[9] = 0x8000000000000000UL;
-#pragma unroll
for (i = 10; i < 15; i++)
block[i] = 0;
block[15] = 72 << 3;
sha512_single(block, output);
/* Endian-swap to hash 2 output */
-#pragma unroll
for (i = 0; i < 8; i++) {
#ifdef SCALAR
key[gid * 128/8 + 64/8 + i] = SWAP64(output[i]);
View
8 src/opencl/rar_kernel.cl
@@ -273,7 +273,6 @@ __kernel void RarInit(
/* Copy to 1x buffer */
for (i = 0; i < (pwlen + 3) >> 2; i++)
RawPsw[i] = SWAP32(unicode_pw[gid * UNICODE_LENGTH / 4 + i]);
-#pragma unroll
for (i = 0; i < 8; i++)
PUTCHAR_BE_G(RawPsw, pwlen + i, ((__constant uchar*)salt)[i]);
round[gid] = 0;
@@ -293,10 +292,8 @@ __kernel void RarGetIV(
uint round = round_p[gid];
uint i;
-#pragma unroll
for (i = 0; i < 5; i++)
output[i] = OutputBuf[gid * 5 + i];
-#pragma unroll
for (i = 0; i < (UNICODE_LENGTH + 8) / 4; i++)
block[i] = RawBuf[gid * (UNICODE_LENGTH + 8) / 4 + i];
@@ -341,11 +338,9 @@ __kernel void RarHashLoop(
uint round = round_p[gid];
uint i;
-#pragma unroll
for (i = 0; i < (UNICODE_LENGTH + 8) / 4; i++)
RawPsw[i] = RawBuf[gid * (UNICODE_LENGTH + 8) / 4 + i];
-#pragma unroll
for (i = 0; i < 5; i++)
output[i] = OutputBuf[gid * 5 + i];
@@ -409,7 +404,6 @@ __kernel void RarHashLoop(
}
round_p[gid] = round;
-#pragma unroll
for (i = 0; i < 5; i++)
OutputBuf[gid * 5 + i] = output[i];
}
@@ -423,14 +417,12 @@ __kernel void RarFinal(
uint *block[16], output[5];
uint i;
-#pragma unroll
for (i = 0; i < 5; i++)
output[i] = OutputBuf[gid * 5 + i];
sha1_final((uint*)block, (uint*)output, (pw_len[gid] + 8 + 3) * ROUNDS);
// Still no endian-swap
-#pragma unroll
for (i = 0; i < 4; i++)
aes_key[gid * 4 + i] = output[i];
}

0 comments on commit d1f1bbe

Please sign in to comment.
Something went wrong with that request. Please try again.