Permalink
Browse files

OpenCL kernels RAR, Office, NTLMv2, WPA-PSK: Drop all pragma unrolls.

The compiler unrolls anyway where sensible.
  • Loading branch information...
magnumripper committed Nov 9, 2012
1 parent ae49c17 commit d1f1bbe3ddba573d7e607fa0ce2ba651fa2db329
@@ -258,7 +258,6 @@ __kernel void ntlmv2_nthash(const __global uint *unicode_pw, __global MAYBE_VECT
/* Input buffer is prepared with 0x80, zero-padding and length << 3 */
md4_init(output);
-#pragma unroll
for (i = 0; i < 16; i++) {
#ifdef SCALAR
block[i] = *pw++;
@@ -271,7 +270,6 @@ __kernel void ntlmv2_nthash(const __global uint *unicode_pw, __global MAYBE_VECT
}
md4_block(block, output);
-#pragma unroll
for (i = 0; i < 4; i++)
nthash[gid * 4 + i] = output[i];
}
@@ -288,55 +286,44 @@ __kernel void ntlmv2_final(const __global MAYBE_VECTOR_UINT *nthash, MAYBE_CONST
/* 1st HMAC */
md5_init(output);
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = 0x36363636 ^ nthash[gid * 4 + i];
-#pragma unroll
for (i = 4; i < 16; i++)
block[i] = 0x36363636;
md5_block(block, output); /* md5_update(ipad, 64) */
/* Salt buffer is prepared with 0x80, zero-padding and length,
* ie. (saltlen + 64) << 3 in get_salt() */
-#pragma unroll
for (i = 0; i < 16; i++)
block[i] = *cp++;
md5_block(block, output); /* md5_update(salt, saltlen), md5_final() */
-#pragma unroll
for (i = 0; i < 4; i++)
hash[i] = output[i];
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = 0x5c5c5c5c ^ nthash[gid * 4 + i];
md5_init(output);
-#pragma unroll
for (i = 4; i < 16; i++)
block[i] = 0x5c5c5c5c;
md5_block(block, output); /* md5_update(opad, 64) */
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = hash[i];
block[4] = 0x80;
-#pragma unroll
for (i = 5; i < 14; i++)
block[i] = 0;
block[14] = (64 + 16) << 3;
block[15] = 0;
md5_block(block, output); /* md5_update(hash, 16), md5_final() */
/* 2nd HMAC */
-#pragma unroll
for (i = 0; i < 4; i++)
hash[i] = output[i];
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = 0x36363636 ^ output[i];
md5_init(output);
-#pragma unroll
for (i = 4; i < 16; i++)
block[i] = 0x36363636;
md5_block(block, output); /* md5_update(ipad, 64) */
@@ -348,37 +335,30 @@ __kernel void ntlmv2_final(const __global MAYBE_VECTOR_UINT *nthash, MAYBE_CONST
/* At least this will not diverge */
while (challenge_size--) {
-#pragma unroll
for (i = 0; i < 16; i++)
block[i] = *cp++;
md5_block(block, output); /* md5_update(challenge, len), md5_final() */
}
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = 0x5c5c5c5c ^ hash[i];
-#pragma unroll
for (i = 0; i < 4; i++)
hash[i] = output[i];
md5_init(output);
-#pragma unroll
for (i = 4; i < 16; i++)
block[i] = 0x5c5c5c5c;
md5_block(block, output); /* md5_update(opad, 64) */
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = hash[i];
block[4] = 0x80;
-#pragma unroll
for (i = 5; i < 14; i++)
block[i] = 0;
block[14] = (64 + 16) << 3;
block[15] = 0;
md5_block(block, output); /* md5_update(hash, 16), md5_final() */
-#pragma unroll
for (i = 0; i < 4; i++) {
#ifdef SCALAR
result[gid * 4 + i] = output[i];
@@ -385,10 +385,8 @@ __kernel void GenerateSHA1pwhash(
/* Initial hash of salt + password */
/* The ending 0x80 is already in the buffer */
sha1_init_s(output);
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = SWAP32(salt[i]);
-#pragma unroll
for (i = 4; i < 16; i++)
block[i] = SWAP32(unicode_pw[gid * (UNICODE_LENGTH>>2) + i - 4]);
if (pw_len[gid] < 40) {
@@ -398,15 +396,13 @@ __kernel void GenerateSHA1pwhash(
sha1_block_s(block, output);
if (pw_len[gid] >= 40) {
-#pragma unroll
for (i = 0; i < 14; i++)
block[i] = SWAP32(unicode_pw[gid * (UNICODE_LENGTH>>2) + i + 12]);
block[14] = 0;
block[15] = (pw_len[gid] + 16) << 3;
sha1_block_s(block, output);
}
-#pragma unroll
for (i = 0; i < 5; i++)
#ifdef SCALAR
pwhash[gid * 6 + i] = output[i];
@@ -429,7 +425,6 @@ __kernel void HashLoop(__global MAYBE_VECTOR_UINT *pwhash)
uint base = pwhash[gid * 6 + 5].s0;
#endif
-#pragma unroll
for (i = 0; i < 5; i++)
output[i] = pwhash[gid * 6 + i];
@@ -438,18 +433,15 @@ __kernel void HashLoop(__global MAYBE_VECTOR_UINT *pwhash)
for (j = 0; j < HASH_LOOPS; j++)
{
block[0] = SWAP32(base + j);
-#pragma unroll
for (i = 1; i < 6; i++)
block[i] = output[i - 1];
sha1_init(output);
block[6] = 0x80000000;
-#pragma unroll
for (i = 7; i < 15; i++)
block[i] = 0;
block[15] = 24 << 3;
sha1_block(block, output);
}
-#pragma unroll
for (i = 0; i < 5; i++)
pwhash[gid * 6 + i] = output[i];
pwhash[gid * 6 + 5] += HASH_LOOPS;
@@ -464,58 +456,49 @@ __kernel void Generate2007key(
MAYBE_VECTOR_UINT output[5];
uint gid = get_global_id(0);
-#pragma unroll
for (i = 0; i < 5; i++)
output[i] = pwhash[gid * 6 + i];
/* Remainder of sha1(serial.last hash)
* We avoid byte-swapping back and forth */
for (j = 50000 - (50000 % HASH_LOOPS); j < 50000; j++)
{
block[0] = SWAP32(j);
-#pragma unroll
for (i = 1; i < 6; i++)
block[i] = output[i - 1];
sha1_init(output);
block[6] = 0x80000000;
-#pragma unroll
for (i = 7; i < 15; i++)
block[i] = 0;
block[15] = 24 << 3;
sha1_block(block, output);
}
/* Final hash */
-#pragma unroll
for (i = 0; i < 5; i++)
block[i] = output[i];
sha1_init(output);
block[5] = 0;
block[6] = 0x80000000;
-#pragma unroll
for (i = 7; i < 15; i++)
block[i] = 0;
block[15] = 24 << 3;
sha1_block(block, output);
/* DeriveKey */
-#pragma unroll
for (i = 0; i < 5; i++)
block[i] = output[i] ^ 0x36363636;
sha1_init(output);
-#pragma unroll
for (i = 5; i < 16; i++)
block[i] = 0x36363636;
sha1_block(block, output);
/* sha1_final (last block was 64 bytes) */
block[0] = 0x80000000;
-#pragma unroll
for (i = 1; i < 15; i++)
block[i] = 0;
block[15] = 64 << 3;
sha1_block(block, output);
/* Endian-swap to output (we only use 16 bytes) */
-#pragma unroll
for (i = 0; i < 4; i++) {
#ifdef SCALAR
key[gid * 4 + i] = SWAP32(output[i]);
@@ -387,10 +387,8 @@ __kernel void GenerateSHA1pwhash(
/* Initial hash of salt + password */
/* The ending 0x80 is already in the buffer */
sha1_init_s(output);
-#pragma unroll
for (i = 0; i < 4; i++)
block[i] = SWAP32(salt[i]);
-#pragma unroll
for (i = 4; i < 16; i++)
block[i] = SWAP32(unicode_pw[gid * (UNICODE_LENGTH>>2) + i - 4]);
if (pw_len[gid] < 40) {
@@ -400,15 +398,13 @@ __kernel void GenerateSHA1pwhash(
sha1_block_s(block, output);
if (pw_len[gid] >= 40) {
-#pragma unroll
for (i = 0; i < 14; i++)
block[i] = SWAP32(unicode_pw[gid * (UNICODE_LENGTH>>2) + i + 12]);
block[14] = 0;
block[15] = (pw_len[gid] + 16) << 3;
sha1_block_s(block, output);
}
-#pragma unroll
for (i = 0; i < 5; i++)
#ifdef SCALAR
pwhash[gid * 6 + i] = output[i];
@@ -431,7 +427,6 @@ __kernel void HashLoop(__global MAYBE_VECTOR_UINT *pwhash)
uint base = pwhash[gid * 6 + 5].s0;
#endif
-#pragma unroll
for (i = 0; i < 5; i++)
output[i] = pwhash[gid * 6 + i];
@@ -440,18 +435,15 @@ __kernel void HashLoop(__global MAYBE_VECTOR_UINT *pwhash)
for (j = 0; j < HASH_LOOPS; j++)
{
block[0] = SWAP32(base + j);
-#pragma unroll
for (i = 1; i < 6; i++)
block[i] = output[i - 1];
sha1_init(output);
block[6] = 0x80000000;
-#pragma unroll
for (i = 7; i < 15; i++)
block[i] = 0;
block[15] = 24 << 3;
sha1_block(block, output);
}
-#pragma unroll
for (i = 0; i < 5; i++)
pwhash[gid * 6 + i] = output[i];
pwhash[gid * 6 + 5] += HASH_LOOPS;
@@ -473,28 +465,24 @@ __kernel void Generate2010key(
#endif
uint iterations = *spincount % HASH_LOOPS;
-#pragma unroll
for (i = 0; i < 5; i++)
output[i] = pwhash[gid * 6 + i];
/* Remainder of sha1(serial.last hash)
* We avoid byte-swapping back and forth */
for (j = 0; j < iterations; j++)
{
block[0] = SWAP32(base + j);
-#pragma unroll
for (i = 1; i < 6; i++)
block[i] = output[i - 1];
sha1_init(output);
block[6] = 0x80000000;
-#pragma unroll
for (i = 7; i < 15; i++)
block[i] = 0;
block[15] = 24 << 3;
sha1_block(block, output);
}
/* Our sha1 destroys input so we store it in temp[] */
-#pragma unroll
for (i = 0; i < 5; i++)
block[i] = temp[i] = output[i];
@@ -503,14 +491,12 @@ __kernel void Generate2010key(
block[5] = InputBlockKey[0];
block[6] = InputBlockKey[1];
block[7] = 0x80000000;
-#pragma unroll
for (i = 8; i < 15; i++)
block[i] = 0;
block[15] = 28 << 3;
sha1_block(block, output);
/* Endian-swap to output (we only use 16 bytes) */
-#pragma unroll
for (i = 0; i < 4; i++) {
#ifdef SCALAR
key[gid * 32/4 + i] = SWAP32(output[i]);
@@ -523,20 +509,17 @@ __kernel void Generate2010key(
}
/* Final hash 2 */
sha1_init(output);
-#pragma unroll
for (i = 0; i < 5; i++)
block[i] = temp[i];
block[5] = ValueBlockKey[0];
block[6] = ValueBlockKey[1];
block[7] = 0x80000000;
-#pragma unroll
for (i = 8; i < 15; i++)
block[i] = 0;
block[15] = 28 << 3;
sha1_block(block, output);
/* Endian-swap to output (we only use 16 bytes) */
-#pragma unroll
for (i = 0; i < 4; i++) {
#ifdef SCALAR
key[gid * 32/4 + 16/4 + i] = SWAP32(output[i]);
Oops, something went wrong.

0 comments on commit d1f1bbe

Please sign in to comment.