Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

wpapsk-opencl: Move all post processing from CPU to GPU or otherwise …

…out of

the inner loop. Great boost! Also bump max plaintext length from 15 to 32.
  • Loading branch information...
commit aa91e0d78cafa37d2e83f8ab77baf434d3193cfa 1 parent d1f1bbe
@magnumripper authored
View
13 run/john.conf
@@ -100,9 +100,13 @@ MPIOMPverbose = Y
#rar_LWS = 128
#rar_GWS = 8192
-# For crypt SHA-512.
+# For crypt SHA-2.
#sha512crypt_LWS = 64
#sha512crypt_GWS = 8192
+#sha256crypt_LWS = xxx
+#sha256crypt_GWS = xxx
+#rawsha512_LWS = xxx
+#rawsha512_GWS = xxx
# For office formats.
#office2007_LWS = 64
@@ -114,7 +118,12 @@ MPIOMPverbose = Y
# For NTLMv2 format
#ntlmv2_LWS = 1024
-#ntlmv2_LWS = 32768
+#ntlmv2_GWS = 32768
+
+# WPA-PSK
+#wpapsk_LWS = xxx
+#wpapsk_GWS = xxx
+
# Markov modes, see ../doc/MARKOV for more information
[Markov:Default]
View
524 src/opencl/wpapsk_kernel.cl
@@ -15,6 +15,13 @@
#define USE_BITSELECT
#endif
+/* Workaround for driver bug seen in version 295.49 */
+#if gpu_nvidia(DEVICE_INFO)
+#define MAYBE_CONSTANT __global const
+#else
+#define MAYBE_CONSTANT __constant
+#endif
+
#ifdef SCALAR
inline uint SWAP32(uint x)
{
@@ -26,17 +33,21 @@ inline uint SWAP32(uint x)
#endif
typedef struct {
- uchar length;
- uchar v[15];
+ uint length;
+ uchar v[PLAINTEXT_LENGTH];
} wpapsk_password;
-typedef struct {
- uint v[8];
-} wpapsk_hash;
+typedef struct
+{
+ uint keymic[16 / 4];
+} mic_t;
typedef struct {
- uchar length;
- uchar salt[15];
+ uint length;
+ uint eapol[(256 + 64) / 4];
+ uint eapol_size;
+ uint data[(64 + 12) / 4]; // pre-processed mac and nonce
+ uchar salt[15]; // essid
} wpapsk_salt;
typedef struct {
@@ -44,6 +55,7 @@ typedef struct {
uint ipad[5];
uint opad[5];
uint out[5];
+ uint partial[5];
} wpapsk_state;
#define INIT_A 0x67452301
@@ -312,25 +324,177 @@ typedef struct {
#define SHA1_SHORT(A, B, C, D, E, W) SHA1_SHORT_BEG(A, B, C, D, E, W) SHA1_SHORT_END(A, B, C, D, E, W)
+#define sha1_init(o) { \
+ o[0] = INIT_A; \
+ o[1] = INIT_B; \
+ o[2] = INIT_C; \
+ o[3] = INIT_D; \
+ o[4] = INIT_E; \
+ }
+
+#define sha1_block(b, o) { \
+ A = o[0]; \
+ B = o[1]; \
+ C = o[2]; \
+ D = o[3]; \
+ E = o[4]; \
+ SHA1(A, B, C, D, E, b); \
+ o[0] += A; \
+ o[1] += B; \
+ o[2] += C; \
+ o[3] += D; \
+ o[4] += E; \
+ }
+
+#define sha1_block_short(b, o) { \
+ A = o[0]; \
+ B = o[1]; \
+ C = o[2]; \
+ D = o[3]; \
+ E = o[4]; \
+ SHA1_SHORT(A, B, C, D, E, b); \
+ o[0] += A; \
+ o[1] += B; \
+ o[2] += C; \
+ o[3] += D; \
+ o[4] += E; \
+ }
+
+
+/* The basic MD5 functions */
+#ifdef USE_BITSELECT
+#define F(x, y, z) bitselect((z), (y), (x))
+#define G(x, y, z) bitselect((y), (x), (z))
+#else
+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
+#endif
+
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define I(x, y, z) ((y) ^ ((x) | ~(z)))
+
+
+/* The MD5 transformation for all four rounds. */
+#define STEP(f, a, b, c, d, x, t, s) \
+ (a) += f((b), (c), (d)) + (x) + (t); \
+ (a) = rotate((a), (uint)(s)); \
+ (a) += (b)
+
+
+/* Raw'n'lean MD5 with context in output buffer */
+/* NOTE: This version thrashes the input block! */
+inline void md5_block(uint *W, uint *output)
+{
+ uint a, b, c, d;
+
+ a = output[0];
+ b = output[1];
+ c = output[2];
+ d = output[3];
+
+ /* Round 1 */
+ STEP(F, a, b, c, d, W[0], 0xd76aa478, 7);
+ STEP(F, d, a, b, c, W[1], 0xe8c7b756, 12);
+ STEP(F, c, d, a, b, W[2], 0x242070db, 17);
+ STEP(F, b, c, d, a, W[3], 0xc1bdceee, 22);
+ STEP(F, a, b, c, d, W[4], 0xf57c0faf, 7);
+ STEP(F, d, a, b, c, W[5], 0x4787c62a, 12);
+ STEP(F, c, d, a, b, W[6], 0xa8304613, 17);
+ STEP(F, b, c, d, a, W[7], 0xfd469501, 22);
+ STEP(F, a, b, c, d, W[8], 0x698098d8, 7);
+ STEP(F, d, a, b, c, W[9], 0x8b44f7af, 12);
+ STEP(F, c, d, a, b, W[10], 0xffff5bb1, 17);
+ STEP(F, b, c, d, a, W[11], 0x895cd7be, 22);
+ STEP(F, a, b, c, d, W[12], 0x6b901122, 7);
+ STEP(F, d, a, b, c, W[13], 0xfd987193, 12);
+ STEP(F, c, d, a, b, W[14], 0xa679438e, 17);
+ STEP(F, b, c, d, a, W[15], 0x49b40821, 22);
+
+ /* Round 2 */
+ STEP(G, a, b, c, d, W[1], 0xf61e2562, 5);
+ STEP(G, d, a, b, c, W[6], 0xc040b340, 9);
+ STEP(G, c, d, a, b, W[11], 0x265e5a51, 14);
+ STEP(G, b, c, d, a, W[0], 0xe9b6c7aa, 20);
+ STEP(G, a, b, c, d, W[5], 0xd62f105d, 5);
+ STEP(G, d, a, b, c, W[10], 0x02441453, 9);
+ STEP(G, c, d, a, b, W[15], 0xd8a1e681, 14);
+ STEP(G, b, c, d, a, W[4], 0xe7d3fbc8, 20);
+ STEP(G, a, b, c, d, W[9], 0x21e1cde6, 5);
+ STEP(G, d, a, b, c, W[14], 0xc33707d6, 9);
+ STEP(G, c, d, a, b, W[3], 0xf4d50d87, 14);
+ STEP(G, b, c, d, a, W[8], 0x455a14ed, 20);
+ STEP(G, a, b, c, d, W[13], 0xa9e3e905, 5);
+ STEP(G, d, a, b, c, W[2], 0xfcefa3f8, 9);
+ STEP(G, c, d, a, b, W[7], 0x676f02d9, 14);
+ STEP(G, b, c, d, a, W[12], 0x8d2a4c8a, 20);
+
+ /* Round 3 */
+ STEP(H, a, b, c, d, W[5], 0xfffa3942, 4);
+ STEP(H, d, a, b, c, W[8], 0x8771f681, 11);
+ STEP(H, c, d, a, b, W[11], 0x6d9d6122, 16);
+ STEP(H, b, c, d, a, W[14], 0xfde5380c, 23);
+ STEP(H, a, b, c, d, W[1], 0xa4beea44, 4);
+ STEP(H, d, a, b, c, W[4], 0x4bdecfa9, 11);
+ STEP(H, c, d, a, b, W[7], 0xf6bb4b60, 16);
+ STEP(H, b, c, d, a, W[10], 0xbebfbc70, 23);
+ STEP(H, a, b, c, d, W[13], 0x289b7ec6, 4);
+ STEP(H, d, a, b, c, W[0], 0xeaa127fa, 11);
+ STEP(H, c, d, a, b, W[3], 0xd4ef3085, 16);
+ STEP(H, b, c, d, a, W[6], 0x04881d05, 23);
+ STEP(H, a, b, c, d, W[9], 0xd9d4d039, 4);
+ STEP(H, d, a, b, c, W[12], 0xe6db99e5, 11);
+ STEP(H, c, d, a, b, W[15], 0x1fa27cf8, 16);
+ STEP(H, b, c, d, a, W[2], 0xc4ac5665, 23);
+
+ /* Round 4 */
+ STEP(I, a, b, c, d, W[0], 0xf4292244, 6);
+ STEP(I, d, a, b, c, W[7], 0x432aff97, 10);
+ STEP(I, c, d, a, b, W[14], 0xab9423a7, 15);
+ STEP(I, b, c, d, a, W[5], 0xfc93a039, 21);
+ STEP(I, a, b, c, d, W[12], 0x655b59c3, 6);
+ STEP(I, d, a, b, c, W[3], 0x8f0ccc92, 10);
+ STEP(I, c, d, a, b, W[10], 0xffeff47d, 15);
+ STEP(I, b, c, d, a, W[1], 0x85845dd1, 21);
+ STEP(I, a, b, c, d, W[8], 0x6fa87e4f, 6);
+ STEP(I, d, a, b, c, W[15], 0xfe2ce6e0, 10);
+ STEP(I, c, d, a, b, W[6], 0xa3014314, 15);
+ STEP(I, b, c, d, a, W[13], 0x4e0811a1, 21);
+ STEP(I, a, b, c, d, W[4], 0xf7537e82, 6);
+ STEP(I, d, a, b, c, W[11], 0xbd3af235, 10);
+ STEP(I, c, d, a, b, W[2], 0x2ad7d2bb, 15);
+ STEP(I, b, c, d, a, W[9], 0xeb86d391, 21);
+
+ output[0] += a;
+ output[1] += b;
+ output[2] += c;
+ output[3] += d;
+}
+
+
+#define md5_init(output) { \
+ output[0] = 0x67452301; \
+ output[1] = 0xefcdab89; \
+ output[2] = 0x98badcfe; \
+ output[3] = 0x10325476; \
+ }
+
inline void preproc(__global const uchar *key, uint keylen,
__global uint *state, uchar var1, uint var4)
{
uint i;
uint W[16], temp;
+ uint A = INIT_A;
+ uint B = INIT_B;
+ uint C = INIT_C;
+ uint D = INIT_D;
+ uint E = INIT_E;
-#pragma unroll
for (i = 0; i < 16; i++)
W[i] = var4;
for (i = 0; i < keylen; i++)
XORCHAR_BE(W, i, key[i]);
- uint A = INIT_A;
- uint B = INIT_B;
- uint C = INIT_C;
- uint D = INIT_D;
- uint E = INIT_E;
-
SHA1(A, B, C, D, E, W);
state[0] = A + INIT_A;
@@ -341,15 +505,14 @@ inline void preproc(__global const uchar *key, uint keylen,
}
inline void hmac_sha1(__global uint *output,
- __global uint *ipad_state,
- __global uint *opad_state,
- __constant uchar *salt, uint saltlen, uchar add)
+ __global uint *ipad,
+ __global uint *opad,
+ MAYBE_CONSTANT uchar *salt, uint saltlen, uchar add)
{
uint i;
uint W[16], temp;
uint A, B, C, D, E;
-#pragma unroll
for (i = 0; i < 16; i++)
W[i] = 0;
@@ -360,19 +523,19 @@ inline void hmac_sha1(__global uint *output,
PUTCHAR_BE(W, saltlen + 4, 0x80);
W[15] = (64 + saltlen + 4) << 3;
- A = ipad_state[0];
- B = ipad_state[1];
- C = ipad_state[2];
- D = ipad_state[3];
- E = ipad_state[4];
+ A = ipad[0];
+ B = ipad[1];
+ C = ipad[2];
+ D = ipad[3];
+ E = ipad[4];
SHA1(A, B, C, D, E, W);
- A += ipad_state[0];
- B += ipad_state[1];
- C += ipad_state[2];
- D += ipad_state[3];
- E += ipad_state[4];
+ A += ipad[0];
+ B += ipad[1];
+ C += ipad[2];
+ D += ipad[3];
+ E += ipad[4];
W[0] = A;
W[1] = B;
@@ -382,19 +545,19 @@ inline void hmac_sha1(__global uint *output,
W[5] = 0x80000000;
W[15] = (64 + 20) << 3;
- A = opad_state[0];
- B = opad_state[1];
- C = opad_state[2];
- D = opad_state[3];
- E = opad_state[4];
+ A = opad[0];
+ B = opad[1];
+ C = opad[2];
+ D = opad[3];
+ E = opad[4];
SHA1_SHORT(A, B, C, D, E, W);
- A += opad_state[0];
- B += opad_state[1];
- C += opad_state[2];
- D += opad_state[3];
- E += opad_state[4];
+ A += opad[0];
+ B += opad[1];
+ C += opad[2];
+ D += opad[3];
+ E += opad[4];
output[0] = A;
output[1] = B;
@@ -404,7 +567,7 @@ inline void hmac_sha1(__global uint *output,
}
__kernel void wpapsk_init(__global const wpapsk_password *inbuffer,
- __constant wpapsk_salt *salt,
+ MAYBE_CONSTANT wpapsk_salt *salt,
__global wpapsk_state *state)
{
uint gid = get_global_id(0);
@@ -415,7 +578,6 @@ __kernel void wpapsk_init(__global const wpapsk_password *inbuffer,
hmac_sha1(state[gid].out, state[gid].ipad, state[gid].opad, salt->salt, salt->length, 0x01);
-#pragma unroll
for (i = 0; i < 5; i++)
state[gid].W[i] = state[gid].out[i];
}
@@ -430,16 +592,12 @@ __kernel void wpapsk_loop(__global wpapsk_state *state)
uint out[5];
uint A, B, C, D, E;
-#pragma unroll
for (i = 0; i < 5; i++)
W[i] = state[gid].W[i];
-#pragma unroll
for (i = 0; i < 5; i++)
ipad[i] = state[gid].ipad[i];
-#pragma unroll
for (i = 0; i < 5; i++)
opad[i] = state[gid].opad[i];
-#pragma unroll
for (i = 0; i < 5; i++)
out[i] = state[gid].out[i];
@@ -496,45 +654,295 @@ __kernel void wpapsk_loop(__global wpapsk_state *state)
out[4] ^= E;
}
-#pragma unroll
for (i = 0; i < 5; i++)
state[gid].W[i] = W[i];
-#pragma unroll
for (i = 0; i < 5; i++)
state[gid].ipad[i] = ipad[i];
-#pragma unroll
for (i = 0; i < 5; i++)
state[gid].opad[i] = opad[i];
-#pragma unroll
for (i = 0; i < 5; i++)
state[gid].out[i] = out[i];
}
-__kernel void wpapsk_pass2(__global wpapsk_hash *outbuffer,
- __constant wpapsk_salt *salt,
+__kernel void wpapsk_pass2(MAYBE_CONSTANT wpapsk_salt *salt,
__global wpapsk_state *state)
{
uint gid = get_global_id(0);
uint i;
-#pragma unroll
for (i = 0; i < 5; i++)
- outbuffer[gid].v[i] = state[gid].out[i] = SWAP32(state[gid].out[i]);
+ state[gid].partial[i] = state[gid].out[i];
+ for (i = 0; i < 5; i++)
+ state[gid].out[i] = SWAP32(state[gid].out[i]);
hmac_sha1(state[gid].out, state[gid].ipad, state[gid].opad, salt->salt, salt->length, 0x02);
-#pragma unroll
for (i = 0; i < 5; i++)
state[gid].W[i] = state[gid].out[i];
}
-__kernel void wpapsk_final(__global wpapsk_hash *outbuffer,
- __global wpapsk_state *state)
+#define dump_stuff_msg(msg, x, size) { \
+ uint ii; \
+ printf("%s : ", msg); \
+ for (ii = 0; ii < (size)/4; ii++) \
+ printf("%08x ", x[ii]); \
+ printf("\n"); \
+ }
+
+inline void prf_512(const uint *key, MAYBE_CONSTANT uint *data, uint *ret)
{
- uint gid = get_global_id(0);
+ //const uchar *text = "Pairwise key expansion\0";
+ //const uint text[6] = { 0x72696150, 0x65736977, 0x79656b20, 0x70786520, 0x69736e61, 0x00006e6f };
+ const uint text[6] = { 0x50616972, 0x77697365, 0x206b6579, 0x20657870, 0x616e7369, 0x6f6e0000 };
uint i;
+ uint output[5];
+ uint hash[5];
+ uint W[16], temp;
+ uint A, B, C, D, E;
+
+ // HMAC(EVP_sha1(), key, 32, (text.data), 100, ret, NULL);
+
+ for (i = 0; i < 8; i++)
+ W[i] = 0x36363636 ^ key[i]; // key is already swapped
+ for (i = 8; i < 16; i++)
+ W[i] = 0x36363636;
+
+ sha1_init(output);
+ sha1_block(W, output); // update(ipad)
+
+ /* 64 first bytes */
+ for (i = 0; i < 6; i++)
+ W[i] = text[i];
+ for (i = 5; i < 15; i++) {
+ W[i] = (W[i] & 0xffffff00) | *data >> 24;
+ W[i + 1] = *data++ << 8;
+ }
+ W[15] |= *data >> 24;
+
+ sha1_block(W, output); // update(data)
+
+ /* 36 remaining bytes */
+ W[0] = *data++ << 8;
+ for (i = 0; i < 8; i++) {
+ W[i] = (W[i] & 0xffffff00) | *data >> 24;
+ W[i + 1] = *data++ << 8;
+ }
+ W[9] = 0x80000000;
+ for (i = 10; i < 15; i++)
+ W[i] = 0;
+ W[15] = (64 + 100) << 3;
+
+ sha1_block(W, output); // update(data) + final
+
+ for (i = 0; i < 5; i++)
+ hash[i] = output[i];
+ for (i = 0; i < 8; i++)
+ W[i] = 0x5c5c5c5c ^ key[i];
+ for (i = 8; i < 16; i++)
+ W[i] = 0x5c5c5c5c;
+
+ sha1_init(output);
+ sha1_block(W, output); // update(opad)
+
+ for (i = 0; i < 5; i++)
+ W[i] = hash[i];
+ W[5] = 0x80000000;
+ W[15] = (64 + 20) << 3;
+ sha1_block_short(W, output); // update(digest) + final
+
+ /* Only 16 bits used */
+ for (i = 0; i < 4; i++)
+ ret[i] = output[i];
+}
+
+__kernel void wpapsk_final_md5(__global wpapsk_state *state,
+ MAYBE_CONSTANT wpapsk_salt *salt,
+ __global mic_t *mic)
+{
+ uint outbuffer[8];
+ uint prf[4];
+ uint W[16];
+ uint output[4], hash[4];
+ uint gid = get_global_id(0);
+ uint i, eapol_blocks;
+ MAYBE_CONSTANT uint *cp = salt->eapol;
+
+ for (i = 0; i < 5; i++)
+ outbuffer[i] = state[gid].partial[i];
+
+ for (i = 0; i < 3; i++)
+ outbuffer[5 + i] = state[gid].out[i];
+
+ prf_512(outbuffer, salt->data, prf);
+
+ // HMAC(EVP_md5(), prf, 16, hccap.eapol, hccap.eapol_size, mic[gid].keymic, NULL);
+ // prf is the key (16 bytes)
+ // eapol is the message (eapol_size blocks, already prepared with 0x80 and len)
+ md5_init(output);
+ for (i = 0; i < 4; i++)
+ W[i] = 0x36363636 ^ SWAP32(prf[i]);
+ for (i = 4; i < 16; i++)
+ W[i] = 0x36363636;
+ md5_block(W, output); /* md5_update(ipad, 64) */
+
+ /* eapol_blocks (of MD5),
+ * eapol data + 0x80, null padded and len set in set_salt() */
+ eapol_blocks = salt->eapol_size;
+ //printf("md5 eapol blocks: %u\n", eapol_blocks);
+
+ /* At least this will not diverge */
+ while (eapol_blocks--) {
+ for (i = 0; i < 16; i++)
+ W[i] = *cp++;
+ md5_block(W, output); /* md5_update(), md5_final() */
+ }
+
+ for (i = 0; i < 4; i++)
+ hash[i] = output[i];
+ md5_init(output);
+ for (i = 0; i < 4; i++)
+ W[i] = 0x5c5c5c5c ^ SWAP32(prf[i]);
+ for (i = 4; i < 16; i++)
+ W[i] = 0x5c5c5c5c;
+ md5_block(W, output); /* md5_update(opad, 64) */
+
+ for (i = 0; i < 4; i++)
+ W[i] = hash[i];
+ W[4] = 0x80;
+ for (i = 5; i < 14; i++)
+ W[i] = 0;
+ W[14] = (64 + 16) << 3;
+ W[15] = 0;
+ md5_block(W, output); /* md5_update(hash, 16), md5_final() */
+
+ for (i = 0; i < 4; i++)
+ mic[gid].keymic[i] = output[i];
+}
+
+__kernel void wpapsk_final_sha1(__global wpapsk_state *state,
+ MAYBE_CONSTANT wpapsk_salt *salt,
+ __global mic_t *mic)
+{
+ uint outbuffer[8];
+ uint prf[4];
+ uint gid = get_global_id(0);
+ uint ipad[5];
+ uint opad[5];
+ uint W[16], temp;
+ uint A, B, C, D, E;
+ uint i, eapol_blocks;
+ MAYBE_CONSTANT uint *cp = salt->eapol;
+
+ for (i = 0; i < 5; i++)
+ outbuffer[i] = state[gid].partial[i];
-#pragma unroll
for (i = 0; i < 3; i++)
- outbuffer[gid].v[5 + i] = SWAP32(state[gid].out[i]);
+ outbuffer[5 + i] = state[gid].out[i];
+
+ prf_512(outbuffer, salt->data, prf);
+
+ // HMAC(EVP_sha1(), prf, 16, hccap.eapol, hccap.eapol_size, mic[gid].keymic, NULL);
+ // prf is the key (16 bytes)
+ // eapol is the message (eapol_size bytes)
+ A = INIT_A;
+ B = INIT_B;
+ C = INIT_C;
+ D = INIT_D;
+ E = INIT_E;
+
+ for (i = 0; i < 4; i++)
+ W[i] = 0x36363636 ^ prf[i];
+ for (i = 4; i < 16; i++)
+ W[i] = 0x36363636;
+
+ SHA1(A, B, C, D, E, W);
+
+ A += INIT_A;
+ B += INIT_B;
+ C += INIT_C;
+ D += INIT_D;
+ E += INIT_E;
+
+ ipad[0] = A;
+ ipad[1] = B;
+ ipad[2] = C;
+ ipad[3] = D;
+ ipad[4] = E;
+
+ A = INIT_A;
+ B = INIT_B;
+ C = INIT_C;
+ D = INIT_D;
+ E = INIT_E;
+
+ for (i = 0; i < 4; i++)
+ W[i] = 0x5c5c5c5c ^ prf[i];
+ for (i = 4; i < 16; i++)
+ W[i] = 0x5c5c5c5c;
+
+ SHA1(A, B, C, D, E, W);
+
+ A += INIT_A;
+ B += INIT_B;
+ C += INIT_C;
+ D += INIT_D;
+ E += INIT_E;
+
+ opad[0] = A;
+ opad[1] = B;
+ opad[2] = C;
+ opad[3] = D;
+ opad[4] = E;
+
+ A = ipad[0];
+ B = ipad[1];
+ C = ipad[2];
+ D = ipad[3];
+ E = ipad[4];
+
+ /* eapol_blocks (of SHA1),
+ * eapol data + 0x80, null padded and len set in set_salt() */
+ eapol_blocks = salt->eapol_size;
+
+ /* At least this will not diverge */
+ while (eapol_blocks--) {
+ for (i = 0; i < 16; i++)
+ W[i] = *cp++;
+
+ SHA1(A, B, C, D, E, W);
+
+ A += ipad[0];
+ B += ipad[1];
+ C += ipad[2];
+ D += ipad[3];
+ E += ipad[4];
+
+ ipad[0] = A;
+ ipad[1] = B;
+ ipad[2] = C;
+ ipad[3] = D;
+ ipad[4] = E;
+ }
+
+ W[0] = A;
+ W[1] = B;
+ W[2] = C;
+ W[3] = D;
+ W[4] = E;
+ W[5] = 0x80000000;
+ W[15] = (64 + 20) << 3;
+
+ A = opad[0];
+ B = opad[1];
+ C = opad[2];
+ D = opad[3];
+ E = opad[4];
+
+ SHA1_SHORT(A, B, C, D, E, W);
+
+ /* We only use 16 bytes */
+ mic[gid].keymic[0] = SWAP32(A + opad[0]);
+ mic[gid].keymic[1] = SWAP32(B + opad[1]);
+ mic[gid].keymic[2] = SWAP32(C + opad[2]);
+ mic[gid].keymic[3] = SWAP32(D + opad[3]);
}
View
73 src/opencl_wpapsk_fmt.c
@@ -14,6 +14,12 @@
#include "config.h"
#include "common-opencl.h"
+
+static cl_mem mem_in, mem_out, mem_salt, mem_state;
+static cl_kernel wpapsk_init, wpapsk_loop, wpapsk_pass2, wpapsk_final_md5, wpapsk_final_sha1;
+static int VF = 1; /* Will be set to 4 when we run vectorized */
+
+#define JOHN_OCL_WPAPSK
#include "wpapsk.h"
#define FORMAT_LABEL "wpapsk-opencl"
@@ -36,24 +42,20 @@
#define MAX(a, b) (a > b) ? (a) : (b)
extern wpapsk_password *inbuffer;
-extern wpapsk_hash *outbuffer;
extern wpapsk_salt currentsalt;
extern mic_t *mic;
extern hccap_t hccap;
-static cl_mem mem_in, mem_out, mem_salt, mem_state;
-static cl_kernel wpapsk_init, wpapsk_loop, wpapsk_pass2, wpapsk_final;
-static int VF = 1; /* Will be set to 4 when we run vectorized */
-
typedef struct {
cl_uint W[5];
cl_uint ipad[5];
cl_uint opad[5];
cl_uint out[5];
+ cl_uint partial[5];
} wpapsk_state;
static struct fmt_tests tests[] = {
-/// testcase from http://wiki.wireshark.org/SampleCaptures = wpa-Induction.pcap
+/// testcase from http://wiki.wireshark.org/SampleCaptures = wpa-Induction.pcap. This is SHA-1 post-process.
{"$WPAPSK$Coherer#..l/Uf7J..qHUXMunTE3nfbMWSwxv27Ua0XutIOrfRSuv9gOCIugIVGlosMyXdNxfBZUAYmgKqeb6GBPxLiIZr56NtWTGR/Cp5ldAk61.5I0.Ec.2...........nTE3nfbMWSwxv27Ua0XutIOrfRSuv9gOCIugIVGlosM.................................................................3X.I.E..1uk0.E..1uk2.E..1uk0....................................................................................................................................................................................../t.....U...8FWdk8OpPckhewBwt4MXYI", "Induction"},
{NULL}
};
@@ -67,8 +69,6 @@ static void create_clobj(int gws, struct fmt_main *self)
self->params.min_keys_per_crypt = self->params.max_keys_per_crypt = gws;
/// Allocate memory
- mic = (mic_t *) malloc(sizeof(mic_t) * gws);
-
mem_in = clCreateBuffer(context[ocl_gpu_id], CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(wpapsk_password) * gws, NULL, &ret_code);
HANDLE_CLERROR(ret_code, "Error allocating mem in");
inbuffer = clEnqueueMapBuffer(queue[ocl_gpu_id], mem_in, CL_TRUE, CL_MAP_READ, 0, sizeof(wpapsk_password) * gws, 0, NULL, NULL, &ret_code);
@@ -80,9 +80,9 @@ static void create_clobj(int gws, struct fmt_main *self)
mem_salt = clCreateBuffer(context[ocl_gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(wpapsk_salt), &currentsalt, &ret_code);
HANDLE_CLERROR(ret_code, "Error allocating mem setting");
- mem_out = clCreateBuffer(context[ocl_gpu_id], CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(wpapsk_hash) * gws, NULL, &ret_code);
+ mem_out = clCreateBuffer(context[ocl_gpu_id], CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(mic_t) * gws, NULL, &ret_code);
HANDLE_CLERROR(ret_code, "Error allocating mem out");
- outbuffer = clEnqueueMapBuffer(queue[ocl_gpu_id], mem_out, CL_TRUE, CL_MAP_WRITE, 0, sizeof(wpapsk_hash) * gws, 0, NULL, NULL, &ret_code);
+ mic = clEnqueueMapBuffer(queue[ocl_gpu_id], mem_out, CL_TRUE, CL_MAP_WRITE, 0, sizeof(mic_t) * gws, 0, NULL, NULL, &ret_code);
HANDLE_CLERROR(ret_code, "Error mapping page-locked memory");
/*
@@ -98,20 +98,22 @@ static void create_clobj(int gws, struct fmt_main *self)
HANDLE_CLERROR(clSetKernelArg(wpapsk_loop, 0, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
- HANDLE_CLERROR(clSetKernelArg(wpapsk_pass2, 0, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
- HANDLE_CLERROR(clSetKernelArg(wpapsk_pass2, 1, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
- HANDLE_CLERROR(clSetKernelArg(wpapsk_pass2, 2, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
+ HANDLE_CLERROR(clSetKernelArg(wpapsk_pass2, 0, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
+ HANDLE_CLERROR(clSetKernelArg(wpapsk_pass2, 1, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
+
+ HANDLE_CLERROR(clSetKernelArg(wpapsk_final_md5, 0, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
+ HANDLE_CLERROR(clSetKernelArg(wpapsk_final_md5, 1, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
+ HANDLE_CLERROR(clSetKernelArg(wpapsk_final_md5, 2, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
- HANDLE_CLERROR(clSetKernelArg(wpapsk_final, 0, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
- HANDLE_CLERROR(clSetKernelArg(wpapsk_final, 1, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
+ HANDLE_CLERROR(clSetKernelArg(wpapsk_final_sha1, 0, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
+ HANDLE_CLERROR(clSetKernelArg(wpapsk_final_sha1, 1, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
+ HANDLE_CLERROR(clSetKernelArg(wpapsk_final_sha1, 2, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
}
static void release_clobj(void)
{
- MEM_FREE(mic);
-
HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], mem_in, inbuffer, 0, NULL, NULL), "Error Unmapping mem in");
- HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], mem_out, outbuffer, 0, NULL, NULL), "Error Unmapping mem in");
+ HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], mem_out, mic, 0, NULL, NULL), "Error Unmapping mem in");
HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem_state");
HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem setting");
@@ -126,7 +128,8 @@ static void release_all(void)
HANDLE_CLERROR(clReleaseKernel(wpapsk_init), "Release Kernel");
HANDLE_CLERROR(clReleaseKernel(wpapsk_loop), "Release Kernel");
HANDLE_CLERROR(clReleaseKernel(wpapsk_pass2), "Release Kernel");
- HANDLE_CLERROR(clReleaseKernel(wpapsk_final), "Release Kernel");
+ HANDLE_CLERROR(clReleaseKernel(wpapsk_final_md5), "Release Kernel");
+ HANDLE_CLERROR(clReleaseKernel(wpapsk_final_sha1), "Release Kernel");
}
static void set_key(char *key, int index);
@@ -164,10 +167,10 @@ static cl_ulong gws_test(int gws, int do_benchmark, struct fmt_main *self)
for (i = 0; i < ITERATIONS / HASH_LOOPS; i++)
HANDLE_CLERROR(clEnqueueNDRangeKernel(queue_prof, wpapsk_loop, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL), "Run loop kernel (2nd)");
- HANDLE_CLERROR(clEnqueueNDRangeKernel(queue_prof, wpapsk_final, 1, NULL, &global_work_size, &local_work_size, 0, NULL, &Event[5]), "Run final kernel");
+ HANDLE_CLERROR(clEnqueueNDRangeKernel(queue_prof, wpapsk_final_sha1, 1, NULL, &global_work_size, &local_work_size, 0, NULL, &Event[5]), "Run final kernel");
/// Read the result back
- HANDLE_CLERROR(clEnqueueReadBuffer(queue_prof, mem_out, CL_TRUE, 0, sizeof(wpapsk_hash) * global_work_size, outbuffer, 0, NULL, &Event[6]), "Copy result back");
+ HANDLE_CLERROR(clEnqueueReadBuffer(queue_prof, mem_out, CL_TRUE, 0, sizeof(mic_t) * global_work_size, mic, 0, NULL, &Event[6]), "Copy result back");
#if 0
HANDLE_CLERROR(clGetEventProfilingInfo(Event[2], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &startTime, NULL), "Failed to get profiling info");
@@ -215,7 +218,7 @@ static void find_best_gws(int do_benchmark, struct fmt_main *self)
cl_ulong run_time, min_time = CL_ULONG_MAX;
unsigned int SHAspeed, bestSHAspeed = 0;
int optimal_gws = local_work_size;
- const int sha1perkey = 2 * ITERATIONS * 2 + 6; // With postprocess it's 10 more or so
+ const int sha1perkey = 2 * ITERATIONS * 2 + 6 + 10;
unsigned long long int MaxRunTime = cpu(device_info[ocl_gpu_id]) ? 1000000000ULL : 5000000000ULL;
if (do_benchmark) {
@@ -259,7 +262,7 @@ static void find_best_gws(int do_benchmark, struct fmt_main *self)
static void init(struct fmt_main *self)
{
- char *temp, build_opts[64];
+ char *temp, build_opts[128];
cl_ulong maxsize, maxsize2;
global_work_size = 0;
@@ -277,7 +280,7 @@ static void init(struct fmt_main *self)
if ((temp = getenv("GWS")))
global_work_size = atoi(temp);
- snprintf(build_opts, sizeof(build_opts), "-DHASH_LOOPS=%u -DITERATIONS=%u", HASH_LOOPS, ITERATIONS);
+ snprintf(build_opts, sizeof(build_opts), "-DHASH_LOOPS=%u -DITERATIONS=%u -DPLAINTEXT_LENGTH=%u", HASH_LOOPS, ITERATIONS, PLAINTEXT_LENGTH);
opencl_init_opt("$JOHN/wpapsk_kernel.cl", ocl_gpu_id, platform_id, build_opts);
crypt_kernel = wpapsk_init = clCreateKernel(program[ocl_gpu_id], "wpapsk_init", &ret_code);
@@ -286,7 +289,9 @@ static void init(struct fmt_main *self)
HANDLE_CLERROR(ret_code, "Error creating kernel");
wpapsk_pass2 = clCreateKernel(program[ocl_gpu_id], "wpapsk_pass2", &ret_code);
HANDLE_CLERROR(ret_code, "Error creating kernel");
- wpapsk_final = clCreateKernel(program[ocl_gpu_id], "wpapsk_final", &ret_code);
+ wpapsk_final_md5 = clCreateKernel(program[ocl_gpu_id], "wpapsk_final_md5", &ret_code);
+ HANDLE_CLERROR(ret_code, "Error creating kernel");
+ wpapsk_final_sha1 = clCreateKernel(program[ocl_gpu_id], "wpapsk_final_sha1", &ret_code);
HANDLE_CLERROR(ret_code, "Error creating kernel");
/* Note: we ask for the kernels' max sizes, not the device's! */
@@ -295,7 +300,9 @@ static void init(struct fmt_main *self)
if (maxsize2 < maxsize) maxsize = maxsize2;
HANDLE_CLERROR(clGetKernelWorkGroupInfo(wpapsk_pass2, devices[ocl_gpu_id], CL_KERNEL_WORK_GROUP_SIZE, sizeof(maxsize2), &maxsize2, NULL), "Query max work group size");
if (maxsize2 < maxsize) maxsize = maxsize2;
- HANDLE_CLERROR(clGetKernelWorkGroupInfo(wpapsk_final, devices[ocl_gpu_id], CL_KERNEL_WORK_GROUP_SIZE, sizeof(maxsize2), &maxsize2, NULL), "Query max work group size");
+ HANDLE_CLERROR(clGetKernelWorkGroupInfo(wpapsk_final_md5, devices[ocl_gpu_id], CL_KERNEL_WORK_GROUP_SIZE, sizeof(maxsize2), &maxsize2, NULL), "Query max work group size");
+ if (maxsize2 < maxsize) maxsize = maxsize2;
+ HANDLE_CLERROR(clGetKernelWorkGroupInfo(wpapsk_final_sha1, devices[ocl_gpu_id], CL_KERNEL_WORK_GROUP_SIZE, sizeof(maxsize2), &maxsize2, NULL), "Query max work group size");
if (maxsize2 < maxsize) maxsize = maxsize2;
//fprintf(stderr, "Max LWS %lu\n", maxsize);
@@ -331,7 +338,6 @@ static void crypt_all(int count)
/// Copy data to gpu
HANDLE_CLERROR(clEnqueueWriteBuffer(queue[ocl_gpu_id], mem_in, CL_FALSE, 0, sizeof(wpapsk_password) * global_work_size, inbuffer, 0, NULL, NULL), "Copy data to gpu");
- HANDLE_CLERROR(clEnqueueWriteBuffer(queue[ocl_gpu_id], mem_salt, CL_FALSE, 0, sizeof(wpapsk_salt), &currentsalt, 0, NULL, NULL), "Copy setting to gpu");
/// Run kernel
HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], wpapsk_init, 1, NULL, &global_work_size, &local_work_size, 0, NULL, firstEvent), "Run initial kernel");
@@ -342,15 +348,16 @@ static void crypt_all(int count)
HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], wpapsk_pass2, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL), "Run intermediate kernel");
for (i = 0; i < ITERATIONS / HASH_LOOPS; i++)
- HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], wpapsk_loop, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL), "Run loop kernel (2nd)");
+ HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], wpapsk_loop, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL), "Run loop kernel (2nd pass)");
- HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], wpapsk_final, 1, NULL, &global_work_size, &local_work_size, 0, NULL, lastEvent), "Run final kernel");
+ if (hccap.keyver == 1)
+ HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], wpapsk_final_md5, 1, NULL, &global_work_size, &local_work_size, 0, NULL, lastEvent), "Run final kernel (MD5)");
+ else
+ HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], wpapsk_final_sha1, 1, NULL, &global_work_size, &local_work_size, 0, NULL, lastEvent), "Run final kernel (SHA1)");
+ HANDLE_CLERROR(clFinish(queue[ocl_gpu_id]), "Failed running final kernel");
/// Read the result back
- HANDLE_CLERROR(clEnqueueReadBuffer(queue[ocl_gpu_id], mem_out, CL_TRUE, 0, sizeof(wpapsk_hash) * global_work_size, outbuffer, 0, NULL, NULL), "Copy result back");
-
- ///Make last computations on CPU
- wpapsk_postprocess(global_work_size);
+ HANDLE_CLERROR(clEnqueueReadBuffer(queue[ocl_gpu_id], mem_out, CL_TRUE, 0, sizeof(mic_t) * global_work_size, mic, 0, NULL, NULL), "Copy result back");
}
View
89 src/wpapsk.h
@@ -22,7 +22,15 @@
#define uint32_t ARCH_WORD_32
#define BINARY_SIZE sizeof(mic_t)
+
+/* The OpenCL format defines JOHN_OCL_WPAPSK before including this
+ * header file, changing some behaviors. */
+#ifdef JOHN_OCL_WPAPSK
+#define PLAINTEXT_LENGTH 32
+#else
#define PLAINTEXT_LENGTH 15
+#endif
+
#define SALT_SIZE sizeof(hccap_t)
#define BENCHMARK_COMMENT ""
#define BENCHMARK_LENGTH -1
@@ -47,8 +55,8 @@ typedef struct
} mic_t;
typedef struct {
- uint8_t length;
- uint8_t v[15];
+ uint32_t length;
+ uint8_t v[PLAINTEXT_LENGTH];
} wpapsk_password;
typedef struct {
@@ -56,8 +64,13 @@ typedef struct {
} wpapsk_hash;
typedef struct {
- uint8_t length;
- uint8_t salt[15];
+ uint32_t length;
+#ifdef JOHN_OCL_WPAPSK
+ uint8_t eapol[256 + 64];
+ uint32_t eapol_size; // blocks
+ uint8_t data[64 + 12];
+#endif
+ uint8_t salt[15]; // essid
} wpapsk_salt;
@@ -68,7 +81,9 @@ static hccap_t hccap; ///structure with hccap data
static wpapsk_salt currentsalt; ///structure for essid
static mic_t *mic; ///table for MIC keys
static wpapsk_password *inbuffer; ///table for candidate passwords
+#ifndef JOHN_OCL_WPAPSK
static wpapsk_hash *outbuffer; ///table for PMK calculated by GPU
+#endif
static const char wpapsk_prefix[] = "$WPAPSK$";
@@ -165,6 +180,7 @@ static int valid(char *ciphertext, struct fmt_main *self)
return 1;
}
+#ifndef JOHN_OCL_WPAPSK
static MAYBE_INLINE void prf_512(uint32_t * key, uint8_t * data, uint32_t * ret)
{
HMAC_CTX ctx;
@@ -180,12 +196,52 @@ static MAYBE_INLINE void prf_512(uint32_t * key, uint8_t * data, uint32_t * ret)
HMAC_Final(&ctx, (unsigned char *) ret, NULL);
HMAC_CTX_cleanup(&ctx);
}
+#endif
+
+static void insert_mac(uint8_t * data)
+{
+ int k = memcmp(hccap.mac1, hccap.mac2, 6);
+ if (k > 0) {
+ memcpy(data, hccap.mac2, 6);
+ memcpy(data + 6, hccap.mac1, 6);
+ } else {
+ memcpy(data, hccap.mac1, 6);
+ memcpy(data + 6, hccap.mac2, 6);
+ }
+}
+
+static void insert_nonce(uint8_t * data)
+{
+ int k = memcmp(hccap.nonce1, hccap.nonce2, 32);
+ if (k > 0) {
+ memcpy(data, hccap.nonce2, 32);
+ memcpy(data + 32, hccap.nonce1, 32);
+ } else {
+ memcpy(data, hccap.nonce1, 32);
+ memcpy(data + 32, hccap.nonce2, 32);
+ }
+}
static void set_salt(void *salt)
{
memcpy(&hccap, salt, SALT_SIZE);
strcpy((char*)currentsalt.salt, hccap.essid);
currentsalt.length = strlen(hccap.essid);
+
+#ifdef JOHN_OCL_WPAPSK
+ currentsalt.eapol_size = 1 + (hccap.eapol_size + 8) / 64;
+ memcpy(currentsalt.eapol, hccap.eapol, hccap.eapol_size);
+ memset(currentsalt.eapol + hccap.eapol_size, 0x80, 1);
+ memset(currentsalt.eapol + hccap.eapol_size + 1, 0, 256 + 64 - hccap.eapol_size - 1);
+ if (hccap.keyver != 1)
+ alter_endianity(currentsalt.eapol, 256+56);
+ ((unsigned int*)currentsalt.eapol)[16 * ((hccap.eapol_size + 8) / 64) + ((hccap.keyver == 1) ? 14 : 15)] = (64 + hccap.eapol_size) << 3;
+ insert_mac(currentsalt.data);
+ insert_nonce(currentsalt.data + 12);
+ alter_endianity(currentsalt.data, 64 + 12);
+
+ HANDLE_CLERROR(clEnqueueWriteBuffer(queue[ocl_gpu_id], mem_salt, CL_FALSE, 0, sizeof(wpapsk_salt), &currentsalt, 0, NULL, NULL), "Copy setting to gpu");
+#endif
}
#undef set_key
@@ -206,30 +262,8 @@ static char *get_key(int index)
ret[length] = '\0';
return ret;
}
-static void insert_mac(uint8_t * data)
-{
- int k = memcmp(hccap.mac1, hccap.mac2, 6);
- if (k > 0) {
- memcpy(data, hccap.mac2, 6);
- memcpy(data + 6, hccap.mac1, 6);
- } else {
- memcpy(data, hccap.mac1, 6);
- memcpy(data + 6, hccap.mac2, 6);
- }
-}
-
-static void insert_nonce(uint8_t * data)
-{
- int k = memcmp(hccap.nonce1, hccap.nonce2, 32);
- if (k > 0) {
- memcpy(data, hccap.nonce2, 32);
- memcpy(data + 32, hccap.nonce1, 32);
- } else {
- memcpy(data, hccap.nonce1, 32);
- memcpy(data + 32, hccap.nonce2, 32);
- }
-}
+#ifndef JOHN_OCL_WPAPSK
static void wpapsk_postprocess(int keys)
{
int i;
@@ -261,6 +295,7 @@ static void wpapsk_postprocess(int keys)
}
}
}
+#endif
static int binary_hash_0(void *binary)
{
Please sign in to comment.
Something went wrong with that request. Please try again.