Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
base fork: qqldd/myrice-JtR
...
head fork: qqldd/myrice-JtR
Checking mergeability… Don't worry, you can still create the pull request.
  • 13 commits
  • 18 files changed
  • 0 commit comments
  • 5 contributors
View
9 .gitignore
@@ -9,23 +9,24 @@ run/hccap2john
run/john
run/mkvcalcproba
run/mozilla2john
+run/office2john
+run/opencl_bf_std.h
run/opencl_cryptsha512.h
+run/opencl_rar.h
run/pdf2john
-run/racf2john
run/pwsafe2john
+run/racf2john
run/rar2john
run/ssh2john
run/tgtsnarf
-run/office2john
-run/vncpcap2john
run/unafs
run/undrop
run/unique
run/unshadow
+run/vncpcap2john
run/zip2john
src/arch.h
src/fmt_externs.h
src/fmt_registers.h
src/john_build_rule.h
test/
-run/opencl_rar.h
View
25 doc/README.opencl
@@ -72,7 +72,8 @@ LWS is the local work size aka, the number of "threads" the job
will be split and sent to the GPU.
- if $LWS is not setted john will try to get the one
- best for your system
+ best for your system. On some slow hashes, a good default
+ is going to be picked.
KPC is the Keys Per Crypt, the number of keys they will be tried
at the same time .
@@ -114,6 +115,28 @@ a good idea to set up PLAINTEXT_LENGTH to a lower value than
- KPC should always be the possible product of LWS: you should always
be able to divide KPC / LWS and get an integer number
+====================
+Supported formats:
+====================
+
+More information about supported hashes can be seen at:
+http://openwall.info/wiki/john/GPU
+
+Currently John the Ripper supports OpenCL enabled devices for
+the following hashes:
+- crypt MD5
+- crypt SHA-512 (http://openwall.info/wiki/john/OpenCL-SHA-512)
+- Mac OS X 10.7+ salted SHA-512
+- MsCash2
+- MySQL 4.1 double-SHA-1
+- Netscape LDAP SSHA
+- NT MD4
+- phpass
+- RAR3
+- Raw MD4
+- Raw MD5
+- Raw SHA-1
+- WPA-PSK
============================================================
Following is the verbatim original content of this file:
View
2  src/Makefile
@@ -133,7 +133,7 @@ OCL_OBJS = \
opencl_cryptmd5_fmt.o opencl_phpass_fmt.o opencl_rawsha1_fmt.o \
opencl_nt_fmt.o opencl_rawmd5_fmt.o opencl_nsldaps_fmt.o \
opencl_cryptsha512_fmt.o opencl_mscash2_fmt.o opencl_wpapsk_fmt.o \
- opencl_xsha512_fmt.o opencl_rawsha512_fmt.o
+ opencl_xsha512_fmt.o opencl_rawsha512_fmt.o opencl_bf_std.o opencl_bf_fmt.o
CUDA_OBJS = \
cuda_common.o \
View
56 src/common-opencl.c
@@ -8,8 +8,6 @@
static char opencl_log[LOG_SIZE];
static char *kernel_source;
static int kernel_loaded;
-static int device_info;
-static int cores_per_MP;
void advance_cursor()
{
@@ -99,8 +97,8 @@ static char *include_source(char *pathname, int dev_id)
sprintf(include, "-I %s %s %s%d %s %s", path_expand(pathname),
get_device_type(dev_id) == CL_DEVICE_TYPE_CPU ?
"-DDEVICE_IS_CPU" : "",
- "-DDEVICE_INFO=", device_info,
- gpu_nvidia(device_info) ? "-cl-nv-verbose" : "",
+ "-DDEVICE_INFO=", device_info[dev_id],
+ gpu_nvidia(device_info[dev_id]) ? "-cl-nv-verbose" : "",
"-cl-strict-aliasing -cl-mad-enable");
//fprintf(stderr, "Options used: %s\n", include);
@@ -162,14 +160,14 @@ void opencl_get_dev_info(unsigned int dev_id)
device = get_device_type(dev_id);
if (device == CL_DEVICE_TYPE_CPU)
- device_info = CPU;
+ device_info[dev_id] = CPU;
else if (device == CL_DEVICE_TYPE_GPU)
- device_info = GPU;
+ device_info[dev_id] = GPU;
else if (device == CL_DEVICE_TYPE_ACCELERATOR)
- device_info = ACCELERATOR;
+ device_info[dev_id] = ACCELERATOR;
- device_info += get_vendor_id(dev_id);
- device_info += get_processor_family(dev_id);
+ device_info[dev_id] += get_vendor_id(dev_id);
+ device_info[dev_id] += get_processor_family(dev_id);
}
void opencl_init_dev(unsigned int dev_id, unsigned int platform_id)
@@ -192,11 +190,6 @@ void opencl_init(char *kernel_filename, unsigned int dev_id,
opencl_build_kernel(kernel_filename, dev_id);
}
-int get_device_info()
-{
- return device_info;
-}
-
cl_device_type get_device_type(int dev_id)
{
cl_device_type type;
@@ -270,28 +263,28 @@ cl_uint get_processors_count(int dev_id)
{
cl_uint core_count = get_max_compute_units(dev_id);
- cores_per_MP = 0;
+ cores_per_MP[dev_id] = 0;
#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
- if (gpu_nvidia(device_info)) {
+ if (gpu_nvidia(device_info[dev_id])) {
unsigned int major = 0, minor = 0;
get_compute_capability(dev_id, &major, &minor);
if (major == 1)
- core_count *= (cores_per_MP = 8);
+ core_count *= (cores_per_MP[dev_id] = 8);
else if (major == 2 && minor == 0)
- core_count *= (cores_per_MP = 32); //2.0
+ core_count *= (cores_per_MP[dev_id] = 32); //2.0
else if (major == 2 && minor >= 1)
- core_count *= (cores_per_MP = 48); //2.1
+ core_count *= (cores_per_MP[dev_id] = 48); //2.1
else if (major == 3)
- core_count *= (cores_per_MP = 192); //3.0
+ core_count *= (cores_per_MP[dev_id] = 192); //3.0
} else
#endif
- if (gpu_amd(device_info)) {
- core_count *= (cores_per_MP = (16 * //16 thread proc * 5 SP
- ((amd_gcn(device_info) ||
- amd_vliw4(device_info)) ? 4 : 5)));
- } else if (gpu(device_info)) //Any other GPU
- core_count *= (cores_per_MP = 8);
+ if (gpu_amd(device_info[dev_id])) {
+ core_count *= (cores_per_MP[dev_id] = (16 * //16 thread proc * 5 SP
+ ((amd_gcn(device_info[dev_id]) ||
+ amd_vliw4(device_info[dev_id])) ? 4 : 5)));
+ } else if (gpu(device_info[dev_id])) //Any other GPU
+ core_count *= (cores_per_MP[dev_id] = 8);
return core_count;
}
@@ -303,10 +296,9 @@ cl_uint get_processor_family(int dev_id)
HANDLE_CLERROR(clGetDeviceInfo(devices[dev_id], CL_DEVICE_NAME,
sizeof(dname), dname, NULL), "Error querying CL_DEVICE_NAME");
- if gpu
- (device_info) {
+ if gpu (device_info[dev_id]) {
- if (gpu_amd(device_info) && (strstr(dname, "Cedar") ||
+ if (gpu_amd(device_info[dev_id]) && (strstr(dname, "Cedar") ||
strstr(dname, "Redwood") ||
strstr(dname, "Juniper") ||
strstr(dname, "Cypress") ||
@@ -516,13 +508,13 @@ void listOpenCLdevices(void)
opencl_get_dev_info(d);
long_entries = get_processors_count(d);
- if (cores_per_MP)
+ if (cores_per_MP[d])
printf
("\tStream processors:\t%lu (%d x %d)\n",
- long_entries, entries, cores_per_MP);
+ long_entries, entries, cores_per_MP[d]);
#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV
- if (gpu_nvidia(device_info)) {
+ if (gpu_nvidia(device_info[d])) {
unsigned int major = 0, minor = 0;
clGetDeviceInfo(devices[d],
View
4 src/common-opencl.h
@@ -36,6 +36,9 @@ cl_kernel crypt_kernel;
size_t local_work_size;
size_t max_group_size;
+int device_info[MAXGPUS];
+int cores_per_MP[MAXGPUS];
+
cl_int oclGetDevCap(cl_device_id device, cl_int *iComputeCapMajor, cl_int *iComputeCapMinor);
void opencl_init_dev(unsigned int dev_id, unsigned int platform_id);
@@ -43,7 +46,6 @@ void opencl_init(char *kernel_filename, unsigned int dev_id,
unsigned int platform_id);
void opencl_build_kernel(char *kernel_filename, unsigned int dev_id);
-int get_device_info();
cl_device_type get_device_type(int dev_id);
cl_ulong get_local_memory_size(int dev_id);
size_t get_max_work_group_size(int dev_id);
View
0  src/django_fmt_plug.c 100755 → 100644
File mode changed
View
31 src/john.bash_completion
@@ -60,6 +60,9 @@
#
# FIXME: should completion for --make-charset really list existing .chr files?
#
+# FIXME: should I generally use LC_ALL=C, not just in a few places?
+# (This could also be a little bit faster.)
+#
# TODO:
# --wordlist=~user/filename or --wordlist=~/dir/file doesn't work,
# but pressing [tab] expands this to something useful
@@ -430,7 +433,7 @@ _john()
return 0
;;
--subformat=*)
- if echo "${options}" | grep "^--subformat=" > /dev/null ; then
+ if echo "${valopts}" | grep "^--subformat=" > /dev/null ; then
cur=${cur#*=}
COMPREPLY=( $(compgen -W "LIST" -- ${cur}) )
fi
@@ -439,6 +442,29 @@ _john()
--session=*|--mem-file-size=*|--field-separator-char=*|--fix-state-delay=*|--max-run-time=*|--mkpc=*)
return 0
;;
+ --platform=L*|--device=L*|--platform=l*|--device=l*)
+ # CUDA doesn't allow --device=LIST
+ # workaround: check if --platform= is allowed
+ if echo "${valopts}" | grep "^--platform=$" > /dev/null ; then
+ cur=${cur#*=}
+ COMPREPLY=( $(compgen -W "LIST list" -- ${cur}) )
+ fi
+ return 0
+ ;;
+ --platform=|--device=)
+ # --device=LIST isn't supported for CUDA, but for CUDA
+ # --platform= is not a valid option
+ if echo "${valopts}" | grep "^--platform=$" > /dev/null ; then
+ # Calling john --platform=LIST just to find possible completions
+ # will take too long
+ cur=${cur#*=}
+ COMPREPLY=( $(compgen -W "LIST N" -- ${cur}) )
+ fi
+ return 0
+ ;;
+ --platform=*|--device=*)
+ return 0
+ ;;
--list=*)
if echo "${hidden}" | grep "^--list=" > /dev/null ; then
cur=${cur#*=}
@@ -478,14 +504,13 @@ complete -F _john john
## have grep && have sed &&
_unique()
{
- local first filename cur usage options valopts compreplya compreplyb
+ local first cur usage options valopts compreplya compreplyb
COMPREPLY=()
_get_comp_words_by_ref -n = cur
# we need to make sure we run the correct program, not some other program
# called unique which is located somewhere in $PATH
first="${COMP_WORDS[0]}"
- filename=`echo "${first}"|sed 's#^.*/\(.*\)$#\1#'`
usage=`${first}|grep '^Usage:'|sed 's#^Usage:\? \?[^ ]*unique *##'`
case "_${cur}" in
_|_${first})
View
2  src/john.c
@@ -139,6 +139,7 @@ extern struct fmt_main fmt_opencl_mscash2;
extern struct fmt_main fmt_opencl_wpapsk;
extern struct fmt_main fmt_opencl_xsha512;
extern struct fmt_main fmt_opencl_rawsha512;
+extern struct fmt_main fmt_opencl_bf;
#endif
#ifdef HAVE_CUDA
extern struct fmt_main fmt_cuda_cryptmd5;
@@ -275,6 +276,7 @@ static void john_register_all(void)
john_register_one(&fmt_opencl_wpapsk);
john_register_one(&fmt_opencl_xsha512);
john_register_one(&fmt_opencl_rawsha512);
+ john_register_one(&fmt_opencl_bf);
#endif
#ifdef HAVE_CUDA
View
212 src/opencl/bf_kernel.cl
@@ -0,0 +1,212 @@
+/*
+* This software is Copyright (c) 2012 Sayantan Datta <std2048 at gmail dot com>
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+* Based on Solar Designer implementation of bf_std.c in jtr-v1.7.8
+*/
+#define BF_ROUNDS 16
+
+typedef uint BF_word;
+
+typedef uint BF_key[BF_ROUNDS + 2];
+
+struct BF_ctx_S {
+ uint S[4][0x100];
+};
+
+struct BF_ctx_P{
+ uint P[18];
+};
+
+#define INDEX [index]
+
+#define BF_ROUND(ctx_S,ctx_P, L, R, N, tmp1, tmp2, tmp3, tmp4) \
+ tmp1 = ((unsigned long)L & 0xff); \
+ tmp2 = ((unsigned long)L >> 8); \
+ tmp2 = ((unsigned long)tmp2 & 0xff); \
+ tmp3 = ((unsigned long)L >> 16); \
+ tmp3 = ((unsigned long)tmp3 & 0xff); \
+ tmp4 = ((unsigned long)L >> 24); \
+ tmp1 = ctx_S.S[3][tmp1]; \
+ tmp2 = ctx_S.S[2][tmp2]; \
+ tmp3 = ctx_S.S[1][tmp3]; \
+ tmp3 = (unsigned long)((unsigned long)tmp3 + (unsigned long)ctx_S.S[0][tmp4]); \
+ tmp3 ^= tmp2; \
+ R =R ^ ctx_P[N + 1]; \
+ tmp3 = (unsigned long)((unsigned long)tmp3 + (unsigned long)tmp1); \
+ R =R ^ tmp3;
+
+#define BF_ENCRYPT(ctx_S,ctx_P, L, R) \
+ L ^= ctx_P[0]; \
+ BF_ROUND(ctx_S,ctx_P, L, R, 0, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, R, L, 1, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, L, R, 2, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, R, L, 3, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, L, R, 4, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, R, L, 5, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, L, R, 6, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, R, L, 7, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, L, R, 8, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, R, L, 9, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, L, R, 10, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, R, L, 11, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, L, R, 12, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, R, L, 13, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, L, R, 14, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P,R, L, 15, u1, u2, u3, u4); \
+ u4 = R; \
+ R = L; \
+ L = u4 ^ ctx_P[BF_ROUNDS + 1];
+
+#define BF_body() \
+ L0 = R0 = 0; \
+ ptr0 = BF_current_P; \
+ do { \
+ BF_ENCRYPT(BF_current_S INDEX,BF_current_P , L0, R0); \
+ *ptr0 = L0; \
+ *(ptr0 + 1) = R0; \
+ ptr0 += 2; \
+ } while (ptr0 < &BF_current_P [BF_ROUNDS + 2]); \
+\
+ ptr2 = BF_current_S INDEX.S[0]; \
+ do { \
+ ptr2 += 2; \
+ BF_ENCRYPT(BF_current_S INDEX,BF_current_P , L0, R0); \
+ *(ptr2 - 2) = L0; \
+ *(ptr2 - 1) = R0; \
+ } while (ptr2 < &BF_current_S INDEX.S[3][0xFF]);
+
+
+__kernel void blowfish(const __global uint *salt_global,
+ const __global uint *BF_key_exp_global,
+ __global uint *BF_out,
+ __global struct BF_ctx_S *BF_current_S,
+ __global struct BF_ctx_P *BF_current_P_global,
+ uint rounds )
+{
+ int index = get_global_id(0);
+ int lid = get_local_id(0);
+ __local uint salt[4];
+
+ if(lid==0){
+ salt[0]=salt_global[0];
+ salt[1]=salt_global[1];
+ salt[2]=salt_global[2];
+ salt[3]=salt_global[3];
+ }
+
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ int i;
+ __private uint BF_key_exp[18];
+ uint BF_current_P[18];
+
+ for(i=0;i<18;i++){
+ BF_key_exp[i]=BF_key_exp_global[18*index+i];
+ BF_current_P[i]=BF_current_P_global INDEX.P[i];
+ }
+
+
+ uint L0, R0;
+ uint u1, u2, u3, u4;
+ uint *ptr0;
+ uint count;
+ __global uint *ptr2;
+
+ L0 = R0 = 0;
+ for (i = 0; i < BF_ROUNDS + 2; i += 2) {
+ L0 ^= salt[i & 2];
+ R0 ^= salt[(i & 2) + 1];
+ BF_ENCRYPT(BF_current_S INDEX,BF_current_P , L0, R0);
+ BF_current_P[i] = L0;
+ BF_current_P[i + 1] = R0;
+ }
+
+ ptr2 = BF_current_S INDEX.S[0];
+ do {
+ ptr2 += 4;
+ L0 ^= salt[(BF_ROUNDS + 2) & 3];
+ R0 ^= salt[(BF_ROUNDS + 3) & 3];
+ BF_ENCRYPT(BF_current_S INDEX,BF_current_P , L0, R0);
+ *(ptr2 - 4) = L0;
+ *(ptr2 - 3) = R0;
+ L0 ^= salt[(BF_ROUNDS + 4) & 3];
+ R0 ^= salt[(BF_ROUNDS + 5) & 3];
+ BF_ENCRYPT(BF_current_S INDEX,BF_current_P , L0, R0);
+ *(ptr2 - 2) = L0;
+ *(ptr2 - 1) = R0;
+ } while (ptr2 < &BF_current_S INDEX.S[3][0xFF]);
+
+ count = 1 << rounds;
+
+ do {
+ BF_current_P[0] ^= BF_key_exp[0];
+ BF_current_P[1] ^= BF_key_exp[1];
+ BF_current_P[2] ^= BF_key_exp[2];
+ BF_current_P[3] ^= BF_key_exp[3];
+ BF_current_P[4] ^= BF_key_exp[4];
+ BF_current_P[5] ^= BF_key_exp[5];
+ BF_current_P[6] ^= BF_key_exp[6];
+ BF_current_P[7] ^= BF_key_exp[7];
+ BF_current_P[8] ^= BF_key_exp[8];
+ BF_current_P[9] ^= BF_key_exp[9];
+ BF_current_P[10] ^= BF_key_exp[10];
+ BF_current_P[11] ^= BF_key_exp[11];
+ BF_current_P[12] ^= BF_key_exp[12];
+ BF_current_P[13] ^= BF_key_exp[13];
+ BF_current_P[14] ^= BF_key_exp[14];
+ BF_current_P[15] ^= BF_key_exp[15];
+ BF_current_P[16] ^= BF_key_exp[16];
+ BF_current_P[17] ^= BF_key_exp[17];
+
+ BF_body();
+
+ u1 = salt[0];
+ u2 = salt[1];
+ u3 = salt[2];
+ u4 = salt[3];
+ BF_current_P[0] ^= u1;
+ BF_current_P[1] ^= u2;
+ BF_current_P[2] ^= u3;
+ BF_current_P[3] ^= u4;
+ BF_current_P[4] ^= u1;
+ BF_current_P[5] ^= u2;
+ BF_current_P[6] ^= u3;
+ BF_current_P[7] ^= u4;
+ BF_current_P[8] ^= u1;
+ BF_current_P[9] ^= u2;
+ BF_current_P[10] ^= u3;
+ BF_current_P[11] ^= u4;
+ BF_current_P[12] ^= u1;
+ BF_current_P[13] ^= u2;
+ BF_current_P[14] ^= u3;
+ BF_current_P[15] ^= u4;
+ BF_current_P[16] ^= u1;
+ BF_current_P[17] ^= u2;
+
+ BF_body();
+
+ } while (--count);
+
+
+ L0 = 0x4F727068;
+ R0 = 0x65616E42;
+
+ count = 64;
+
+ do {
+ BF_ENCRYPT(BF_current_S INDEX,BF_current_P , L0, R0);
+ } while (--count);
+
+ BF_out[2*index]=L0;
+ BF_out[2*index+1]=R0;
+
+ for(i=0;i<18;i++)
+ BF_current_P_global INDEX.P[i]=BF_current_P[i];
+
+}
+
+
+
+
+
View
177 src/opencl/cryptsha512_kernel_AMD_V1.cl
@@ -1,15 +1,15 @@
/*
- * Developed by Claudio André <claudio.andre at correios.net.br> in 2012
+ * Developed by Claudio André <claudio.andre at correios.net.br> in 2012
* Based on source code provided by Lukas Odzioba
*
* More information at http://openwall.info/wiki/john/OpenCL-SHA-512
*
* This software is:
- * Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com>
+ * Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com>
* Copyright (c) 2012 Claudio André <claudio.andre at correios.net.br>
* and it is hereby released to the general public under the following terms:
* Redistribution and use in source and binary forms, with or without modification, are permitted.
- *
+ *
* This program comes with ABSOLUTELY NO WARRANTY; express or implied .
*/
@@ -53,10 +53,10 @@ void init_ctx(__local sha512_ctx * ctx) {
}
void copy_data_to_local_memory(
- __constant crypt_sha512_salt * informed_salt,
- __global crypt_sha512_password * pass_data,
- __local crypt_sha512_salt * salt_data,
- __local working_memory * fast_tmp_memory) {
+ __constant sha512_salt * informed_salt,
+ __global sha512_password * pass_data,
+ __local sha512_salt * salt_data,
+ __local working_memory * fast_tmp_memory) {
//Transfer data to faster memory
//Password information
@@ -64,12 +64,12 @@ void copy_data_to_local_memory(
#pragma unroll
for (int i = 0; i < PLAINTEXT_ARRAY; i++)
- fast_tmp_memory->pass_data.pass->mem_64[i] =
- pass_data->pass->mem_64[i];
-
+ fast_tmp_memory->pass_data.pass->mem_64[i] =
+ pass_data->pass->mem_64[i];
+
if (get_local_id(0) == 0){
//Copy salt information to fast local memory. Only once in a group.
- salt_data->length = informed_salt->length;
+ salt_data->length = informed_salt->length;
salt_data->rounds = informed_salt->rounds;
#pragma unroll
@@ -79,17 +79,6 @@ void copy_data_to_local_memory(
mem_fence(CLK_LOCAL_MEM_FENCE);
}
-void insert_to_buffer(__local sha512_ctx * ctx,
- __local const uint8_t * string,
- const uint32_t len) {
- __local uint8_t * dest;
- dest = ctx->buffer->mem_08 + ctx->buflen;
-
- for (int i = 0; i < len; i++)
- PUTCHAR(dest, i, GETCHAR(string, i));
- ctx->buflen += len;
-}
-
void sha512_block(__local sha512_ctx * ctx) {
uint64_t a = ctx->H[0];
uint64_t b = ctx->H[1];
@@ -98,13 +87,13 @@ void sha512_block(__local sha512_ctx * ctx) {
uint64_t e = ctx->H[4];
uint64_t f = ctx->H[5];
uint64_t g = ctx->H[6];
- uint64_t h = ctx->H[7];
+ uint64_t h = ctx->H[7];
uint64_t t1, t2;
uint64_t w[16];
#ifdef VECTOR_USAGE
ulong16 w_vector;
- w_vector = vload16(0, ctx->buffer->mem_64);
+ w_vector = vload16(0, ctx->buffer->mem_64);
w_vector = SWAP64_V(w_vector);
vstore16(w_vector, 0, w);
#else
@@ -141,6 +130,33 @@ void sha512_block(__local sha512_ctx * ctx) {
ctx->H[7] += h;
}
+void insert_to_buffer(__local sha512_ctx * ctx,
+ __local const uint8_t * string,
+ const uint32_t len) {
+ __local uint8_t * dest;
+ dest = ctx->buffer->mem_08 + ctx->buflen;
+
+ for (int i = 0; i < len; i++)
+ PUTCHAR(dest, i, GETCHAR(string, i));
+ ctx->buflen += len;
+}
+
+void ctx_update(__local sha512_ctx * ctx,
+ __local uint8_t * string, uint32_t len) {
+
+ ctx->total += len;
+ uint32_t startpos = ctx->buflen;
+
+ insert_to_buffer(ctx, string, (startpos + len <= 128 ? len : 128 - startpos));
+
+ if (ctx->buflen == 128) { //Branching.
+ sha512_block(ctx);
+ ctx->buflen = 0;
+ uint32_t offset = 128 - startpos;
+ insert_to_buffer(ctx, (string + offset), len - offset);
+ }
+}
+
void ctx_append_1(__local sha512_ctx * ctx) {
uint32_t length = ctx->buflen;
@@ -148,12 +164,12 @@ void ctx_append_1(__local sha512_ctx * ctx) {
while (++length & 3)
PUTCHAR(ctx->buffer->mem_08, length, 0);
-
+
if (length & 7) {
__local uint32_t * l = (__local uint32_t *) (ctx->buffer->mem_08 + length);
*l = 0;
- length += 4;
- }
+ length += 4;
+ }
__local uint64_t * l = (__local uint64_t *) (ctx->buffer->mem_08 + length);
while (length < 128) {
@@ -173,26 +189,10 @@ void finish_ctx(__local sha512_ctx * ctx) {
ctx->buflen = 0;
}
-void ctx_update(__local sha512_ctx * ctx,
- __local uint8_t * string, uint32_t len) {
-
- ctx->total += len;
- uint32_t startpos = ctx->buflen;
-
- insert_to_buffer(ctx, string, (startpos + len <= 128 ? len : 128 - startpos));
-
- if (ctx->buflen == 128) { //Branching.
- sha512_block(ctx);
- ctx->buflen = 0;
- uint32_t offset = 128 - startpos;
- insert_to_buffer(ctx, (string + offset), len - offset);
- }
-}
-
void clear_ctx_buffer(__local sha512_ctx * ctx) {
#ifdef VECTOR_USAGE
- ulong16 w_vector = 0;
+ ulong16 w_vector = 0;
vstore16(w_vector, 0, ctx->buffer->mem_64);
#else
#pragma unroll
@@ -203,7 +203,7 @@ void clear_ctx_buffer(__local sha512_ctx * ctx) {
ctx->buflen = 0;
}
-void sha512_digest(__local sha512_ctx * ctx,
+void sha512_digest(__local sha512_ctx * ctx,
__local uint64_t * result) {
if (ctx->buflen <= 111) { //data+0x80+datasize fits in one 1024bit block
@@ -220,7 +220,7 @@ void sha512_digest(__local sha512_ctx * ctx,
clear_ctx_buffer(ctx);
if (moved) //append 1,the rest is already clean
- PUTCHAR(ctx->buffer->mem_08, 0, 0x80);
+ PUTCHAR(ctx->buffer->mem_08, 0, 0x80);
ctx_add_length(ctx);
}
sha512_block(ctx);
@@ -231,9 +231,9 @@ void sha512_digest(__local sha512_ctx * ctx,
}
-void sha512crypt(__local working_memory * fast_tmp_memory,
- __local crypt_sha512_salt * salt_data,
- __global crypt_sha512_hash * output) {
+void sha512crypt(__local working_memory * fast_tmp_memory,
+ __local sha512_salt * salt_data,
+ __global sha512_hash * output) {
#define pass fast_tmp_memory->pass_data.pass->mem_08
#define passlen fast_tmp_memory->pass_data.length
@@ -244,7 +244,6 @@ void sha512crypt(__local working_memory * fast_tmp_memory,
#define p_sequence fast_tmp_memory->p_sequence
#define ctx fast_tmp_memory->ctx_data
- int rounds;
init_ctx(&ctx);
ctx_update(&ctx, pass, passlen);
@@ -259,8 +258,8 @@ void sha512crypt(__local working_memory * fast_tmp_memory,
ctx_update(&ctx, alt_result->mem_08, passlen);
for (int i = passlen; i > 0; i >>= 1) {
- ctx_update(&ctx, ((i & 1) != 0 ? alt_result->mem_08 : pass),
- ((i & 1) != 0 ? 64 : passlen));
+ ctx_update(&ctx, ((i & 1) ? alt_result->mem_08 : pass),
+ ((i & 1) ? 64 : passlen));
}
sha512_digest(&ctx, alt_result->mem_64);
init_ctx(&ctx);
@@ -269,71 +268,67 @@ void sha512crypt(__local working_memory * fast_tmp_memory,
ctx_update(&ctx, pass, passlen);
sha512_digest(&ctx, p_sequence->mem_64);
- init_ctx(&ctx);
-
- rounds = 16 + alt_result->mem_08[0];
+ init_ctx(&ctx);
/* For every character in the password add the entire password. */
- for (int i = 0; i < rounds; i++)
+ for (int i = 0; i < 16 + alt_result->mem_08[0]; i++)
ctx_update(&ctx, salt, saltlen);
- /* Finish the digest. */
+ /* Finish the digest. */
sha512_digest(&ctx, temp_result->mem_64);
-
- rounds = salt_data->rounds;
- /* Repeatedly run the collected hash value through SHA512 to burn CPU cycles. */
- for (int i = 0; i < rounds; i++) {
+ /* Repeatedly run the collected hash value through SHA512 to burn cycles. */
+ for (int i = 0; i < salt_data->rounds; i++) {
init_ctx(&ctx);
- ctx_update(&ctx, ((i & 1) != 0 ? p_sequence->mem_08 : alt_result->mem_08),
- ((i & 1) != 0 ? passlen : 64));
+ ctx_update(&ctx, ((i & 1) ? p_sequence->mem_08 : alt_result->mem_08),
+ ((i & 1) ? passlen : 64));
- if ((i % 3) != 0)
+ if (i % 3)
ctx_update(&ctx, temp_result->mem_08, saltlen);
- if ((i % 7) != 0)
+ if (i % 7)
ctx_update(&ctx, p_sequence->mem_08, passlen);
- ctx_update(&ctx, ((i & 1) != 0 ? alt_result->mem_08 : p_sequence->mem_08),
- ((i & 1) != 0 ? 64 : passlen));
+ ctx_update(&ctx, ((i & 1) ? alt_result->mem_08 : p_sequence->mem_08),
+ ((i & 1) ? 64 : passlen));
sha512_digest(&ctx, alt_result->mem_64);
}
//Send results to the host.
#pragma unroll
for (int i = 0; i < 8; i++)
- output->v[i] = alt_result[i].mem_64[0];
+ output->v[i] = alt_result[i].mem_64[0];
}
-#undef salt
-#undef saltlen
+#undef salt
+#undef saltlen
#undef pass
__kernel
// __attribute__((vec_type_hint(ulong2))) Not recognized.
// __attribute__((reqd_work_group_size(32, 1, 1))) No gain.
-void kernel_crypt(__constant crypt_sha512_salt * informed_salt,
- __global crypt_sha512_password * pass_data,
- __global crypt_sha512_hash * out_buffer,
- __local crypt_sha512_salt * salt_data,
- __local working_memory * fast_tmp_memory) {
+void kernel_crypt(__constant sha512_salt * salt,
+ __global sha512_password * keys_buffer,
+ __global sha512_hash * out_buffer,
+ __local sha512_salt * salt_data,
+ __local working_memory * tmp_memory) {
//Get the task to be done
size_t gid = get_global_id(0);
size_t lid = get_local_id(0);
- //Copy to faster memory
- copy_data_to_local_memory(informed_salt, &pass_data[gid], salt_data, &fast_tmp_memory[lid]);
+ //Copy data to faster memory
+ copy_data_to_local_memory(salt, &keys_buffer[gid], salt_data, &tmp_memory[lid]);
//Do the job
- sha512crypt(&fast_tmp_memory[lid], salt_data, &out_buffer[gid]);
+ sha512crypt(&tmp_memory[lid], salt_data, &out_buffer[gid]);
}
/***
-* To improve performance, it uses __local memory to keep working variables
-* (password, temp buffers, etc). In SHA 512 it means about 350 bytes per
-* "thread". It improves performance, but, local memory is a scarce
-* resource.
-* It means the max group size allowed in OpenCL SHA 512 is going to be
+* To improve performance, it uses __local memory to keep working variables
+* (password, temp buffers, etc). In SHA 512 it means about 350 bytes per
+* "thread". It improves performance, but, local memory is a scarce
+* resource.
+* It means the max group size allowed in OpenCL SHA 512 is going to be
* 64 (it depends on hardware local memory size).
*
* Gain Optimizations
@@ -345,9 +340,9 @@ void kernel_crypt(__constant crypt_sha512_salt * informed_salt,
* ### Do the compare task on GPU.
* 5% Remove some unecessary code.
* ### Move almost everything to global and local memory. BAD.
-* 1% Use vector types in SHA_Block in some variables.
-* 5% Use bitselect in SHA_Block.
-* 15% Use PUTCHAR macro, only on CPU.
+* 1% Use vector types in SHA_Block in some variables.
+* 5% Use bitselect in SHA_Block.
+* 15% Use PUTCHAR macro, only on CPU.
*
* Conclusions
* - Compare on GPU: CPU is more efficient for now.
@@ -355,9 +350,13 @@ void kernel_crypt(__constant crypt_sha512_salt * informed_salt,
* - No register spilling happens after optimization. Although, might need to use less registers.
* - Tried to use less local memory. Got register spilling again.
* - Vectorized do not give better performance, but result in less instructions.
-* In reality, I'm not doing vector operations (doing the same thing in n bytes),
+* In reality, I'm not doing vector operations (doing the same thing in n bytes),
* so should not expect big gains anyway.
-* If i have a lot of memory, i might solve more than one hash at once
+* If i have a lot of memory, i might solve more than one hash at once
* (and use more vectors). But it is not possible (at least for a while).
-* - Crack process fails if i use PUTCHAR everywhere (seems GPU memory alignment).
+* - Crack process fails if i use PUTCHAR everywhere (seems GPU memory alignment).
+* - Tried to break this program into 3 kernels controled by CPU (for i=0; i<rounds on CPU).
+* No gain, but the final performance was almost the same of this version.
+* - Tried to break this program into 2 kernels (prepare and crypt). Prepare do the job done
+* outside the for loop. The gain was only 1%.
***/
View
145 src/opencl/cryptsha512_kernel_CPU.cl
@@ -1,15 +1,15 @@
/*
- * Developed by Claudio André <claudio.andre at correios.net.br> in 2012
+ * Developed by Claudio André <claudio.andre at correios.net.br> in 2012
* Based on source code provided by Lukas Odzioba
*
* More information at http://openwall.info/wiki/john/OpenCL-SHA-512
*
* This software is:
- * Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com>
+ * Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com>
* Copyright (c) 2012 Claudio André <claudio.andre at correios.net.br>
* and it is hereby released to the general public under the following terms:
* Redistribution and use in source and binary forms, with or without modification, are permitted.
- *
+ *
* This program comes with ABSOLUTELY NO WARRANTY; express or implied .
*/
@@ -52,8 +52,8 @@ void init_ctx(sha512_ctx * ctx) {
ctx->buflen = 0;
}
-inline void memcpy( uint8_t * dest,
- const uint8_t * src,
+inline void memcpy( uint8_t * dest,
+ const uint8_t * src,
const uint32_t destlen, const uint32_t srclen) {
int i = 0;
@@ -66,8 +66,8 @@ inline void memcpy( uint8_t * dest,
}
}
-inline void memcpy_G( uint8_t * dest,
- __global const uint8_t * src,
+inline void memcpy_G( uint8_t * dest,
+ __global const uint8_t * src,
const uint32_t destlen, const uint32_t srclen) {
int i = 0;
@@ -80,26 +80,6 @@ inline void memcpy_G( uint8_t * dest,
}
}
-void insert_to_buffer(sha512_ctx * ctx,
- const uint8_t * string,
- const uint32_t len) {
- uint8_t *d;
- d = ctx->buffer->mem_08 + ctx->buflen;
-
- memcpy(d, string, ctx->buflen, len);
- ctx->buflen += len;
-}
-
-void insert_to_buffer_G( sha512_ctx * ctx,
- __global const uint8_t * string,
- const uint32_t len) {
- uint8_t *d;
- d = ctx->buffer->mem_08 + ctx->buflen;
-
- memcpy_G(d, string, ctx->buflen, len);
- ctx->buflen += len;
-}
-
void sha512_block(sha512_ctx * ctx) {
uint64_t a = ctx->H[0];
uint64_t b = ctx->H[1];
@@ -157,35 +137,28 @@ void sha512_block(sha512_ctx * ctx) {
ctx->H[7] += h;
}
-void ctx_append_1(sha512_ctx * ctx) {
-
- uint32_t length = ctx->buflen;
- PUTCHAR(ctx->buffer->mem_08, length, 0x80);
-
- while (++length & 7)
- PUTCHAR(ctx->buffer->mem_08, length, 0);
-
- uint64_t * l = (uint64_t *) (ctx->buffer->mem_08 + length);
-
- while (length < 128) {
- *l++ = 0;
- length += 8;
- }
-}
-void ctx_add_length(sha512_ctx * ctx) {
+void insert_to_buffer(sha512_ctx * ctx,
+ const uint8_t * string,
+ const uint32_t len) {
+ uint8_t *d;
+ d = ctx->buffer->mem_08 + ctx->buflen;
- ctx->buffer->mem_64[15] = SWAP64((uint64_t) (ctx->total * 8));
+ memcpy(d, string, ctx->buflen, len);
+ ctx->buflen += len;
}
-void finish_ctx(sha512_ctx * ctx) {
+void insert_to_buffer_G( sha512_ctx * ctx,
+ __global const uint8_t * string,
+ const uint32_t len) {
+ uint8_t *d;
+ d = ctx->buffer->mem_08 + ctx->buflen;
- ctx_append_1(ctx);
- ctx_add_length(ctx);
- ctx->buflen = 0;
+ memcpy_G(d, string, ctx->buflen, len);
+ ctx->buflen += len;
}
-void ctx_update(sha512_ctx * ctx,
+void ctx_update(sha512_ctx * ctx,
uint8_t * string, uint32_t len) {
ctx->total += len;
@@ -201,7 +174,7 @@ void ctx_update(sha512_ctx * ctx,
}
}
-void ctx_update_G( sha512_ctx * ctx,
+void ctx_update_G( sha512_ctx * ctx,
__global uint8_t * string, uint32_t len) {
ctx->total += len;
@@ -217,6 +190,34 @@ void ctx_update_G( sha512_ctx * ctx,
}
}
+void ctx_append_1(sha512_ctx * ctx) {
+
+ uint32_t length = ctx->buflen;
+ PUTCHAR(ctx->buffer->mem_08, length, 0x80);
+
+ while (++length & 7)
+ PUTCHAR(ctx->buffer->mem_08, length, 0);
+
+ uint64_t * l = (uint64_t *) (ctx->buffer->mem_08 + length);
+
+ while (length < 128) {
+ *l++ = 0;
+ length += 8;
+ }
+}
+
+void ctx_add_length(sha512_ctx * ctx) {
+
+ ctx->buffer->mem_64[15] = SWAP64((uint64_t) (ctx->total * 8));
+}
+
+void finish_ctx(sha512_ctx * ctx) {
+
+ ctx_append_1(ctx);
+ ctx_add_length(ctx);
+ ctx->buflen = 0;
+}
+
void clear_ctx_buffer(sha512_ctx * ctx) {
#pragma unroll
@@ -226,7 +227,7 @@ void clear_ctx_buffer(sha512_ctx * ctx) {
ctx->buflen = 0;
}
-void sha512_digest(sha512_ctx * ctx,
+void sha512_digest(sha512_ctx * ctx,
uint64_t * result) {
if (ctx->buflen <= 111) { //data+0x80+datasize fits in one 1024bit block
@@ -253,9 +254,9 @@ void sha512_digest(sha512_ctx * ctx,
result[i] = SWAP64(ctx->H[i]);
}
-void sha512crypt(__global crypt_sha512_salt * salt_data,
- __global crypt_sha512_password * pass_data,
- __global crypt_sha512_hash * output) {
+void sha512crypt(__global sha512_salt * salt_data,
+ __global sha512_password * pass_data,
+ __global sha512_hash * output) {
#define pass pass_data->pass->mem_08
#define passlen pass_data->length
@@ -284,7 +285,7 @@ void sha512crypt(__global crypt_sha512_salt * salt_data,
for (uint32_t i = passlen; i > 0; i >>= 1) {
- if ((i & 1) != 0)
+ if (i & 1)
ctx_update(&ctx, alt_result->mem_08, 64);
else
ctx_update_G(&ctx, pass, passlen);
@@ -298,7 +299,7 @@ void sha512crypt(__global crypt_sha512_salt * salt_data,
sha512_digest(&ctx, p_sequence->mem_64);
init_ctx(&ctx);
-
+
/* For every character in the password add the entire password. */
for (uint32_t i = 0; i < 16 + alt_result->mem_08[0]; i++)
ctx_update_G(&ctx, salt, saltlen);
@@ -311,41 +312,37 @@ void sha512crypt(__global crypt_sha512_salt * salt_data,
for (uint32_t i = 0; i < rounds; i++) {
init_ctx(&ctx);
- ctx_update(&ctx, ((i & 1) != 0 ? p_sequence->mem_08 : alt_result->mem_08),
- ((i & 1) != 0 ? passlen : 64));
+ ctx_update(&ctx, ((i & 1) ? p_sequence->mem_08 : alt_result->mem_08),
+ ((i & 1) ? passlen : 64));
- if ((i % 3) != 0)
+ if (i % 3)
ctx_update(&ctx, temp_result->mem_08, saltlen);
- if ((i % 7) != 0)
+ if (i % 7)
ctx_update(&ctx, p_sequence->mem_08, passlen);
- ctx_update(&ctx, ((i & 1) != 0 ? alt_result->mem_08 : p_sequence->mem_08),
- ((i & 1) != 0 ? 64 : passlen));
+ ctx_update(&ctx, ((i & 1) ? alt_result->mem_08 : p_sequence->mem_08),
+ ((i & 1) ? 64 : passlen));
sha512_digest(&ctx, alt_result->mem_64);
}
//Send results to the host.
#pragma unroll
for (int i = 0; i < 8; i++)
- output->v[i] = alt_result[i].mem_64[0];
+ output->v[i] = alt_result[i].mem_64[0];
}
-#undef salt
-#undef saltlen
-#undef rounds
+#undef salt
+#undef saltlen
+#undef rounds
#undef pass
__kernel
-// __attribute__((vec_type_hint(ulong2))) Not recognized.
-// __attribute__((reqd_work_group_size(32, 1, 1))) No gain.
-void kernel_crypt(__global crypt_sha512_salt * informed_salt,
- __global crypt_sha512_password * pass_data,
- __global crypt_sha512_hash * out_buffer,
- __local crypt_sha512_salt * salt_data,
- __local working_memory * fast_tmp_memory) {
+void kernel_crypt(__global sha512_salt * informed_salt,
+ __global sha512_password * pass_data,
+ __global sha512_hash * out_buffer) {
//Get the task to be done
uint32_t gid = get_global_id(0);
//Do the job
- sha512crypt(informed_salt, &pass_data[gid], &out_buffer[gid]);
+ sha512crypt(informed_salt, &pass_data[gid], &out_buffer[gid]);
}
View
203 src/opencl/cryptsha512_kernel_NVIDIA.cl
@@ -1,15 +1,15 @@
/*
- * Developed by Claudio André <claudio.andre at correios.net.br> in 2012
+ * Developed by Claudio André <claudio.andre at correios.net.br> in 2012
* Based on source code provided by Lukas Odzioba
*
* More information at http://openwall.info/wiki/john/OpenCL-SHA-512
*
* This software is:
- * Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com>
+ * Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com>
* Copyright (c) 2012 Claudio André <claudio.andre at correios.net.br>
* and it is hereby released to the general public under the following terms:
* Redistribution and use in source and binary forms, with or without modification, are permitted.
- *
+ *
* This program comes with ABSOLUTELY NO WARRANTY; express or implied .
*/
@@ -53,23 +53,22 @@ void init_ctx(sha512_ctx * ctx) {
}
void copy_data_to_local_memory(
- __constant crypt_sha512_salt * informed_salt,
- __global crypt_sha512_password * pass_data,
- __local crypt_sha512_salt * salt_data,
- __local working_memory * fast_tmp_memory) {
+ __constant sha512_salt * informed_salt,
+ __global sha512_password * pass_data,
+ __local sha512_salt * salt_data,
+ __local sha512_password * fast_tmp_memory) {
//Transfer data to faster memory
//Password information
- fast_tmp_memory->pass_data.length = pass_data->length;
+ fast_tmp_memory->length = pass_data->length;
#pragma unroll
for (int i = 0; i < PLAINTEXT_ARRAY; i++)
- fast_tmp_memory->pass_data.pass->mem_64[i] =
- pass_data->pass->mem_64[i];
-
+ fast_tmp_memory->pass->mem_64[i] = pass_data->pass->mem_64[i];
+
if (get_local_id(0) == 0){
//Copy salt information to fast local memory. Only once in a group.
- salt_data->length = informed_salt->length;
+ salt_data->length = informed_salt->length;
salt_data->rounds = informed_salt->rounds;
#pragma unroll
@@ -79,30 +78,6 @@ void copy_data_to_local_memory(
mem_fence(CLK_LOCAL_MEM_FENCE);
}
-void insert_to_buffer(sha512_ctx * ctx,
- const uint8_t * string,
- const uint32_t len) {
- uint8_t *d;
- d = ctx->buffer->mem_08 + ctx->buflen; //ctx->buffer[buflen] (in char size)
-
- for (uint32_t i = 0; i < len; i++)
- PUTCHAR(d, i, GETCHAR(string, i));
-
- ctx->buflen += len;
-}
-
-void insert_to_buffer_L( sha512_ctx * ctx,
- __local const uint8_t * string,
- const uint32_t len) {
- uint8_t *d;
- d = ctx->buffer->mem_08 + ctx->buflen; //ctx->buffer[buflen] (in char size)
-
- for (uint32_t i = 0; i < len; i++)
- PUTCHAR(d, i, GETCHAR(string, i));
-
- ctx->buflen += len;
-}
-
void sha512_block(sha512_ctx * ctx) {
uint64_t a = ctx->H[0];
uint64_t b = ctx->H[1];
@@ -134,7 +109,7 @@ void sha512_block(sha512_ctx * ctx) {
a = t1 + t2;
}
- //#pragma unroll *** NVIDIA Compiler segfaults ***
+ #pragma unroll 16 // NVIDIA Compiler segfaults if uses: "#pragma unroll"
for (int i = 16; i < 80; i++) {
w[i & 15] = sigma1(w[(i - 2) & 15]) + sigma0(w[(i - 15) & 15]) + w[(i - 16) & 15] + w[(i - 7) & 15];
t1 = k[i] + w[i & 15] + h + Sigma1(e) + Ch(e, f, g);
@@ -160,40 +135,31 @@ void sha512_block(sha512_ctx * ctx) {
ctx->H[7] += h;
}
-void ctx_append_1(sha512_ctx * ctx) {
-
- uint32_t length = ctx->buflen;
- PUTCHAR(ctx->buffer->mem_08, length, 0x80);
+void insert_to_buffer(sha512_ctx * ctx,
+ const uint8_t * string,
+ const uint32_t len) {
+ uint8_t *d;
+ d = ctx->buffer->mem_08 + ctx->buflen; //ctx->buffer[buflen] (in char size)
- while (++length & 3)
- PUTCHAR(ctx->buffer->mem_08, length, 0);
-
- if (length & 7) {
- uint32_t * l = (uint32_t *) (ctx->buffer->mem_08 + length);
- *l = 0;
- length += 4;
- }
- uint64_t * l = (uint64_t *) (ctx->buffer->mem_08 + length);
+ for (uint32_t i = 0; i < len; i++)
+ PUTCHAR(d, i, GETCHAR(string, i));
- while (length < 128) {
- *l++ = 0;
- length += 8;
- }
+ ctx->buflen += len;
}
-void ctx_add_length(sha512_ctx * ctx) {
-
- ctx->buffer->mem_64[15] = SWAP64((uint64_t) (ctx->total * 8));
-}
+void insert_to_buffer_L( sha512_ctx * ctx,
+ __local const uint8_t * string,
+ const uint32_t len) {
+ uint8_t *d;
+ d = ctx->buffer->mem_08 + ctx->buflen; //ctx->buffer[buflen] (in char size)
-void finish_ctx(sha512_ctx * ctx) {
+ for (uint32_t i = 0; i < len; i++)
+ PUTCHAR(d, i, GETCHAR(string, i));
- ctx_append_1(ctx);
- ctx_add_length(ctx);
- ctx->buflen = 0;
+ ctx->buflen += len;
}
-void ctx_update(sha512_ctx * ctx,
+void ctx_update(sha512_ctx * ctx,
uint8_t * string, uint32_t len) {
ctx->total += len;
@@ -209,7 +175,7 @@ void ctx_update(sha512_ctx * ctx,
}
}
-void ctx_update_L( sha512_ctx * ctx,
+void ctx_update_L( sha512_ctx * ctx,
__local uint8_t * string, uint32_t len) {
ctx->total += len;
@@ -225,6 +191,39 @@ void ctx_update_L( sha512_ctx * ctx,
}
}
+void ctx_append_1(sha512_ctx * ctx) {
+
+ uint32_t length = ctx->buflen;
+ PUTCHAR(ctx->buffer->mem_08, length, 0x80);
+
+ while (++length & 3)
+ PUTCHAR(ctx->buffer->mem_08, length, 0);
+
+ if (length & 7) {
+ uint32_t * l = (uint32_t *) (ctx->buffer->mem_08 + length);
+ *l = 0;
+ length += 4;
+ }
+ uint64_t * l = (uint64_t *) (ctx->buffer->mem_08 + length);
+
+ while (length < 128) {
+ *l++ = 0;
+ length += 8;
+ }
+}
+
+void ctx_add_length(sha512_ctx * ctx) {
+
+ ctx->buffer->mem_64[15] = SWAP64((uint64_t) (ctx->total * 8));
+}
+
+void finish_ctx(sha512_ctx * ctx) {
+
+ ctx_append_1(ctx);
+ ctx_add_length(ctx);
+ ctx->buflen = 0;
+}
+
void clear_ctx_buffer(sha512_ctx * ctx) {
#pragma unroll
@@ -234,7 +233,7 @@ void clear_ctx_buffer(sha512_ctx * ctx) {
ctx->buflen = 0;
}
-void sha512_digest(sha512_ctx * ctx,
+void sha512_digest(sha512_ctx * ctx,
uint64_t * result) {
if (ctx->buflen <= 111) { //data+0x80+datasize fits in one 1024bit block
@@ -251,7 +250,7 @@ void sha512_digest(sha512_ctx * ctx,
clear_ctx_buffer(ctx);
if (moved) //append 1,the rest is already clean
- PUTCHAR(ctx->buffer->mem_08, 0, 0x80);
+ PUTCHAR(ctx->buffer->mem_08, 0, 0x80);
ctx_add_length(ctx);
}
sha512_block(ctx);
@@ -261,21 +260,20 @@ void sha512_digest(sha512_ctx * ctx,
result[i] = SWAP64(ctx->H[i]);
}
-void sha512crypt(__local working_memory * tmp_memory,
- __local crypt_sha512_salt * salt_data,
- __global crypt_sha512_hash * output) {
+void sha512crypt(__local sha512_password * pass_data,
+ __local sha512_salt * salt_data,
+ __global sha512_hash * output) {
-#define pass tmp_memory->pass_data.pass->mem_08
-#define passlen tmp_memory->pass_data.length
+#define pass pass_data->pass->mem_08
+#define passlen pass_data->length
#define salt salt_data->salt->mem_08
#define saltlen salt_data->length
-#define alt_result fast_tmp_memory.alt_result
-#define temp_result fast_tmp_memory.temp_result
-#define p_sequence fast_tmp_memory.p_sequence
-#define ctx fast_tmp_memory.ctx_data
+#define alt_result tmp_memory.alt_result
+#define temp_result tmp_memory.temp_result
+#define p_sequence tmp_memory.p_sequence
+#define ctx tmp_memory.ctx_data
- int rounds;
- working_memory fast_tmp_memory;
+ sha512_buffer tmp_memory;
init_ctx(&ctx);
@@ -292,7 +290,7 @@ void sha512crypt(__local working_memory * tmp_memory,
for (uint32_t i = passlen; i > 0; i >>= 1) {
- if ((i & 1) != 0)
+ if (i & 1)
ctx_update(&ctx, alt_result->mem_08, 64);
else
ctx_update_L(&ctx, pass, passlen);
@@ -306,60 +304,55 @@ void sha512crypt(__local working_memory * tmp_memory,
sha512_digest(&ctx, p_sequence->mem_64);
init_ctx(&ctx);
- rounds = 16 + alt_result->mem_08[0];
-
/* For every character in the password add the entire password. */
- for (int i = 0; i < rounds; i++)
+ for (int i = 0; i < 16 + alt_result->mem_08[0]; i++)
ctx_update_L(&ctx, salt, saltlen);
/* Finish the digest. */
sha512_digest(&ctx, temp_result->mem_64);
- rounds = salt_data->rounds;
-
- /* Repeatedly run the collected hash value through SHA512 to burn CPU cycles. */
- for (int i = 0; i < rounds; i++) {
+ /* Repeatedly run the collected hash value through SHA512 to burn cycles. */
+ for (uint32_t i = 0; i < salt_data->rounds; i++) {
init_ctx(&ctx);
- ctx_update(&ctx, ((i & 1) != 0 ? p_sequence->mem_08 : alt_result->mem_08),
- ((i & 1) != 0 ? passlen : 64));
+ ctx_update(&ctx, ((i & 1) ? p_sequence->mem_08 : alt_result->mem_08),
+ ((i & 1) ? passlen : 64));
- if ((i % 3) != 0)
+ if (i % 3)
ctx_update(&ctx, temp_result->mem_08, saltlen);
- if ((i % 7) != 0)
+ if (i % 7)
ctx_update(&ctx, p_sequence->mem_08, passlen);
- ctx_update(&ctx, ((i & 1) != 0 ? alt_result->mem_08 : p_sequence->mem_08),
- ((i & 1) != 0 ? 64 : passlen));
+ ctx_update(&ctx, ((i & 1) ? alt_result->mem_08 : p_sequence->mem_08),
+ ((i & 1) ? 64 : passlen));
sha512_digest(&ctx, alt_result->mem_64);
}
//Send results to the host.
#pragma unroll
for (int i = 0; i < 8; i++)
- output->v[i] = alt_result[i].mem_64[0];
+ output->v[i] = alt_result[i].mem_64[0];
}
-#undef salt
-#undef saltlen
-#undef rounds
+#undef salt
+#undef saltlen
+#undef rounds
#undef pass
__kernel
-// __attribute__((vec_type_hint(ulong2))) Not recognized.
-// __attribute__((reqd_work_group_size(32, 1, 1))) No gain.
-void kernel_crypt(__constant crypt_sha512_salt * informed_salt,
- __global crypt_sha512_password * pass_data,
- __global crypt_sha512_hash * out_buffer,
- __local crypt_sha512_salt * salt_data,
- __local working_memory * fast_tmp_memory) {
+void kernel_crypt(__constant sha512_salt * salt,
+ __global sha512_password * keys_buffer,
+ __global sha512_hash * out_buffer) {
//Get the task to be done
size_t gid = get_global_id(0);
size_t lid = get_local_id(0);
- //Copy to faster memory
- copy_data_to_local_memory(informed_salt, &pass_data[gid], salt_data, &fast_tmp_memory[lid]);
+ __local sha512_salt salt_data[1];
+ __local sha512_password tmp_memory[512];
+
+ //Copy data to faster memory
+ copy_data_to_local_memory(salt, &keys_buffer[gid], salt_data, &tmp_memory[lid]);
//Do the job
- sha512crypt(&fast_tmp_memory[lid], salt_data, &out_buffer[gid]);
+ sha512crypt(&tmp_memory[lid], salt_data, &out_buffer[gid]);
}
View
295 src/opencl_bf_fmt.c
@@ -0,0 +1,295 @@
+/*
+ * This file is part of John the Ripper password cracker,
+ * Copyright (c) 2012 by Sayantan Datta <std2048 at gmail dot com>
+ * It is hereby released to the general public under the following terms:
+ * Redistribution and use in source and binary forms, with or without modification, are permitted.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "arch.h"
+#include "misc.h"
+#include "opencl_bf_std.h"
+#include "common.h"
+#include "formats.h"
+
+#define FORMAT_LABEL "bf-opencl"
+#define FORMAT_NAME "OpenBSD Blowfish OpenCL"
+
+#define BENCHMARK_COMMENT ""
+#define BENCHMARK_LENGTH -1
+
+#define PLAINTEXT_LENGTH 72
+#define CIPHERTEXT_LENGTH 60
+
+#define BINARY_SIZE 4
+#define SALT_SIZE sizeof(BF_salt)
+
+#define MIN_KEYS_PER_CRYPT BF_N
+#define MAX_KEYS_PER_CRYPT BF_N
+
+#define OPENCL_BF_ALGORITHM_NAME "BF_OPENCL"
+
+static struct fmt_tests tests[] = {
+ {"$2a$05$CCCCCCCCCCCCCCCCCCCCC.E5YPO9kmyuRGyh0XouQYb4YMJKvyOeW",
+ "U*U"},
+ {"$2a$05$CCCCCCCCCCCCCCCCCCCCC.VGOzA784oUp/Z0DY336zx7pLYAy0lwK",
+ "U*U*"},
+ {"$2a$05$XXXXXXXXXXXXXXXXXXXXXOAcXxm9kjPGEMsLznoKqmqw7tc8WCx4a",
+ "U*U*U"},
+ {"$2a$05$CCCCCCCCCCCCCCCCCCCCC.7uG0VCzI2bS7j6ymqJi9CdcdxiRTWNy",
+ ""},
+ {"$2a$05$abcdefghijklmnopqrstuu5s2v8.iXieOjg/.AySBTTZIIVFJeBui",
+ "0123456789abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+ "chars after 72 are ignored"},
+ {"$2x$05$/OK.fbVrR/bpIqNJ5ianF.CE5elHaaO4EbggVDjb8P19RukzXSM3e",
+ "\xa3"},
+ {"$2a$05$/OK.fbVrR/bpIqNJ5ianF.Sa7shbm4.OzKpvFnX1pQLmQW96oUlCq",
+ "\xa3"},
+ {"$2x$05$6bNw2HLQYeqHYyBfLMsv/OiwqTymGIGzFsA4hOTWebfehXHNprcAS",
+ "\xd1\x91"},
+ {"$2x$05$6bNw2HLQYeqHYyBfLMsv/O9LIGgn8OMzuDoHfof8AQimSGfcSWxnS",
+ "\xd0\xc1\xd2\xcf\xcc\xd8"},
+ {"$2a$05$/OK.fbVrR/bpIqNJ5ianF.swQOIzjOiJ9GHEPuhEkvqrUyvWhEMx6",
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+ "chars after 72 are ignored as usual"},
+ {"$2a$05$/OK.fbVrR/bpIqNJ5ianF.R9xrDjiycxMbQE2bp.vgqlYpW5wx2yy",
+ "\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55"
+ "\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55"
+ "\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55"
+ "\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55"
+ "\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55"
+ "\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55"},
+ {"$2a$05$CCCCCCCCCCCCCCCCCCCCC.7uG0VCzI2bS7j6ymqJi9CdcdxiRTWNy",
+ ""},
+ {"$2a$05$/OK.fbVrR/bpIqNJ5ianF.9tQZzcJfm3uj2NvJ/n5xkhpqLrMpWCe",
+ "\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff"
+ "\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff"
+ "\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff"
+ "\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff"
+ "\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff"
+ "\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff"},
+ {NULL}
+};
+
+static char saved_key[BF_N][PLAINTEXT_LENGTH + 1];
+static char keys_mode;
+static int sign_extension_bug;
+static BF_salt saved_salt;
+
+static void init(struct fmt_main *pFmt)
+{ // BF_select_device(platform,device);
+ BF_select_device(0,0);
+ keys_mode = 'a';
+ sign_extension_bug = 0;
+}
+
+static int valid(char *ciphertext,struct fmt_main *pFmt)
+{
+ int rounds;
+ char *pos;
+
+ if (strncmp(ciphertext, "$2a$", 4) &&
+ strncmp(ciphertext, "$2x$", 4)) return 0;
+
+ if (ciphertext[4] < '0' || ciphertext[4] > '9') return 0;
+ if (ciphertext[5] < '0' || ciphertext[5] > '9') return 0;
+ rounds = atoi(ciphertext + 4);
+ if (rounds < 4 || rounds > 31) return 0;
+
+ if (ciphertext[6] != '$') return 0;
+
+ for (pos = &ciphertext[7]; atoi64[ARCH_INDEX(*pos)] != 0x7F; pos++);
+ if (*pos || pos - ciphertext != CIPHERTEXT_LENGTH) return 0;
+
+ if (opencl_BF_atoi64[ARCH_INDEX(*(pos - 1))] & 3) return 0;
+ if (opencl_BF_atoi64[ARCH_INDEX(ciphertext[28])] & 0xF) return 0;
+
+ return 1;
+}
+
+static int binary_hash_0(void *binary)
+{
+ return *(BF_word *)binary & 0xF;
+}
+
+static int binary_hash_1(void *binary)
+{
+ return *(BF_word *)binary & 0xFF;
+}
+
+static int binary_hash_2(void *binary)
+{
+ return *(BF_word *)binary & 0xFFF;
+}
+
+static int binary_hash_3(void *binary)
+{
+ return *(BF_word *)binary & 0xFFFF;
+}
+
+static int binary_hash_4(void *binary)
+{
+ return *(BF_word *)binary & 0xFFFFF;
+}
+
+static int binary_hash_5(void *binary)
+{
+ return *(BF_word *)binary & 0xFFFFFF;
+}
+
+static int binary_hash_6(void *binary)
+{
+ return *(BF_word *)binary & 0x7FFFFFF;
+}
+
+static int get_hash_0(int index)
+{
+ return opencl_BF_out[index][0] & 0xF;
+}
+
+static int get_hash_1(int index)
+{
+ return opencl_BF_out[index][0] & 0xFF;
+}
+
+static int get_hash_2(int index)
+{
+ return opencl_BF_out[index][0] & 0xFFF;
+}
+
+static int get_hash_3(int index)
+{
+ return opencl_BF_out[index][0] & 0xFFFF;
+}
+
+static int get_hash_4(int index)
+{
+ return opencl_BF_out[index][0] & 0xFFFFF;
+}
+
+static int get_hash_5(int index)
+{
+ return opencl_BF_out[index][0] & 0xFFFFFF;
+}
+
+static int get_hash_6(int index)
+{
+ return opencl_BF_out[index][0] & 0x7FFFFFF;
+}
+
+static int salt_hash(void *salt)
+{
+ return ((BF_salt *)salt)->salt[0] & 0x3FF;
+}
+
+static void set_salt(void *salt)
+{
+ memcpy(&saved_salt, salt, sizeof(saved_salt));
+}
+
+static void set_key(char *key, int index)
+{
+ opencl_BF_std_set_key(key, index, sign_extension_bug);
+
+ strnzcpy(saved_key[index], key, PLAINTEXT_LENGTH + 1);
+}
+
+static char *get_key(int index)
+{
+ return saved_key[index];
+}
+
+static void crypt_all(int count)
+{
+ if (keys_mode != saved_salt.subtype) {
+ int i;
+
+ keys_mode = saved_salt.subtype;
+ sign_extension_bug = (keys_mode == 'x');
+ for (i = 0; i < count; i++)
+ opencl_BF_std_set_key(saved_key[i], i, sign_extension_bug);
+ }
+
+ opencl_BF_std_crypt(&saved_salt, count);
+}
+
+static int cmp_all(void *binary, int count)
+{
+ int i;
+ for (i = 0; i < count; i++)
+ if (*(BF_word *)binary == opencl_BF_out[i][0])
+ return 1;
+ return 0;
+}
+
+static int cmp_one(void *binary, int index)
+{
+ return *(BF_word *)binary == opencl_BF_out[index][0];
+}
+
+static int cmp_exact(char *source, int index)
+{
+ opencl_BF_std_crypt_exact(index);
+
+ return !memcmp(opencl_BF_std_get_binary(source), opencl_BF_out[index],
+ sizeof(BF_binary));
+}
+
+struct fmt_main fmt_opencl_bf = {
+ {
+ FORMAT_LABEL,
+ FORMAT_NAME,
+ OPENCL_BF_ALGORITHM_NAME,
+ BENCHMARK_COMMENT,
+ BENCHMARK_LENGTH,
+ PLAINTEXT_LENGTH,
+ BINARY_SIZE,
+ SALT_SIZE,
+ MIN_KEYS_PER_CRYPT,
+ MAX_KEYS_PER_CRYPT,
+ FMT_CASE | FMT_8_BIT,
+ tests
+ }, {
+ init,
+ fmt_default_prepare,
+ valid,
+ fmt_default_split,
+ opencl_BF_std_get_binary,
+ opencl_BF_std_get_salt,
+ {
+ binary_hash_0,
+ binary_hash_1,
+ binary_hash_2,
+ binary_hash_3,
+ binary_hash_4,
+ binary_hash_5,
+ binary_hash_6
+ },
+ salt_hash,
+ set_salt,
+ set_key,
+ get_key,
+ fmt_default_clear_keys,
+ crypt_all,
+ {
+ get_hash_0,
+ get_hash_1,
+ get_hash_2,
+ get_hash_3,
+ get_hash_4,
+ get_hash_5,
+ get_hash_6
+ },
+ cmp_all,
+ cmp_one,
+ cmp_exact
+ }
+};
View
647 src/opencl_bf_std.c
@@ -0,0 +1,647 @@
+/*
+* This software is Copyright (c) 2012 Sayantan Datta <std2048 at gmail dot com>
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+* Based on Solar Designer implementation of bf_std.c in jtr-v1.7.8
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include "arch.h"
+#include "common.h"
+#include "opencl_bf_std.h"
+
+BF_binary opencl_BF_out[BF_N];
+
+/* Number of Blowfish rounds, this is also hardcoded into a few places */
+#define BF_ROUNDS 16
+
+typedef BF_word BF_key[BF_ROUNDS + 2];
+
+struct BF_ctx_S {
+ BF_word S[4][0x100];
+};
+
+struct BF_ctx_P{
+ BF_key P;
+};
+#define INDICES [BF_N]
+#define INDEX [index]
+#define for_each_index() \
+ for (index = 0; index < BF_N; index++)
+
+static struct BF_ctx_S CC_CACHE_ALIGN BF_current_S INDICES;
+static struct BF_ctx_P CC_CACHE_ALIGN BF_current_P INDICES;
+
+/* Current Blowfish key */
+static BF_key CC_CACHE_ALIGN BF_exp_key INDICES;
+#if defined(__linux__) && defined(__sparc__)
+static BF_key BF_init_key INDICES;
+#else
+static BF_key CC_CACHE_ALIGN BF_init_key INDICES;
+#endif
+
+/*
+ * Magic IV for 64 Blowfish encryptions that we do at the end.
+ * The string is "OrpheanBeholderScryDoubt" on big-endian.
+ */
+static BF_word BF_magic_w[6] = {
+ 0x4F727068, 0x65616E42, 0x65686F6C,
+ 0x64657253, 0x63727944, 0x6F756274
+};
+
+/*
+ * P-box and S-box tables initialized with digits of Pi.
+ */
+
+static struct BF_ctx_P BF_init_state_P = {
+ {
+ 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
+ 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
+ 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
+ 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917,
+ 0x9216d5d9, 0x8979fb1b
+ }
+};
+
+static uint state_S[1024] ={
+ 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7,
+ 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99,
+ 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16,
+ 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e,
+ 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee,
+ 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013,
+ 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef,
+ 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e,
+ 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60,
+ 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440,
+ 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce,
+ 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a,
+ 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e,
+ 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677,
+ 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193,
+ 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032,
+ 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88,
+ 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239,
+ 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e,
+ 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0,
+ 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3,
+ 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98,
+ 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88,
+ 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe,
+ 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6,
+ 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d,
+ 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b,
+ 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7,
+ 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba,
+ 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463,
+ 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f,
+ 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09,
+ 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3,
+ 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb,
+ 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279,
+ 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8,
+ 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab,
+ 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82,
+ 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db,
+ 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573,
+ 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0,
+ 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b,
+ 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790,
+ 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8,
+ 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4,
+ 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0,
+ 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7,
+ 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c,
+ 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad,
+ 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1,
+ 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299,
+ 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9,
+ 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477,
+ 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf,
+ 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49,
+ 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af,
+ 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa,
+ 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5,
+ 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41,
+ 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915,
+ 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400,
+ 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915,
+ 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664,
+ 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a,
+
+ 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623,
+ 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266,
+ 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1,
+ 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e,
+ 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6,
+ 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1,
+ 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e,
+ 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1,
+ 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737,
+ 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8,
+ 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff,
+ 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd,
+ 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701,
+ 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7,
+ 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41,
+ 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331,
+ 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf,
+ 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af,
+ 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e,
+ 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87,
+ 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c,
+ 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2,
+ 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16,
+ 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd,
+ 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b,
+ 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509,
+ 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e,
+ 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3,
+ 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f,
+ 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a,
+ 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4,
+ 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960,
+ 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66,
+ 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28,
+ 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802,
+ 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84,
+ 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510,
+ 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf,
+ 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14,
+ 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e,
+ 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50,
+ 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7,
+ 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8,
+ 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281,
+ 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99,
+ 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696,
+ 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128,
+ 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73,
+ 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0,
+ 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0,
+ 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105,
+ 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250,
+ 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3,
+ 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285,
+ 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00,
+ 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061,
+ 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb,
+ 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e,
+ 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735,
+ 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc,
+ 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9,
+ 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340,
+ 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20,
+ 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7,
+
+ 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934,
+ 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068,
+ 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af,
+ 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840,
+ 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45,
+ 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504,
+ 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a,
+ 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb,
+ 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee,
+ 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6,
+ 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42,
+ 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b,
+ 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2,
+ 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb,
+ 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527,
+ 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b,
+ 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33,
+ 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c,
+ 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3,
+ 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc,
+ 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17,
+ 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564,
+ 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b,
+ 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115,
+ 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922,
+ 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728,
+ 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0,
+ 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e,
+ 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37,
+ 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d,
+ 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804,
+ 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b,
+ 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3,
+ 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb,
+ 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d,
+ 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c,
+ 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350,
+ 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9,
+ 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a,
+ 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe,
+ 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d,
+ 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc,
+ 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f,
+ 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61,
+ 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2,
+ 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9,
+ 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2,
+ 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c,
+ 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e,
+ 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633,
+ 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10,
+ 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169,
+ 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52,
+ 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027,
+ 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5,
+ 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62,
+ 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634,
+ 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76,
+ 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24,
+ 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc,
+ 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4,
+ 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c,
+ 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837,
+ 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0,
+
+ 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b,
+ 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe,
+ 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b,
+ 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4,
+ 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8,
+ 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6,
+ 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304,
+ 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22,
+ 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4,
+ 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6,
+ 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9,
+ 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59,
+ 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593,
+ 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51,
+ 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28,
+ 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c,
+ 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b,
+ 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28,
+ 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c,
+ 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd,
+ 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a,
+ 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319,
+ 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb,
+ 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f,
+ 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991,
+ 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32,
+ 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680,
+ 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166,
+ 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae,
+ 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb,
+ 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5,
+ 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47,
+ 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370,
+ 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d,
+ 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84,
+ 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048,
+ 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8,
+ 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd,
+ 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9,
+ 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7,
+ 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38,
+ 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f,
+ 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c,
+ 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525,
+ 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1,
+ 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442,
+ 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964,
+ 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e,
+ 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8,
+ 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d,
+ 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f,
+ 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299,
+ 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02,
+ 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc,
+ 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614,
+ 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a,
+ 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6,
+ 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b,
+ 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0,
+ 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060,
+ 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e,
+ 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9,
+ 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f,
+ 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6
+ };
+
+
+
+/*
+ * Same charset, different order -- can't use the common.c table here.
+ */
+unsigned char opencl_BF_atoi64[0x80] = {
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 1,
+ 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 64, 64, 64, 64, 64,
+ 64, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 64, 64, 64, 64, 64,
+ 64, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
+ 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 64, 64, 64, 64, 64
+};
+
+ static cl_platform_id pltfrmid[MAX_PLATFORMS];
+
+ static cl_device_id devid[MAX_PLATFORMS][MAX_DEVICES_PER_PLATFORM];
+
+ static cl_context cntxt[MAX_PLATFORMS][MAX_DEVICES_PER_PLATFORM];
+
+ static cl_command_queue cmdq[MAX_PLATFORMS][MAX_DEVICES_PER_PLATFORM];
+
+ static cl_kernel krnl[MAX_PLATFORMS][MAX_DEVICES_PER_PLATFORM];
+
+ static cl_program prg[MAX_PLATFORMS][MAX_DEVICES_PER_PLATFORM];
+
+ static cl_int err;
+
+ static int devno,pltfrmno;
+
+static void BF_swap(BF_word *x, int count)
+{
+ BF_word tmp;
+
+ do {
+ tmp = *x;
+ tmp = (tmp << 16) | (tmp >> 16);
+ *x++ = ((tmp & 0x00FF00FF) << 8) | ((tmp >> 8) & 0x00FF00FF);
+ } while (--count);
+}
+
+#define BF_ROUND(ctx_S,ctx_P, L, R, N, tmp1, tmp2, tmp3, tmp4) \
+ tmp1 = L & 0xFF; \
+ tmp2 = L >> 8; \
+ tmp2 &= 0xFF; \
+ tmp3 = L >> 16; \
+ tmp3 &= 0xFF; \
+ tmp4 = L >> 24; \
+ tmp1 = ctx_S.S[3][tmp1]; \
+ tmp2 = ctx_S.S[2][tmp2]; \
+ tmp3 = ctx_S.S[1][tmp3]; \
+ tmp3 += ctx_S.S[0][tmp4]; \
+ tmp3 ^= tmp2; \
+ R ^= ctx_P.P[N + 1]; \
+ tmp3 += tmp1; \
+ R ^= tmp3;
+
+
+/*
+ * Encrypt one block, BF_ROUNDS is hardcoded here.
+ */
+#define BF_ENCRYPT(ctx_S,ctx_P, L, R) \
+ L ^= ctx_P.P[0]; \
+ BF_ROUND(ctx_S,ctx_P, L, R, 0, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, R, L, 1, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, L, R, 2, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, R, L, 3, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, L, R, 4, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, R, L, 5, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, L, R, 6, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, R, L, 7, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, L, R, 8, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, R, L, 9, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, L, R, 10, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, R, L, 11, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, L, R, 12, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, R, L, 13, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P, L, R, 14, u1, u2, u3, u4); \
+ BF_ROUND(ctx_S,ctx_P,R, L, 15, u1, u2, u3, u4); \
+ u4 = R; \
+ R = L; \
+ L = u4 ^ ctx_P.P[BF_ROUNDS + 1];
+
+#define BF_body() \
+ L0 = R0 = 0; \
+ ptr = BF_current_P INDEX.P; \
+ do { \
+ BF_ENCRYPT(BF_current_S INDEX,BF_current_P INDEX, L0, R0); \
+ *ptr = L0; \
+ *(ptr + 1) = R0; \
+ ptr += 2; \
+ } while (ptr < &BF_current_P INDEX.P[BF_ROUNDS + 2]); \
+\
+ ptr = BF_current_S INDEX.S[0]; \
+ do { \
+ ptr += 2; \
+ BF_ENCRYPT(BF_current_S INDEX,BF_current_P INDEX, L0, R0); \
+ *(ptr - 2) = L0; \
+ *(ptr - 1) = R0; \
+ } while (ptr < &BF_current_S INDEX.S[3][0xFF]);
+
+void BF_select_device(int platform_no,int dev_no)
+{
+ devno=dev_no;pltfrmno=platform_no;
+ opencl_init("$JOHN/bf_kernel.cl", dev_no, platform_no);
+ pltfrmid[platform_no]=platform[platform_no];
+ devid[platform_no][dev_no]=devices[dev_no];
+ cntxt[platform_no][dev_no]=context[dev_no];
+ prg[platform_no][dev_no]=program[dev_no];
+ krnl[platform_no][dev_no]=clCreateKernel(prg[platform_no][dev_no],"blowfish",&err) ;
+ if(err) {printf("Create Kernel blowfish FAILED\n"); return ;}
+ cmdq[platform_no][dev_no]=queue[dev_no];
+}
+
+
+void opencl_BF_std_set_key(char *key, int index, int sign_extension_bug)
+{
+ char *ptr = key;
+ int i, j;
+ BF_word tmp;
+
+ for (i = 0; i < BF_ROUNDS + 2; i++) {
+ tmp = 0;
+ for (j = 0; j < 4; j++) {
+ tmp <<= 8;
+ if (sign_extension_bug)
+ tmp |= (int)(signed char)*ptr;
+ else
+ tmp |= (unsigned char)*ptr;
+
+ if (!*ptr) ptr = key; else ptr++;
+ }
+
+ BF_exp_key INDEX[i] = tmp;
+ BF_init_key INDEX[i] = BF_init_state_P.P[i] ^ tmp;
+ }
+}
+
+void exec_bf(cl_uint *salt_api,cl_uint *BF_key_exp,cl_uint *BF_out,cl_uint rounds,int platform_no,int dev_no)
+{
+ cl_event evnt;
+
+ size_t N=BF_N;
+
+ cl_mem salt_gpu,key_exp_gpu,out_gpu,BF_current_S_gpu,BF_current_P_gpu;
+
+
+ salt_gpu=clCreateBuffer(cntxt[platform_no][dev_no],CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,4*sizeof(cl_uint),salt_api,&err);
+ if((salt_gpu==(cl_mem)0)) { HANDLE_CLERROR(err, "Create Buffer FAILED\n"); }
+
+ key_exp_gpu=clCreateBuffer(cntxt[platform_no][dev_no],CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,BF_N*sizeof(cl_uint)*18,BF_key_exp,&err);
+ if((key_exp_gpu==(cl_mem)0)) { HANDLE_CLERROR(err, "Create Buffer FAILED\n"); }
+
+ out_gpu=clCreateBuffer(cntxt[platform_no][dev_no],CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,BF_N*sizeof(cl_uint)*2,BF_out,&err);
+ if((key_exp_gpu==(cl_mem)0)) { HANDLE_CLERROR(err, "Create Buffer FAILED\n"); }
+
+ BF_current_S_gpu=clCreateBuffer(cntxt[platform_no][dev_no],CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,BF_N*sizeof(struct BF_ctx_S),BF_current_S,&err);
+ if((BF_current_S_gpu==(cl_mem)0)) { HANDLE_CLERROR(err, "Create Buffer FAILED\n"); }
+
+ BF_current_P_gpu=clCreateBuffer(cntxt[platform_no][dev_no],CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,BF_N*sizeof(struct BF_ctx_P),BF_current_P,&err);
+ if((BF_current_P_gpu==(cl_mem)0)) { HANDLE_CLERROR(err, "Create Buffer FAILED\n"); }
+
+
+
+ HANDLE_CLERROR(clSetKernelArg(krnl[platform_no][dev_no],0,sizeof(cl_mem),&salt_gpu),"Set Kernel Arg FAILED arg0\n");
+
+ HANDLE_CLERROR(clSetKernelArg(krnl[platform_no][dev_no],1,sizeof(cl_mem),&key_exp_gpu),"Set Kernel Arg FAILED arg2\n");
+
+ HANDLE_CLERROR(clSetKernelArg(krnl[platform_no][dev_no],2,sizeof(cl_mem),&out_gpu),"Set Kernel Arg FAILED arg3\n");
+
+ HANDLE_CLERROR(clSetKernelArg(krnl[platform_no][dev_no],3,sizeof(cl_mem),&BF_current_S_gpu),"Set Kernel Arg FAILED arg4\n");
+
+ HANDLE_CLERROR(clSetKernelArg(krnl[platform_no][dev_no],4,sizeof(cl_mem),&BF_current_P_gpu),"Set Kernel Arg FAILED arg4\n");
+
+ HANDLE_CLERROR(clSetKernelArg(krnl[platform_no][dev_no],5,sizeof(cl_uint),&rounds),"Set Kernel Arg FAILED arg4\n");
+
+ err=clEnqueueNDRangeKernel(cmdq[platform_no][dev_no],krnl[platform_no][dev_no],1,NULL,&N,NULL,0,NULL,&evnt);
+
+ clWaitForEvents(1,&evnt);
+
+ HANDLE_CLERROR(clEnqueueReadBuffer(cmdq[platform_no][dev_no],out_gpu,CL_FALSE,0,2*BF_N*sizeof(cl_uint),BF_out, 0, NULL, NULL),"Write FAILED\n");
+
+ HANDLE_CLERROR(clEnqueueReadBuffer(cmdq[platform_no][dev_no],BF_current_S_gpu,CL_FALSE,0,BF_N*sizeof(struct BF_ctx_S),BF_current_S, 0, NULL, NULL),"Write FAILED\n");
+
+ HANDLE_CLERROR(clEnqueueReadBuffer(cmdq[platform_no][dev_no],BF_current_P_gpu,CL_TRUE,0,BF_N*sizeof(struct BF_ctx_P),BF_current_P, 0, NULL, NULL),"Write FAILED\n");
+
+ clFinish(cmdq[platform_no][dev_no]);
+
+ HANDLE_CLERROR(clReleaseMemObject(salt_gpu),"Release Memory Object FAILED.");
+ HANDLE_CLERROR(clReleaseMemObject(out_gpu),"Release Memory Object FAILED.");
+ HANDLE_CLERROR(clReleaseMemObject(BF_current_P_gpu),"Release Memory Object FAILED.");
+ HANDLE_CLERROR(clReleaseMemObject(BF_current_S_gpu),"Release Memory Object FAILED.");
+ HANDLE_CLERROR(clReleaseMemObject(key_exp_gpu),"Release Memory Object FAILED.");
+
+
+}
+
+void opencl_BF_std_crypt(BF_salt *salt, int n)
+{
+ int index=0,i,j,k;
+ unsigned int salt_api[4];
+ unsigned int rounds=salt->rounds;
+ salt_api[0]=salt->salt[0];
+ salt_api[1]=salt->salt[1];
+ salt_api[2]=salt->salt[2];
+ salt_api[3]=salt->salt[3];
+ unsigned int *BF_out=(unsigned int*)malloc(BF_N*2*sizeof(unsigned int));
+ unsigned int *BF_key_exp = (unsigned int*)malloc(BF_N*18*sizeof(unsigned int));
+
+ index=0;
+ for(k=0;k<BF_N;++k)
+ for(j=0;j<18;++j)
+ BF_key_exp[index++]=BF_exp_key[k][j];
+
+ for_each_index(){
+
+ for(i=0;i<256;++i)
+ {BF_current_S INDEX.S[0][i]=state_S[i];
+ BF_current_S INDEX.S[1][i]=state_S[256+i];
+ BF_current_S INDEX.S[2][i]=state_S[512+i];
+ BF_current_S INDEX.S[3][i]=state_S[768+i];
+ }
+ for(i=0;i<18;++i)
+ BF_current_P INDEX.P[i]=BF_init_key INDEX[i];
+
+ }
+
+ exec_bf(salt_api,BF_key_exp,BF_out,rounds,pltfrmno,devno);
+
+ for_each_index(){
+ opencl_BF_out INDEX[0]=BF_out[2*index];
+ opencl_BF_out INDEX[1]=BF_out[2*index+1];
+ }
+
+
+ free(BF_out);
+ free(BF_key_exp);
+
+}
+
+
+
+void opencl_BF_std_crypt_exact(int index)
+{
+ BF_word L, R;
+ BF_word u1, u2, u3, u4;
+ BF_word count;
+ int i;
+
+ memcpy(&opencl_BF_out[index][2], &BF_magic_w[2], sizeof(BF_word) * 4);
+
+ count = 64;
+ do
+ for (i = 2; i < 6; i += 2) {
+ L = opencl_BF_out[index][i];
+ R = opencl_BF_out[index][i + 1];
+ BF_ENCRYPT(BF_current_S INDEX,BF_current_P INDEX, L, R);
+ opencl_BF_out[index][i] = L;
+ opencl_BF_out[index][i + 1] = R;
+ } while (--count);
+
+/* This has to be bug-compatible with the original implementation :-) */
+ opencl_BF_out[index][5] &= ~(BF_word)0xFF;
+}
+
+/*
+ * I'm not doing any error checking in the routines below since the
+ * ciphertext should have already been checked to be fmt_BF.valid().
+ */
+
+static void BF_decode(BF_word *dst, char *src, int size)
+{
+ unsigned char *dptr = (unsigned char *)dst;
+ unsigned char *end = dptr + size;
+ unsigned char *sptr = (unsigned char *)src;
+ unsigned int c1, c2, c3, c4;
+
+ do {
+ c1 = opencl_BF_atoi64[ARCH_INDEX(*sptr++)];
+ c2 = opencl_BF_atoi64[ARCH_INDEX(*sptr++)];
+ *dptr++ = (c1 << 2) | ((c2 & 0x30) >> 4);
+ if (dptr >= end) break;
+
+ c3 = opencl_BF_atoi64[ARCH_INDEX(*sptr++)];
+ *dptr++ = ((c2 & 0x0F) << 4) | ((c3 & 0x3C) >> 2);
+ if (dptr >= end) break;
+
+ c4 = opencl_BF_atoi64[ARCH_INDEX(*sptr++)];
+ *dptr++ = ((c3 & 0x03) << 6) | c4;
+ } while (dptr < end);
+}
+
+void *opencl_BF_std_get_salt(char *ciphertext)
+{
+ static BF_salt salt;
+
+ BF_decode(salt.salt, &ciphertext[7], 16);
+ BF_swap(salt.salt, 4);
+
+ salt.rounds = atoi(&ciphertext[4]);
+ salt.subtype = ciphertext[2];
+
+ return &salt;
+}
+
+void *opencl_BF_std_get_binary(char *ciphertext)
+{
+ static BF_binary binary;
+
+ binary[5] = 0;
+ BF_decode(binary, &ciphertext[29], 23);
+ BF_swap(binary, 6);
+ binary[5] &= ~(BF_word)0xFF;
+
+ return &binary;
+}
View
71 src/opencl_bf_std.h
@@ -0,0 +1,71 @@
+/*
+ * This software is Copyright (c) 2012 Sayantan Datta <std2048 at gmail dot com>
+ * and it is hereby released to the general public under the following terms:
+ * Redistribution and use in source and binary forms, with or without modification, are permitted.
+ */
+#ifndef _OPENCL_BF_STD_H
+#define _OPENCL_BF_STD_H
+
+#include "arch.h"
+#include "common.h"
+#include "common-opencl.h"
+
+typedef unsigned int BF_word;
+
+/*
+ * Binary salt type, also keeps the nu