Skip to content
Permalink
Browse files

hashkill: fix build kernels for nv with low mem

  • Loading branch information...
linxon committed Nov 9, 2019
1 parent 53c11c8 commit 119bc3ba0899b506450733c364de2116d3ab0857
@@ -1,6 +1,6 @@
diff -ur a/src/kernels/compiler/amd-compiler.c b/src/kernels/compiler/amd-compiler.c
--- a/src/kernels/compiler/amd-compiler.c 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/compiler/amd-compiler.c 2019-11-08 16:06:56.030308820 +0300
+++ b/src/kernels/compiler/amd-compiler.c 2019-11-09 07:35:45.789677199 +0300
@@ -294,7 +294,7 @@
if( binary_sizes[j] != 0 )
{
@@ -21,7 +21,7 @@ diff -ur a/src/kernels/compiler/amd-compiler.c b/src/kernels/compiler/amd-compil
}
diff -ur a/src/kernels/compiler/compiler.h b/src/kernels/compiler/compiler.h
--- a/src/kernels/compiler/compiler.h 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/compiler/compiler.h 2019-11-08 16:07:08.557195893 +0300
+++ b/src/kernels/compiler/compiler.h 2019-11-09 07:35:45.789677199 +0300
@@ -5,6 +5,7 @@
#include <stdlib.h>
#include <string.h>
@@ -32,8 +32,39 @@ diff -ur a/src/kernels/compiler/compiler.h b/src/kernels/compiler/compiler.h
#include "ocl-base.h"
diff -ur a/src/kernels/compiler/nvidia-compiler.c b/src/kernels/compiler/nvidia-compiler.c
--- a/src/kernels/compiler/nvidia-compiler.c 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/compiler/nvidia-compiler.c 2019-11-08 16:07:31.381169750 +0300
@@ -183,25 +183,25 @@
+++ b/src/kernels/compiler/nvidia-compiler.c 2019-11-09 09:25:28.225931194 +0300
@@ -58,7 +58,7 @@
platform = platforms[i];
break;
}
- //printf("Platform found :%s\n",pbuf);
+ printf("Platform found :%s\n",pbuf);
}

if( platform == (cl_platform_id)NULL )
@@ -114,6 +114,9 @@
char pbuf[100];
err = _clGetDeviceInfo( devices[i], CL_DEVICE_NAME, sizeof(pbuf),pbuf, NULL );
checkErr( "clGetDeviceInfo", err );
+ cl_ulong local_mem;
+ err = _clGetDeviceInfo( devices[i], CL_DEVICE_LOCAL_MEM_SIZE, sizeof(local_mem), &local_mem, NULL );
+ checkErr( "clGetDeviceInfo", err );

char flags[100];
if (optdisable==1) sprintf(flags,"%s -cl-nv-maxrregcount=64 ",buildparams);
@@ -157,6 +160,11 @@
printf("%s: flags = %s\n",filename,flags);
break;
}
+
+ if (local_mem == 16384) {
+ sprintf(flags,"%s -DLOCMEM16K",flags);
+ }
+
char *eflags="";
err = _clBuildProgramNoErr( program, 1, &devices[i], flags, NULL, NULL );
if (err!=CL_SUCCESS)
@@ -183,25 +191,25 @@
switch (smiter)
{
case 0:
@@ -66,7 +97,7 @@ diff -ur a/src/kernels/compiler/nvidia-compiler.c b/src/kernels/compiler/nvidia-
break;
}
}
@@ -288,25 +288,25 @@
@@ -288,25 +296,25 @@
switch (smiter)
{
case 0:
@@ -99,9 +130,157 @@ diff -ur a/src/kernels/compiler/nvidia-compiler.c b/src/kernels/compiler/nvidia-
break;
}
free(ofname);
@@ -437,6 +445,9 @@
char pbuf[100];
err = _clGetDeviceInfo( devices[i], CL_DEVICE_NAME, sizeof(pbuf),pbuf, NULL );
checkErr( "clGetDeviceInfo", err );
+ cl_ulong local_mem;
+ err = _clGetDeviceInfo( devices[i], CL_DEVICE_LOCAL_MEM_SIZE, sizeof(local_mem), &local_mem, NULL );
+ checkErr( "clGetDeviceInfo", err );

char flags[100];
if (optdisable==1) sprintf(flags,"%s -cl-nv-maxrregcount=64 ",buildparams);
@@ -469,6 +480,11 @@
sprintf(flags,"%s -cl-nv-arch sm_35 ",flags);
break;
}
+
+ if (local_mem == 16384) {
+ sprintf(flags,"%s -DLOCMEM16K",flags);
+ }
+
char *eflags="";
err = _clBuildProgramNoErr( program, 1, &devices[i], flags, NULL, NULL );
if (err!=CL_SUCCESS)
diff -ur a/src/kernels/nvidia_bfunix.cl b/src/kernels/nvidia_bfunix.cl
--- a/src/kernels/nvidia_bfunix.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_bfunix.cl 2019-11-09 07:46:57.205147022 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10
#define GGI (get_global_id(0))
#define GLI (get_local_id(0))
@@ -618,4 +619,5 @@

}

-#endif
\ No newline at end of file
+#endif
+#endif
diff -ur a/src/kernels/nvidia_msoffice_old.cl b/src/kernels/nvidia_msoffice_old.cl
--- a/src/kernels/nvidia_msoffice_old.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_msoffice_old.cl 2019-11-09 09:19:13.545452416 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10

#define rotate(a,b) ((a) << (b)) + ((a) >> (32-(b)))
@@ -955,4 +956,5 @@
}


-#endif
\ No newline at end of file
+#endif
+#endif
diff -ur a/src/kernels/nvidia_msoffice_old_md5.cl b/src/kernels/nvidia_msoffice_old_md5.cl
--- a/src/kernels/nvidia_msoffice_old_md5.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_msoffice_old_md5.cl 2019-11-09 11:01:42.330990880 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10
#define rotate(a,b) ((a) << (b)) + ((a) >> (32-(b)))

@@ -889,3 +890,4 @@
}

#endif
+#endif
diff -ur a/src/kernels/nvidia_pdf2.cl b/src/kernels/nvidia_pdf2.cl
--- a/src/kernels/nvidia_pdf2.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_pdf2.cl 2019-11-09 12:38:11.964090661 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10
#define rotate(a,b) ((a) << (b)) + ((a) >> (32-(b)))

@@ -544,4 +545,6 @@
}


-#endif
\ No newline at end of file
+#endif
+#endif
+
diff -ur a/src/kernels/nvidia_pdf3.cl b/src/kernels/nvidia_pdf3.cl
--- a/src/kernels/nvidia_pdf3.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_pdf3.cl 2019-11-09 12:45:26.626342501 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10
#define rotate(a,b) ((a) << (b)) + ((a) >> (32-(b)))

@@ -865,4 +866,5 @@
}


-#endif
\ No newline at end of file
+#endif
+#endif
diff -ur a/src/kernels/nvidia_pdf4.cl b/src/kernels/nvidia_pdf4.cl
--- a/src/kernels/nvidia_pdf4.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_pdf4.cl 2019-11-09 12:45:48.549791646 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10
#define rotate(a,b) ((a) << (b)) + ((a) >> (32-(b)))

@@ -880,4 +881,5 @@
}


-#endif
\ No newline at end of file
+#endif
+#endif
diff -ur a/src/kernels/nvidia_pdf5.cl b/src/kernels/nvidia_pdf5.cl
--- a/src/kernels/nvidia_pdf5.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_pdf5.cl 2019-11-09 12:46:11.547164822 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10
#define rotate(a,b) ((a) << (b)) + ((a) >> (32-(b)))

@@ -283,4 +284,5 @@
}


-#endif
\ No newline at end of file
+#endif
+#endif
diff -ur a/src/kernels/nvidia_pdf6.cl b/src/kernels/nvidia_pdf6.cl
--- a/src/kernels/nvidia_pdf6.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_pdf6.cl 2019-11-09 12:46:36.214419879 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10
#define GGI (get_global_id(0))
#define GLI (get_local_id(0))
@@ -2647,4 +2648,5 @@



-#endif
\ No newline at end of file
+#endif
+#endif
diff -ur a/src/plugins/a51.c b/src/plugins/a51.c
--- a/src/plugins/a51.c 2014-02-04 14:36:40.000000000 +0400
+++ b/src/plugins/a51.c 2019-11-08 14:23:59.503194553 +0300
+++ b/src/plugins/a51.c 2019-11-09 07:35:45.789677199 +0300
@@ -21,6 +21,7 @@
#include <stdio.h>
#include <unistd.h>
@@ -3,7 +3,7 @@

EAPI=7

inherit autotools toolchain-funcs pax-utils
inherit autotools toolchain-funcs pax-utils flag-o-matic

DESCRIPTION="Multi-threaded password recovery tool with multi-GPU support"
HOMEPAGE="https://github.com/gat3way/hashkill"
@@ -18,7 +18,7 @@ LICENSE="GPL-2 public-domain"
#KEYWORDS="~amd64 ~x86"
SLOT="0"

IUSE="video_cards_amdgpu video_cards_nvidia opencl +json pax_kernel"
IUSE="video_cards_amdgpu video_cards_nvidia +opencl +json pax_kernel"
REQUIRED_USE="
video_cards_amdgpu? ( opencl )
video_cards_nvidia? ( opencl )
@@ -57,6 +57,18 @@ src_prepare() {
-e "s/AC_INIT(hashkill, \(.*\),/AC_INIT(hashkill, ${PV},/" \
configure.ac || die

# do not add random CFLAGS
sed -i \
-e "s/ -O3//g" \
src/Makefile.am src/Makefile.in \
src/plugins/Makefile || die

#the following might fail if gcc is built with USE="multislot"
if has_version sys-devel/gcc[-lto]; then
einfo "Warning: compiling without LTO optimisaiton"
sed -i 's/ -flto -fwhole-program//g' src/Makefile || die
fi

if use pax_kernel && use opencl; then
sed -i \
-e "s|amd-compiler$|amd-compiler \n\t\t paxctl -m amd-compiler |g" \
@@ -69,16 +81,11 @@ src_prepare() {
}

src_configure() {
filter-flags -O2
econf \
$(use_with json) \
$(usex video_cards_amdgpu '' '--disable-amd-ocl') \
$(usex video_cards_nvidia '' '--disable-nv-ocl')

#the following might fail if gcc is built with USE="multislot"
if has_version sys-devel/gcc[-lto]; then
einfo "Warning: compiling without LTO optimisaiton"
sed -i 's/ -flto -fwhole-program//g' src/Makefile || die
fi
$(use_enable video_cards_amdgpu amd-ocl) \
$(use_enable video_cards_nvidia nv-ocl)
}

src_compile() {
@@ -91,10 +98,10 @@ src_compile() {
addwrite /dev/ati
fi

# Without -j1 param you can get random errors while building.
# [hashkill] (../../ocl-base.c:312) clCreateContextFromType: CL_DEVICE_NOT_AVAILABLE
# Don't remove it
emake -j1 CC="$(tc-getCC)"
# Build speed depends heavily on your hardware.
# Without the -j1 parameter you can get random screen freezes and errors during the build:
# * [hashkill] (../../ocl-base.c:312) clCreateContextFromType: CL_DEVICE_NOT_AVAILABLE
emake CC="$(tc-getCC)" -j1
}

src_install() {
@@ -110,3 +117,8 @@ src_test() {
cd tests
./test.sh || die
}

# Ebuild post-install phase: prints two warning lines via ewarn that show the
# user a manual command to run once the package has been installed.
pkg_postinst() {
ewarn " ... # after installing:"
# The suggested command removes the "portage" user from the "video" group.
# NOTE(review): presumably that membership was granted earlier so the build
# could reach the GPU; the matching instruction is not visible here — confirm.
ewarn " ~$ sudo gpasswd -d portage video\n"
}

0 comments on commit 119bc3b

Please sign in to comment.
You can’t perform that action at this time.