From 5aacea58b299fc396a5e7879d2f79942ad6aee4b Mon Sep 17 00:00:00 2001
From: Peter Wittek
Date: Thu, 4 Aug 2016 21:19:23 +0200
Subject: [PATCH] GPU and sparse kernels also calculate BMUs after last epoch

---
 src/Python/setup.py      |  6 +++---
 src/denseGpuKernels.cu   | 14 +++++++++++++-
 src/somoclu.h            |  6 +++---
 src/sparseCpuKernels.cpp | 15 +++++++++++++--
 src/training.cpp         |  5 ++---
 5 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/src/Python/setup.py b/src/Python/setup.py
index 614ec7f..45777c8 100644
--- a/src/Python/setup.py
+++ b/src/Python/setup.py
@@ -34,10 +34,10 @@ def find_cuda():
     arch = int(platform.architecture()[0][0:2])
     if sys.platform.startswith('win'):
         os.path.join(libdir, "x"+str(arch))
-    if os.path.exists(os.path.join(home, libdir)):
-        cudaconfig['lib'] = libdir
-    elif os.path.exists(os.path.join(home, libdir + "64")):
+    if os.path.exists(os.path.join(home, libdir + "64")):
         cudaconfig['lib'] = libdir + "64"
+    elif os.path.exists(os.path.join(home, libdir)):
+        cudaconfig['lib'] = libdir
     else:
         raise EnvironmentError('The CUDA libraries could not be located')
     return cudaconfig
diff --git a/src/denseGpuKernels.cu b/src/denseGpuKernels.cu
index cff108e..1ba9326 100644
--- a/src/denseGpuKernels.cu
+++ b/src/denseGpuKernels.cu
@@ -322,9 +322,21 @@ void trainOneEpochDenseGPU(int itask, float *data, float *numerator,
                            unsigned int nDimensions, unsigned int nVectors,
                            unsigned int nVectorsPerRank, float radius,
                            float scale, string mapType, string gridType,
-                           bool compact_support, bool gaussian, int *globalBmus) {
+                           bool compact_support, bool gaussian,
+                           int *globalBmus, bool only_bmus) {
     unsigned int *bmus = new unsigned int[nVectorsPerRank * 2];
     getBmusOnGpu(bmus, codebook, nSomX, nSomY, nDimensions, nVectorsPerRank);
+    if (only_bmus) {
+#ifdef HAVE_MPI
+        MPI_Gather(bmus, nVectorsPerRank * 2, MPI_INT, globalBmus, nVectorsPerRank * 2, MPI_INT, 0, MPI_COMM_WORLD);
+#else
+        for (unsigned int i = 0; i < 2 * nVectorsPerRank; ++i) {
+            globalBmus[i] = bmus[i];
+        }
+#endif
+        delete [] bmus;
+        return;
+    }

     float *localNumerator = new float[nSomY * nSomX * nDimensions];
     float *localDenominator = new float[nSomY * nSomX];
diff --git a/src/somoclu.h b/src/somoclu.h
index cb4e4d9..aaa0bad 100644
--- a/src/somoclu.h
+++ b/src/somoclu.h
@@ -120,7 +120,7 @@ void trainOneEpochDenseCPU(int itask, float *data, float *numerator,
                            unsigned int nVectorsPerRank, float radius,
                            float scale, string mapType, string gridType,
                            bool compact_support, bool gaussian,
-                           int *globalBmus, bool only_bmus=false);
+                           int *globalBmus, bool only_bmus);
 void trainOneEpochSparseCPU(int itask, svm_node **sparseData, float *numerator,
                             float *denominator, float *codebook,
                             unsigned int nSomX, unsigned int nSomY,
@@ -128,7 +128,7 @@ void trainOneEpochSparseCPU(int itask, svm_node **sparseData, float *numerator,
                             unsigned int nVectorsPerRank, float radius,
                             float scale, string mapType, string gridType,
                             bool compact_support, bool gaussian,
-                            int *globalBmus);
+                            int *globalBmus, bool only_bmus);
 void initializeCodebook(unsigned int seed, float *codebook, unsigned int nSomX,
                         unsigned int nSomY, unsigned int nDimensions);

@@ -145,7 +145,7 @@ extern "C" {
                               unsigned int nVectorsPerRank, float radius,
                               float scale, string mapType, string gridType,
                               bool compact_support, bool gaussian,
-                              int *globalBmus);
+                              int *globalBmus, bool only_bmus);
 #endif
     void my_abort(string err);
 }
diff --git a/src/sparseCpuKernels.cpp b/src/sparseCpuKernels.cpp
index 81199a0..f618b0a 100644
--- a/src/sparseCpuKernels.cpp
+++ b/src/sparseCpuKernels.cpp
@@ -84,7 +84,8 @@ void trainOneEpochSparseCPU(int itask, svm_node **sparseData, float *numerator,
                             unsigned int nDimensions, unsigned int nVectors,
                             unsigned int nVectorsPerRank, float radius,
                             float scale, string mapType, string gridType,
-                            bool compact_support, bool gaussian, int *globalBmus) {
+                            bool compact_support, bool gaussian,
+                            int *globalBmus, bool only_bmus) {
     int p1[2] = {0, 0};
     int *bmus = new int[nVectorsPerRank * 2];
 #ifdef _OPENMP
@@ -108,7 +109,17 @@ void trainOneEpochSparseCPU(int itask, svm_node **sparseData, float *numerator,
             }
         }
     }
-
+    if (only_bmus) {
+#ifdef HAVE_MPI
+        MPI_Gather(bmus, nVectorsPerRank * 2, MPI_INT, globalBmus, nVectorsPerRank * 2, MPI_INT, 0, MPI_COMM_WORLD);
+#else
+        for (unsigned int i = 0; i < 2 * nVectorsPerRank; ++i) {
+            globalBmus[i] = bmus[i];
+        }
+#endif
+        delete [] bmus;
+        return;
+    }
     float *localNumerator = new float[nSomY * nSomX * nDimensions];
     float *localDenominator = new float[nSomY * nSomX];
 #ifdef _OPENMP
diff --git a/src/training.cpp b/src/training.cpp
index 87fc127..22b00eb 100644
--- a/src/training.cpp
+++ b/src/training.cpp
@@ -139,7 +139,6 @@ void train(int itask, float *data, svm_node **sparseData,
 #ifdef HAVE_MPI
         double epoch_time = MPI_Wtime();
 #endif
-
         trainOneEpoch(itask, data, sparseData, codebook, globalBmus, nEpoch,
                       currentEpoch, nSomX, nSomY,
                       nDimensions, nVectors, nVectorsPerRank,
@@ -322,7 +321,7 @@ void trainOneEpoch(int itask, float *data, svm_node **sparseData,
         trainOneEpochDenseGPU(itask, data, numerator, denominator,
                               codebook, nSomX, nSomY, nDimensions,
                               nVectors, nVectorsPerRank, radius, scale,
-                              mapType, gridType, compact_support, gaussian, globalBmus);
+                              mapType, gridType, compact_support, gaussian, globalBmus, only_bmus);
 #else
         my_abort("Compiled without CUDA!");
 #endif
@@ -332,7 +331,7 @@ void trainOneEpoch(int itask, float *data, svm_node **sparseData,
                                codebook, nSomX, nSomY, nDimensions,
                                nVectors, nVectorsPerRank, radius, scale,
                                mapType, gridType, compact_support, gaussian,
-                               globalBmus);
+                               globalBmus, only_bmus);
         break;
     }

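Note (not part of the patch): the sketch below illustrates the control flow this change adds to trainOneEpochDenseGPU and trainOneEpochSparseCPU. The BMUs are always computed first; when only_bmus is set, the kernel hands them back to the caller and returns before touching the numerator/denominator buffers used for the codebook update, so an extra pass after the last epoch only reports BMUs for the final codebook. The helper names (findBmu, trainOneEpochSketch), the serial brute-force search, and the flat data layout are illustrative assumptions, not somoclu's actual API, and the MPI gather path is omitted.

// Minimal, self-contained sketch; not somoclu code.
#include <cstdio>
#include <limits>
#include <vector>

// Hypothetical helper: brute-force best matching unit for one input vector.
static unsigned int findBmu(const std::vector<float> &codebook,
                            const float *vec,
                            unsigned int nCodes, unsigned int nDim) {
    unsigned int best = 0;
    float bestDist = std::numeric_limits<float>::max();
    for (unsigned int c = 0; c < nCodes; ++c) {
        float dist = 0.0f;
        for (unsigned int d = 0; d < nDim; ++d) {
            float diff = codebook[c * nDim + d] - vec[d];
            dist += diff * diff;
        }
        if (dist < bestDist) {
            bestDist = dist;
            best = c;
        }
    }
    return best;
}

// Hypothetical single-process stand-in for the patched kernels.
static void trainOneEpochSketch(const std::vector<float> &data,
                                std::vector<float> &codebook,
                                std::vector<unsigned int> &globalBmus,
                                unsigned int nVectors, unsigned int nCodes,
                                unsigned int nDim, bool only_bmus) {
    // BMUs are computed unconditionally, as in the patched kernels.
    for (unsigned int i = 0; i < nVectors; ++i) {
        globalBmus[i] = findBmu(codebook, &data[i * nDim], nCodes, nDim);
    }
    if (only_bmus) {
        // Extra pass after the last epoch: report the BMUs for the trained
        // codebook and skip the weight update entirely.
        return;
    }
    // Regular epoch: the numerator/denominator accumulation and codebook
    // update would follow here (omitted in this sketch).
}

int main() {
    const unsigned int nVectors = 4, nCodes = 2, nDim = 3;
    std::vector<float> data = {0, 0, 0,  1, 1, 1,  0.2f, 0, 0,  0.9f, 1, 1};
    std::vector<float> codebook = {0, 0, 0,  1, 1, 1};
    std::vector<unsigned int> bmus(nVectors);
    // Pretend training has finished; ask only for the BMUs.
    trainOneEpochSketch(data, codebook, bmus, nVectors, nCodes, nDim, true);
    for (unsigned int i = 0; i < nVectors; ++i) {
        std::printf("vector %u -> BMU %u\n", i, bmus[i]);
    }
    return 0;
}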