Skip to content

Commit

Permalink
GPU and sparse kernels also calculate BMUs after last epoch
Browse files Browse the repository at this point in the history
  • Loading branch information
peterwittek committed Aug 4, 2016
1 parent eb15da9 commit 5aacea5
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 12 deletions.
6 changes: 3 additions & 3 deletions src/Python/setup.py
Expand Up @@ -34,10 +34,10 @@ def find_cuda():
arch = int(platform.architecture()[0][0:2])
if sys.platform.startswith('win'):
os.path.join(libdir, "x"+str(arch))
- if os.path.exists(os.path.join(home, libdir)):
- cudaconfig['lib'] = libdir
- elif os.path.exists(os.path.join(home, libdir + "64")):
+ if os.path.exists(os.path.join(home, libdir + "64")):
  cudaconfig['lib'] = libdir + "64"
+ elif os.path.exists(os.path.join(home, libdir)):
+ cudaconfig['lib'] = libdir
else:
raise EnvironmentError('The CUDA libraries could not be located')
return cudaconfig
Expand Down
14 changes: 13 additions & 1 deletion src/denseGpuKernels.cu
Expand Up @@ -322,9 +322,21 @@ void trainOneEpochDenseGPU(int itask, float *data, float *numerator,
unsigned int nDimensions, unsigned int nVectors,
unsigned int nVectorsPerRank, float radius,
float scale, string mapType, string gridType,
- bool compact_support, bool gaussian, int *globalBmus) {
+ bool compact_support, bool gaussian,
+ int *globalBmus, bool only_bmus) {
unsigned int *bmus = new unsigned int[nVectorsPerRank * 2];
getBmusOnGpu(bmus, codebook, nSomX, nSomY, nDimensions, nVectorsPerRank);
+ if (only_bmus) {
+ #ifdef HAVE_MPI
+ MPI_Gather(bmus, nVectorsPerRank * 2, MPI_INT, globalBmus, nVectorsPerRank * 2, MPI_INT, 0, MPI_COMM_WORLD);
+ #else
+ for (unsigned int i = 0; i < 2 * nVectorsPerRank; ++i) {
+ globalBmus[i] = bmus[i];
+ }
+ #endif
+ delete [] bmus;
+ return;
+ }
float *localNumerator = new float[nSomY * nSomX * nDimensions];
float *localDenominator = new float[nSomY * nSomX];

Expand Down
6 changes: 3 additions & 3 deletions src/somoclu.h
Expand Up @@ -120,15 +120,15 @@ void trainOneEpochDenseCPU(int itask, float *data, float *numerator,
unsigned int nVectorsPerRank, float radius,
float scale, string mapType,
string gridType, bool compact_support, bool gaussian,
- int *globalBmus, bool only_bmus=false);
+ int *globalBmus, bool only_bmus);
void trainOneEpochSparseCPU(int itask, svm_node **sparseData, float *numerator,
float *denominator, float *codebook,
unsigned int nSomX, unsigned int nSomY,
unsigned int nDimensions, unsigned int nVectors,
unsigned int nVectorsPerRank, float radius,
float scale, string mapType,
string gridType, bool compact_support, bool gaussian,
- int *globalBmus);
+ int *globalBmus, bool only_bmus);
void initializeCodebook(unsigned int seed, float *codebook, unsigned int nSomX,
unsigned int nSomY, unsigned int nDimensions);

Expand All @@ -145,7 +145,7 @@ extern "C" {
unsigned int nVectorsPerRank, float radius,
float scale, string mapType,
string gridType, bool compact_support, bool gaussian,
- int *globalBmus);
+ int *globalBmus, bool only_bmus);
#endif
void my_abort(string err);
}
Expand Down
15 changes: 13 additions & 2 deletions src/sparseCpuKernels.cpp
Expand Up @@ -84,7 +84,8 @@ void trainOneEpochSparseCPU(int itask, svm_node **sparseData, float *numerator,
unsigned int nDimensions, unsigned int nVectors,
unsigned int nVectorsPerRank, float radius,
float scale, string mapType, string gridType,
- bool compact_support, bool gaussian, int *globalBmus) {
+ bool compact_support, bool gaussian,
+ int *globalBmus, bool only_bmus) {
int p1[2] = {0, 0};
int *bmus = new int[nVectorsPerRank * 2];
#ifdef _OPENMP
Expand All @@ -108,7 +109,17 @@ void trainOneEpochSparseCPU(int itask, svm_node **sparseData, float *numerator,
}
}
  }
-
+ if (only_bmus) {
+ #ifdef HAVE_MPI
+ MPI_Gather(bmus, nVectorsPerRank * 2, MPI_INT, globalBmus, nVectorsPerRank * 2, MPI_INT, 0, MPI_COMM_WORLD);
+ #else
+ for (unsigned int i = 0; i < 2 * nVectorsPerRank; ++i) {
+ globalBmus[i] = bmus[i];
+ }
+ #endif
+ delete [] bmus;
+ return;
+ }
float *localNumerator = new float[nSomY * nSomX * nDimensions];
float *localDenominator = new float[nSomY * nSomX];
#ifdef _OPENMP
Expand Down
5 changes: 2 additions & 3 deletions src/training.cpp
Expand Up @@ -139,7 +139,6 @@ void train(int itask, float *data, svm_node **sparseData,
#ifdef HAVE_MPI
double epoch_time = MPI_Wtime();
  #endif
-
trainOneEpoch(itask, data, sparseData, codebook, globalBmus,
nEpoch, currentEpoch,
nSomX, nSomY, nDimensions, nVectors, nVectorsPerRank,
Expand Down Expand Up @@ -322,7 +321,7 @@ void trainOneEpoch(int itask, float *data, svm_node **sparseData,
trainOneEpochDenseGPU(itask, data, numerator, denominator,
codebook, nSomX, nSomY, nDimensions,
nVectors, nVectorsPerRank, radius, scale,
- mapType, gridType, compact_support, gaussian, globalBmus);
+ mapType, gridType, compact_support, gaussian, globalBmus, only_bmus);
#else
my_abort("Compiled without CUDA!");
#endif
Expand All @@ -332,7 +331,7 @@ void trainOneEpoch(int itask, float *data, svm_node **sparseData,
codebook, nSomX, nSomY, nDimensions,
nVectors, nVectorsPerRank, radius, scale,
mapType, gridType, compact_support, gaussian,
- globalBmus);
+ globalBmus, only_bmus);
break;
}

Expand Down

0 comments on commit 5aacea5

Please sign in to comment.