From d72b261045e4a3a818652ab56d5c9d14efa4b250 Mon Sep 17 00:00:00 2001 From: Shinya Morino Date: Sat, 10 Nov 2018 17:26:17 +0900 Subject: [PATCH] getSystemE() is implemented for CPU-based annealers. --- sqaodc/cpu/CPUBipartiteGraphAnnealer.cpp | 32 +++++++++++++++-- sqaodc/cpu/CPUBipartiteGraphAnnealer.h | 2 ++ sqaodc/cpu/CPUDenseGraphAnnealer.cpp | 28 +++++++++++---- sqaodc/cpu/CPUDenseGraphAnnealer.h | 4 ++- sqaodc/cpu/Dot_SIMD.h | 44 +++++++++++++++++++++++ sqaodc/cuda/CUDABipartiteGraphAnnealer.cu | 5 +++ 6 files changed, 104 insertions(+), 11 deletions(-) diff --git a/sqaodc/cpu/CPUBipartiteGraphAnnealer.cpp b/sqaodc/cpu/CPUBipartiteGraphAnnealer.cpp index 51838d5..70b3500 100644 --- a/sqaodc/cpu/CPUBipartiteGraphAnnealer.cpp +++ b/sqaodc/cpu/CPUBipartiteGraphAnnealer.cpp @@ -6,6 +6,7 @@ #include #include "SharedFormulas.h" #include +#include "Dot_SIMD.h" #include namespace sqint = sqaod_internal; @@ -114,13 +115,14 @@ void CPUBipartiteGraphAnnealer::set_q(const sq::BitSetPair &qPair) { "Dimension of q1, %d, should be equal to N1, %d.", qPair.bits1.size, N1_); Vector q0 = sq::cast(qPair.bits0); - Vector q1 = sq::cast(qPair.bits1); - + Vector q1 = sq::cast(qPair.bits1); for (int idx = 0; idx < m_; ++idx) { Vector(matQ0_.rowPtr(idx), N0_).copyFrom(q0); Vector(matQ1_.rowPtr(idx), N1_).copyFrom(q1); } - + matQ0_.clearPadding(); + matQ1_.clearPadding(); + setState(solQSet); } @@ -133,6 +135,8 @@ void CPUBipartiteGraphAnnealer::set_qset(const sq::BitSetPairArray &qPairs Vector(matQ0_.rowPtr(idx), N0_).copyFrom(sq::cast(qPairs[idx].bits0)); Vector(matQ1_.rowPtr(idx), N1_).copyFrom(sq::cast(qPairs[idx].bits1)); } + matQ0_.clearPadding(); + matQ1_.clearPadding(); setState(solQSet); } @@ -201,6 +205,9 @@ void CPUBipartiteGraphAnnealer::randomizeSpin() { } }; #endif + matQ0_.clearPadding(); + matQ1_.clearPadding(); + setState(solQSet); } @@ -257,6 +264,25 @@ void CPUBipartiteGraphAnnealer::makeSolution() { calculate_E(); } +template +real CPUBipartiteGraphAnnealer::getSystemE(real G, real beta) const { + const_cast*>(this)->calculate_E(); + real E = E_.sum() / m_; + + if (isSQAAlgorithm(algo_)) { + real spinDotSum = real(0.); + for (int y0 = 0; y0 < m_; ++y0) { + int y1 = (y0 + 1) % m_; + spinDotSum += dot_simd(matQ0_.rowPtr(y0), matQ0_.rowPtr(y1), N0_); + spinDotSum += dot_simd(matQ1_.rowPtr(y0), matQ1_.rowPtr(y1), N1_); + } + real coef = real(0.5) / beta * std::log(std::tanh(G * beta / m_)); + E -= spinDotSum * coef; + } + if (om_ == sq::optMaximize) + E *= real(-1.); + return E; +} template void CPUBipartiteGraphAnnealer::annealOneStepNaive(real G, real beta) { diff --git a/sqaodc/cpu/CPUBipartiteGraphAnnealer.h b/sqaodc/cpu/CPUBipartiteGraphAnnealer.h index e8f684c..3198817 100644 --- a/sqaodc/cpu/CPUBipartiteGraphAnnealer.h +++ b/sqaodc/cpu/CPUBipartiteGraphAnnealer.h @@ -55,6 +55,8 @@ class CPUBipartiteGraphAnnealer : public sq::BipartiteGraphAnnealer { void makeSolution(); + real getSystemE(real G, real beta) const; + void annealOneStep(real G, real beta) { (this->*annealMethod_)(G, beta); } diff --git a/sqaodc/cpu/CPUDenseGraphAnnealer.cpp b/sqaodc/cpu/CPUDenseGraphAnnealer.cpp index eea75b2..1651e95 100644 --- a/sqaodc/cpu/CPUDenseGraphAnnealer.cpp +++ b/sqaodc/cpu/CPUDenseGraphAnnealer.cpp @@ -226,6 +226,26 @@ void CPUDenseGraphAnnealer::syncBits() { } +template +real CPUDenseGraphAnnealer::getSystemE(real G, real beta) const { + const_cast*>(this)->calculate_E(); + real E = E_.sum() / m_; + + if (isSQAAlgorithm(algo_)) { + real spinDotSum = real(0.); + for (int y0 = 0; y0 < m_; ++y0) { + int y1 = (y0 + 1) % m_; + spinDotSum += dot_simd(matQ_.rowPtr(y0), matQ_.rowPtr(y1), N_); + } + real coef = real(0.5) / beta * std::log(std::tanh(G * beta / m_)); + E -= spinDotSum * coef; + } + if (om_ == sq::optMaximize) + E *= real(-1.); + return E; +} + + template inline static void tryFlip(sq::MatrixType &matQ, int y, const sq::VectorType &h, const sq::MatrixType &J, sq::Random &random, real twoDivM, real coef, real beta) { @@ -233,13 +253,7 @@ void tryFlip(sq::MatrixType &matQ, int y, const sq::VectorType &h, c int m = matQ.rows; int x = random.randInt(N); real qyx = matQ(y, x); -#if defined(__AVX2__) - real sum = dot_avx2(J.rowPtr(x), matQ.rowPtr(y), N); -#elif defined(__SSE2__) - real sum = dot_sse2(J.rowPtr(x), matQ.rowPtr(y), N); -#else - real sum = dot_naive(J.rowPtr(x), matQ.rowPtr(y), N); -#endif + real sum = dot_simd(J.rowPtr(x), matQ.rowPtr(y), N); real dE = twoDivM * qyx * (h(x) + 2. * sum); int neibour0 = (y == 0) ? m - 1 : y - 1; int neibour1 = (y == m - 1) ? 0 : y + 1; diff --git a/sqaodc/cpu/CPUDenseGraphAnnealer.h b/sqaodc/cpu/CPUDenseGraphAnnealer.h index c6b0576..19a9153 100644 --- a/sqaodc/cpu/CPUDenseGraphAnnealer.h +++ b/sqaodc/cpu/CPUDenseGraphAnnealer.h @@ -51,6 +51,8 @@ class CPUDenseGraphAnnealer : public sq::DenseGraphAnnealer { void makeSolution(); + real getSystemE(real G, real beta) const; + void annealOneStep(real G, real beta) { (this->*annealMethod_)(G, beta); } @@ -72,7 +74,7 @@ class CPUDenseGraphAnnealer : public sq::DenseGraphAnnealer { sq::Random *random_; int nWorkers_; - Vector E_; + mutable Vector E_; sq::BitSetArray bitsX_; sq::BitSetArray bitsQ_; Matrix matQ_; diff --git a/sqaodc/cpu/Dot_SIMD.h b/sqaodc/cpu/Dot_SIMD.h index 61c93ae..50cced8 100644 --- a/sqaodc/cpu/Dot_SIMD.h +++ b/sqaodc/cpu/Dot_SIMD.h @@ -28,4 +28,48 @@ double dot_naive(const double *v0, const double *v1, sq::SizeType N); float dot_naive(const float *v0, const float *v1, sq::SizeType N); + +#ifdef __AVX2__ + +inline +double dot_simd(const double *v0, const double *v1, sq::SizeType N) { + return dot_avx2(v0, v1, N); +} + +inline +float dot_simd(const float *v0, const float *v1, sq::SizeType N) { + return dot_avx2(v0, v1, N); +} + +#endif + + +#ifdef __SSE2__ + +inline +double dot_simd(const double *v0, const double *v1, sq::SizeType N) { + return dot_sse2(v0, v1, N); +} + +inline +float dot_simd(const float *v0, const float *v1, sq::SizeType N) { + return dot_sse2(v0, v1, N); +} + +#endif + +#if !defined(__AVX2__) && !defined(__SSE2__) + +inline +double dot_simd(const double *v0, const double *v1, sq::SizeType N) { + return dot_naive(v0, v1, N); +} + +inline +float dot_simd(const float *v0, const float *v1, sq::SizeType N) { + return dot_naive(v0, v1, N); +} + +#endif + } diff --git a/sqaodc/cuda/CUDABipartiteGraphAnnealer.cu b/sqaodc/cuda/CUDABipartiteGraphAnnealer.cu index 2d8e9dd..95b883a 100644 --- a/sqaodc/cuda/CUDABipartiteGraphAnnealer.cu +++ b/sqaodc/cuda/CUDABipartiteGraphAnnealer.cu @@ -5,10 +5,15 @@ #include #include #include +#include "DeviceBatchedDot.cuh" + namespace sqint = sqaod_internal; using namespace sqaod_cuda; +template +using DotSpins = DeviceDotSpins; + template CUDABipartiteGraphAnnealer::CUDABipartiteGraphAnnealer() { devStream_ = NULL;