From 36bebeede45b2d0ba9815c4ed25f20e1d8c477c2 Mon Sep 17 00:00:00 2001
From: Tomoki <tomoki.ohtsuki129@gmail.com>
Date: Tue, 11 Jan 2022 00:15:28 +0900
Subject: [PATCH 1/2] Found a subtle bug caused by data race.

---
 CMakeLists.txt               | 12 ++++++++----
 cpp_sources/child_worker.cpp |  8 ++++----
 cpp_sources/trainer_base.cpp | 12 +++++++-----
 cpp_sources/trainer_base.hpp |  3 +++
 tests/test_lda.py            |  9 ++++++++-
 tests/test_llda.py           |  6 +++---
 6 files changed, 33 insertions(+), 17 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 776be75..ee55f51 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,11 @@
-cmake_minimum_required(VERSION 2.8.12)
+cmake_minimum_required(VERSION 3.0.0)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-project(lda11)
+set(CMAKE_RELS)
+set(CMAKE_BUILD_TYPE Debug)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_INIT} -std=c++11 -fPIC -O0")
+set(PYBIND11_PYTHON_VERSION 3.10)
 
 add_subdirectory(pybind11)
-include_directories(eigen-3.3.7)
-pybind11_add_module(lda11 src/wrapper.cpp src/labelled_lda.cpp src/trainer_base.cpp src/trainer.cpp src/predictor.cpp src/child_worker.cpp)
+include_directories(eigen-3.4.0)
+pybind11_add_module(_lda cpp_sources/wrapper.cpp cpp_sources/labelled_lda.cpp cpp_sources/trainer_base.cpp cpp_sources/trainer.cpp cpp_sources/predictor.cpp cpp_sources/child_worker.cpp)
diff --git a/cpp_sources/child_worker.cpp b/cpp_sources/child_worker.cpp
index ed28969..130fba6 100644
--- a/cpp_sources/child_worker.cpp
+++ b/cpp_sources/child_worker.cpp
@@ -73,6 +73,7 @@ void LDATrainerBase::ChildWorker::sync_topic(
     const Eigen::Ref<IntegerMatrix> &word_topic_global,
     const Eigen::Ref<IntegerMatrix> &doc_topic_global,
     const Eigen::Ref<IntegerVector> &topic_counts) {
+
   word_topic_local = word_topic_global;
   topic_counts_local = topic_counts;
   for (size_t internal_dix = 0; internal_dix < global_indices.size();
@@ -86,7 +87,6 @@ void LDATrainerBase::ChildWorker::do_work(
     Eigen::Ref<IntegerMatrix> word_topic, Eigen::Ref<IntegerMatrix> doc_topic,
     Eigen::Ref<IntegerVector> topic_counts,
     const Eigen::Ref<RealVector> &topic_word_prior) {
-  this->sync_topic(word_topic, doc_topic, topic_counts);
   this->decr_count(word_topic, doc_topic, topic_counts);
   Real eta_sum = topic_word_prior.sum();
   RealVector p_(n_topics_);
@@ -98,8 +98,7 @@ void LDATrainerBase::ChildWorker::do_work(
     size_t global_dix = global_indices[ws.doc_id];
 
     p_ = (word_topic_local.row(ws.word_id).cast<Real>().transpose().array() +
-          topic_word_prior.array())
-             .array() /
+          topic_word_prior(ws.word_id)) /
          (topic_counts_local.cast<Real>().array() + eta_sum) *
          (doc_topic_local.row(ws.doc_id).cast<Real>().transpose().array() +
           parent_->obtain_doc_topic_prior(global_dix).array());
@@ -111,6 +110,7 @@ void LDATrainerBase::ChildWorker::do_work(
     topic_counts_local(ws.topic_id)++;
   }
   this->add_count(word_topic, doc_topic, topic_counts);
+  this->epoch++;
 }
 
 RealMatrix LDATrainerBase::ChildWorker::obtain_phi(
@@ -126,7 +126,7 @@ RealMatrix LDATrainerBase::ChildWorker::obtain_phi(
     topic_counts_local(ws.topic_id)--;
     size_t global_dix = global_indices[ws.doc_id];
     p_ = (word_topic_local.row(ws.word_id).cast<Real>().transpose().array() +
-          topic_word_prior.array())
+          topic_word_prior(ws.word_id))
              .array() /
          (topic_counts_local.cast<Real>().array() + eta_sum) *
          (doc_topic_local.row(ws.doc_id).cast<Real>().transpose().array() +
diff --git a/cpp_sources/trainer_base.cpp b/cpp_sources/trainer_base.cpp
index 6c72dad..f12904f 100644
--- a/cpp_sources/trainer_base.cpp
+++ b/cpp_sources/trainer_base.cpp
@@ -21,7 +21,7 @@ LDATrainerBase::LDATrainerBase(Eigen::Ref<IntegerVector> counts,
 
   if (n_workers == 0) {
     throw std::invalid_argument("n_workers has to be strictly positive.");
-  } else if (n_workers == 1) {
+  } else if (n_workers == 1u) {
     for (size_t i = 0; i < n_; i++) {
       size_t c = counts(i);
       size_t dix = dixs(i);
@@ -43,7 +43,7 @@ LDATrainerBase::LDATrainerBase(Eigen::Ref<IntegerVector> counts,
       size_t assigned;
       if (dix_seen.find(dix) == dix_seen.end()) {
         assigned = std::floor(urand_.rand() * n_workers);
-        assigned = assigned < n_workers - 1 ? assigned : n_workers - 1;
+        assigned = assigned < (n_workers - 1) ? assigned : n_workers - 1;
         dix_seen.insert({dix, assigned});
         children[assigned]->add_doc(dix);
       } else {
@@ -92,8 +92,7 @@ void LDATrainerBase::iterate_gibbs(Eigen::Ref<RealVector> topic_word_prior,
       topic_counts(ws.topic_id)--;
 
       p_ = (word_topic.row(ws.word_id).cast<Real>().transpose().array() +
-            topic_word_prior.array())
-               .array() /
+            topic_word_prior(ws.word_id)) /
            (topic_counts.cast<Real>().array() + eta_sum) *
            (doc_topic.row(ws.doc_id).cast<Real>().transpose().array() +
             obtain_doc_topic_prior(ws.doc_id).array());
@@ -106,6 +105,9 @@ void LDATrainerBase::iterate_gibbs(Eigen::Ref<RealVector> topic_word_prior,
     }
   } else {
     std::vector<std::thread> workers;
+    for (auto &child : children) {
+      child->sync_topic(word_topic, doc_topic, topic_counts);
+    }
     for (auto &child : children) {
       workers.emplace_back([&child, &word_topic, &doc_topic, &topic_counts,
                             &topic_word_prior] {
@@ -135,7 +137,7 @@ LDATrainerBase::obtain_phi(const Eigen::Ref<RealVector> &topic_word_prior,
       word_topic(ws.word_id, ws.topic_id)--;
       topic_counts(ws.topic_id)--;
       p_ = (word_topic.row(ws.word_id).cast<Real>().transpose().array() +
-            topic_word_prior.array())
+            topic_word_prior(ws.word_id))
                .array() /
            (topic_counts.cast<Real>().array() + eta_sum) *
            (doc_topic.row(ws.doc_id).cast<Real>().transpose().array() +
diff --git a/cpp_sources/trainer_base.hpp b/cpp_sources/trainer_base.hpp
index bb819ce..b170b58 100644
--- a/cpp_sources/trainer_base.hpp
+++ b/cpp_sources/trainer_base.hpp
@@ -54,12 +54,15 @@ struct LDATrainerBase {
     UrandDevice urand_;
 
     std::vector<WordState> word_states_local;
+
+  private:
     IntegerMatrix doc_topic_local;
     IntegerMatrix word_topic_local;
     IntegerVector topic_counts_local;
 
     std::vector<size_t> global_indices;
     std::unordered_map<size_t, size_t> dix_to_internal_index;
+    int epoch;
   };
 
 protected:
diff --git a/tests/test_lda.py b/tests/test_lda.py
index a7cb1d7..9e52936 100644
--- a/tests/test_lda.py
+++ b/tests/test_lda.py
@@ -7,7 +7,14 @@
 
 def test_lda(docs_gen: Docs) -> None:
     (X1, _), true_theta = docs_gen.gen_doc(1000)
-    lda = LDA(2, n_iter=50, optimize_interval=1, optimize_burn_in=25, use_cgs_p=False)
+    lda = LDA(
+        2,
+        n_iter=50,
+        optimize_interval=1,
+        optimize_burn_in=25,
+        use_cgs_p=False,
+        n_workers=4,
+    )
     lda.fit(X1)
     phi1 = lda.phi
 
diff --git a/tests/test_llda.py b/tests/test_llda.py
index 6d74d30..2611770 100644
--- a/tests/test_llda.py
+++ b/tests/test_llda.py
@@ -43,7 +43,7 @@ def test_llda() -> None:
     A_word_index = np.where(TOPIC_A > 0.1)[0]
     B_word_index = np.where(TOPIC_A < 0.1)[0]
 
-    for A_index, cgs_p, n_threads in zip([1, 2], [False, True], [1, 2]):
+    for A_index, cgs_p in zip([1, 2], [False, True]):
         if A_index == 1:
             language = LabelledLanguage(TOPIC_A, TOPIC_B)
             B_index = 2
@@ -53,7 +53,7 @@ def test_llda() -> None:
 
         X, Y = language.gen_doc(1000)
 
-        llda = LabelledLDA(use_cgs_p=cgs_p, n_workers=n_threads).fit(X, Y)
+        llda = LabelledLDA(use_cgs_p=cgs_p, n_workers=1, n_iter=100).fit(X, Y)
         if sys.platform.startswith("linux"):
             with NamedTemporaryFile() as temp_fs:
                 pickle.dump(llda, temp_fs)
@@ -70,7 +70,7 @@ def test_llda() -> None:
 
         A_DOC = np.asarray(([0, 10, 0, 10]), dtype=np.int32)
         for mode in ["mf", "gibbs"]:
-            theta = llda_new.transform(A_DOC, mode=mode, n_workers=n_threads)[0]  # type: ignore
+            theta = llda_new.transform(A_DOC, mode=mode, n_workers=1)[0]  # type: ignore
             if A_index == 1:
                 assert (theta[1] / theta[2]) > 5
             else:

From 114de6d6e8c66ea8950e235f719e004c9b675caa Mon Sep 17 00:00:00 2001
From: Tomoki <tomoki.ohtsuki129@gmail.com>
Date: Tue, 11 Jan 2022 00:22:32 +0900
Subject: [PATCH 2/2] efficient test path

---
 tests/test_lda.py  | 2 +-
 tests/test_mlda.py | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/test_lda.py b/tests/test_lda.py
index 9e52936..6acdbaf 100644
--- a/tests/test_lda.py
+++ b/tests/test_lda.py
@@ -12,7 +12,7 @@ def test_lda(docs_gen: Docs) -> None:
         n_iter=50,
         optimize_interval=1,
         optimize_burn_in=25,
-        use_cgs_p=False,
+        use_cgs_p=True,
         n_workers=4,
     )
     lda.fit(X1)
diff --git a/tests/test_mlda.py b/tests/test_mlda.py
index 5ee03ce..fb0525e 100644
--- a/tests/test_mlda.py
+++ b/tests/test_mlda.py
@@ -9,7 +9,9 @@
 def test_mlda(docs_gen: Docs) -> None:
     (X1, X2), true_theta = docs_gen.gen_doc(1000)
     X2 = sps.lil_matrix(X2)
-    lda = MultilingualLDA(2, n_iter=50, optimize_interval=1, optimize_burn_in=25)
+    lda = MultilingualLDA(
+        2, n_iter=50, optimize_interval=1, optimize_burn_in=25, use_cgs_p=False
+    )
     lda.fit(X1, X2)
     phi1, phi2 = lda.phis