Merge branch 'release/v1.2.2'

kakao · Jul 29, 2022 · 0ff776f · 0ff776f
2 parents faa7f8d + 054ef1d
commit 0ff776f
Show file tree

Hide file tree

Showing 36 changed files with 856 additions and 308 deletions.
diff --git a/.gitignore b/.gitignore
@@ -8,3 +8,4 @@ dist/
 *.egg-info/
 Makefile
 !docs/Makefile
+version.py
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -4,12 +4,6 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -fPIC -Wall -Wextra -fopenmp
 
 include(GNUInstallDirs)
 
-include_directories("./include")
-include_directories("./3rd/json11")
-include_directories("./3rd/spdlog/include")
-include_directories("./3rd/n2/include")
-include_directories("./3rd/eigen3")
-
 set(SOURCES
     "./3rd/json11/json11.cpp"
 )
@@ -20,8 +14,16 @@ file(GLOB CFR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/lib/algo_impl/cfr/*.cc)
 file(GLOB BPR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/lib/algo_impl/bpr/*.cc)
 file(GLOB WARP_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/lib/algo_impl/warp/*.cc)
 file(GLOB W2V_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/lib/algo_impl/w2v/*.cc)
-add_library(cbuffalo SHARED ${SOURCES} ${ALGO_SRCS} ${ALS_SRCS} ${CFR_SRCS} ${BPR_SRCS} ${WARP_SRCS} ${W2V_SRCS} ${MISC_SRCS})
-include_directories(cbuffalo "/usr/local/include/eigen3")
+file(GLOB PLSI_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/lib/algo_impl/plsi/*.cc)
+add_library(cbuffalo SHARED ${SOURCES} ${ALGO_SRCS} ${ALS_SRCS} ${CFR_SRCS} ${BPR_SRCS} ${WARP_SRCS} ${W2V_SRCS} ${PLSI_SRCS} ${MISC_SRCS})
+target_include_directories(cbuffalo
+PRIVATE
+    ./include
+    ./3rd/json11
+    ./3rd/spdlog/include
+    ./3rd/n2/include
+    ./3rd/eigen3
+)
 
 set_target_properties(cbuffalo PROPERTIES VERSION 0.1.0)
 set_target_properties(cbuffalo PROPERTIES SOVERSION 1)
@@ -34,3 +36,34 @@ install(DIRECTORY "${CMAKE_SOURCE_DIR}/include/buffalo" # source directory
         FILES_MATCHING # install only matched files
         PATTERN "*.hpp" # select header files
 )
+
+# Bundled n2 library, built from the vendored sources under 3rd/n2.
+# Paths are anchored to this directory so the glob and the include roots
+# do not depend on how relative paths happen to be resolved.
+set(N2_DIR "${CMAKE_CURRENT_SOURCE_DIR}/3rd/n2")
+file(GLOB_RECURSE N2_SRC CONFIGURE_DEPENDS "${N2_DIR}/src/*.cc")
+add_library(n2 SHARED ${N2_SRC})
+# NOTE(review): OpenMP_CXX_FLAGS is only set after find_package(OpenMP);
+# no such call is visible here, so this may expand to nothing -- confirm.
+target_compile_options(n2 PRIVATE ${OpenMP_CXX_FLAGS})
+target_compile_definitions(n2 PRIVATE BOOST_DISABLE_ASSERTS)
+# Link the imported thread target rather than a bare "pthread" string.
+find_package(Threads REQUIRED)
+target_link_libraries(n2 PRIVATE Threads::Threads)
+target_include_directories(n2
+PRIVATE
+    "${N2_DIR}/include"
+    "${N2_DIR}/third_party/eigen"
+    "${N2_DIR}/third_party/spdlog/include"
+)
+# Vendored Boost modules; each one keeps its headers under <module>/include.
+# The include roots are added in the order the modules are listed.
+set(N2_BOOST_MODULES
+    mpl bind core heap mp11 assert config detail utility iterator parameter
+    type_traits preprocessor concept_check static_assert throw_exception
+)
+foreach(boost_module IN LISTS N2_BOOST_MODULES)
+    target_include_directories(n2 PRIVATE
+        "${N2_DIR}/third_party/boost/${boost_module}/include"
+    )
+endforeach()
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,6 +1,7 @@
 include cuda_setup.py
 include CMakeLists.txt
 include requirements.txt
+include pyproject.toml
 include tests/res/*.json
 recursive-exclude buffalo/ *.cpp
 recursive-include buffalo/ *.pyx

diff --git a/NOTICE.md b/NOTICE.md
@@ -114,16 +114,6 @@ Copyright 2005-2018, NumPy Developers.
 BSD 3-Clause "New" or "Revised" License 
 
 
- **pathlib**
-
-https://bitbucket.org/pitrou/pathlib
-
-Copyright 2012 Antoine Pitrou
-
-
-MIT License 
-
-
  **psutil**
 
 https://github.com/giampaolo/psutil
@@ -164,14 +154,14 @@ Copyright 2007-2019 by the Sphinx team (see AUTHORS file).
 BSD 2-Clause "Simplified" License 
 
 
- **TensorFlow**
+ **Tensorboard**
 
-https://github.com/tensorflow/tensorflow
+https://github.com/tensorflow/tensorboard
 
-Copyright 2019 The TensorFlow Authors.
+Copyright 2017 The TensorFlow Authors.
 
 
-Apache License 2.0 
+Apache License 2.0
 
 
  **tqdm**

diff --git a/benchmark/evaluate.py b/benchmark/evaluate.py
@@ -38,7 +38,7 @@ def filter_seen_items(_topk, seen, topk):
         HIT += hit
 
         # ndcg, map
-        idcg = idcgs[min(len(_gt), len(_topk)) - 1]
+        idcg = idcgs[min(len(_gt), topk) - 1]
         dcg = 0.0
         hit, miss, ap = 0.0, 0.0, 0.0
 
@@ -60,7 +60,7 @@ def filter_seen_items(_topk, seen, topk):
 
         ndcg = dcg / idcg
         NDCG += ndcg
-        ap /= min(len(_gt), len(_topk))
+        ap /= min(len(_gt), topk)
         AP += ap
         N += 1.0
         AUC += auc

diff --git a/buffalo/algo/_plsi.pyx b/buffalo/algo/_plsi.pyx
@@ -0,0 +1,61 @@
+# cython: experimental_cpp_class_def=True, language_level=3
+# distutils: language=c++
+# -*- coding: utf-8 -*-
+import cython
+import numpy as np
+cimport numpy as np
+
+from libcpp.string cimport string
+from libcpp cimport bool as bool_t
+from libc.stdint cimport int32_t, int64_t
+
+
+# C++ declarations from plsi.hpp (namespace "plsi") made visible to Cython.
+# Every method is declared `nogil` and `except +`, so a C++ exception thrown
+# inside one of them is translated into a Python exception.
+cdef extern from "buffalo/algo_impl/plsi/plsi.hpp" namespace "plsi":
+    cdef cppclass CPLSI:
+        bool_t init(string) nogil except +
+        void release() nogil except +
+        void swap() nogil except +
+        void reset() nogil except +
+        void initialize_model(float*, int, float*, int) nogil except +
+        float partial_update(int, int, int64_t*, int32_t*, float*) nogil except +
+        void normalize(float, float) nogil except +
+
+
+# Python-visible wrapper that owns one heap-allocated CPLSI instance and
+# forwards each method call to it.
+cdef class CyPLSI:
+    """CPLSI object holder"""
+    cdef CPLSI* obj  # C-PLSI object
+
+    def __cinit__(self):
+        # Allocated here, freed in __dealloc__.
+        self.obj = new CPLSI()
+
+    def __dealloc__(self):
+        # release() is called on the C++ object before it is deleted.
+        self.obj.release()
+        del self.obj
+
+    def init(self, opt_path):
+        # Returns the boolean result of CPLSI::init.
+        # NOTE(review): opt_path is converted to std::string; presumably
+        # callers pass bytes -- confirm.
+        return self.obj.init(opt_path)
+
+    def swap(self):
+        self.obj.swap()
+
+    def release(self):
+        self.obj.release()
+
+    def reset(self):
+        self.obj.reset()
+
+    def initialize_model(self, np.ndarray[np.float32_t, ndim=2] P,
+                         np.ndarray[np.float32_t, ndim=2] Q):
+        # Hands the C++ side a raw pointer to the first element of each
+        # matrix together with its row count (shape[0]).
+        # NOTE(review): the buffers are not declared mode='c', so contiguity
+        # is not enforced here -- confirm callers pass C-contiguous arrays.
+        self.obj.initialize_model(&P[0, 0], P.shape[0],
+                                  &Q[0, 0], Q.shape[0])
+
+    def normalize(self, alpha1, alpha2):
+        self.obj.normalize(alpha1, alpha2)
+
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def partial_update(self, int start_x, int next_x,
+                       np.ndarray[np.int64_t, ndim=1] indptr,
+                       np.ndarray[np.int32_t, ndim=1] keys,
+                       np.ndarray[np.float32_t, ndim=1] vals):
+        # Bounds checking and negative-index wraparound are disabled for this
+        # method, so the &arr[0] accesses below are unchecked.
+        # NOTE(review): indptr/keys/vals look like a CSR-style slice covering
+        # rows start_x..next_x -- confirm against the caller.
+        # Returns the float produced by CPLSI::partial_update.
+        return self.obj.partial_update(start_x, next_x, &indptr[0], &keys[0], &vals[0])
diff --git a/buffalo/algo/als.py b/buffalo/algo/als.py
@@ -13,7 +13,7 @@
 from buffalo.algo.options import ALSOption
 from buffalo.algo.optimize import Optimizable
 from buffalo.data.buffered_data import BufferedDataMatrix
-from buffalo.algo.base import Algo, Serializable, TensorboardExtention
+from buffalo.algo.base import Algo, Serializable, TensorboardExtension
 
 inited_CUALS = True
 try:
@@ -22,7 +22,7 @@
     inited_CUALS = False
 
 
-class ALS(Algo, ALSOption, Evaluable, Serializable, Optimizable, TensorboardExtention):
+class ALS(Algo, ALSOption, Evaluable, Serializable, Optimizable, TensorboardExtension):
     """Python implementation for C-ALS.
 
     Implementation of Collaborative Filtering for Implicit Feedback datasets.

diff --git a/buffalo/algo/base.py b/buffalo/algo/base.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 import os
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 import abc
 import json
 import pickle
@@ -9,10 +8,7 @@
 import datetime
 
 import numpy as np
-import tensorflow as tf
-tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
-from tensorflow.keras.utils import Progbar
-# what the...
+import tensorboard as tb
 import absl.logging
 logging.root.removeHandler(absl.logging._absl_handler)
 absl.logging._warn_preinit_stderr = False
@@ -331,18 +327,15 @@ def instantiate(cls, cls_opt, path, data_fields):
         return c
 
 
-class TensorboardExtention(object):
+class TensorboardExtension(object):
     @abc.abstractmethod
     def get_evaluation_metrics(self):
         raise NotImplementedError
 
     def _get_initial_tensorboard_data(self):
         tb = aux.Option({'summary_writer': None,
                          'name': None,
-                         'metrics': {},
-                         'feed_dict': {},
-                         'merged_summary_op': None,
-                         'session': None,
+                         'metrics': [],
                          'pbar': None,
                          'data_root': None,
                          'step': 1})
@@ -352,41 +345,28 @@ def initialize_tensorboard(self, num_steps, name_prefix='', name_postfix='', met
         if not self.opt.tensorboard:
             if not hasattr(self, '_tb_setted'):
                 self.logger.debug('Cannot find tensorboard configuration.')
-            self.tb_setted = False
+            self._tb_setted = False
             return
         name = self.opt.tensorboard.name
         name = name_prefix + name + name_postfix
         dtm = datetime.datetime.now().strftime('%Y%m%d-%H.%M')
         template = self.opt.tensorboard.get('name_template', '{name}.{dtm}')
         self._tb = self._get_initial_tensorboard_data()
         self._tb.name = template.format(name=name, dtm=dtm)
-        if not os.path.isdir(self.opt.tensorboard.root):
-            os.makedirs(self.opt.tensorboard.root)
+        os.makedirs(self.opt.tensorboard.root, exist_ok=True)
         tb_dir = os.path.join(self.opt.tensorboard.root, self._tb.name)
         self._tb.data_root = tb_dir
-        self._tb.summary_writer = tf.summary.FileWriter(tb_dir)
-        if not metrics:
-            metrics = self.get_evaluation_metrics()
-        for m in metrics:
-            self._tb.metrics[m] = tf.placeholder(tf.float32)
-            tf.summary.scalar(m, self._tb.metrics[m])
-            self._tb.feed_dict[self._tb.metrics[m]] = 0.0
-        self._tb.merged_summary_op = tf.summary.merge_all()
-        self._tb.session = tf.Session()
-        self._tb.pbar = Progbar(num_steps, stateful_metrics=self._tb.metrics, verbose=0)
+        self._tb.summary_writer = tb.summary.Writer(tb_dir)
+        self._tb.metrics = metrics if metrics is not None else self.get_evaluation_metrics()
         self._tb_setted = True
 
     def update_tensorboard_data(self, metrics):
         if not self.opt.tensorboard:
             return
-        metrics = [(m, np.float32(metrics.get(m, 0.0)))
-                   for m in self._tb.metrics.keys()]
-        self._tb.feed_dict = {self._tb.metrics[k]: v
-                              for k, v in metrics}
-        summary = self._tb.session.run(self._tb.merged_summary_op,
-                                       feed_dict=self._tb.feed_dict)
-        self._tb.summary_writer.add_summary(summary, self._tb.step)
-        self._tb.pbar.update(self._tb.step, metrics)
+        for m in self._tb.metrics:
+            v = metrics.get(m, 0.0)
+            self._tb.summary_writer.add_scalar(m, v, self._tb.step)
+        self._tb.summary_writer.flush()
         self._tb.step += 1
 
     def finalize_tensorboard(self):
@@ -395,6 +375,4 @@ def finalize_tensorboard(self):
         with open(os.path.join(self._tb.data_root, 'opt.json'), 'w') as fout:
             fout.write(json.dumps(self.opt, indent=2))
         self._tb.summary_writer.close()
-        self._tb.session.close()
         self._tb = None
-        tf.reset_default_graph()
diff --git a/buffalo/algo/bpr.py b/buffalo/algo/bpr.py
@@ -15,7 +15,7 @@
 from buffalo.algo.options import BPRMFOption
 from buffalo.algo.optimize import Optimizable
 from buffalo.data.buffered_data import BufferedDataMatrix
-from buffalo.algo.base import Algo, Serializable, TensorboardExtention
+from buffalo.algo.base import Algo, Serializable, TensorboardExtension
 
 # TODO init structure of gpu modules will be abstracted to a higher module
 inited_CUBPR = True
@@ -25,7 +25,7 @@
     inited_CUBPR = False
 
 
-class BPRMF(Algo, BPRMFOption, Evaluable, Serializable, Optimizable, TensorboardExtention):
+class BPRMF(Algo, BPRMFOption, Evaluable, Serializable, Optimizable, TensorboardExtension):
     """Python implementation for C-BPRMF.
     """
     def __init__(self, opt_path=None, *args, **kwargs):
@@ -41,7 +41,7 @@ def __init__(self, opt_path=None, *args, **kwargs):
         self.opt, self.opt_path = self.get_option(opt_path)
 
         if self.opt.accelerator and not inited_CUBPR:
-            self.logger.error(f"ImportError CuBPRMF, no cuda library exists.")
+            self.logger.error('ImportError CuBPRMF, no cuda library exists.')
             raise RuntimeError()
         self.obj = CuBPRMF() if self.opt.accelerator else CyBPRMF()
 
@@ -92,11 +92,11 @@ def init_factors(self):
         for attr_name in ['P', 'Q', 'Qb']:
             setattr(self, attr_name, None)
         self.P = np.abs(np.random.normal(scale=1.0 / (self.opt.d ** 2),
-                                         size=(header['num_users'], self.opt.d)).astype("float32"), order='C')
+                                         size=(header['num_users'], self.opt.d)).astype('float32'), order='C')
         self.Q = np.abs(np.random.normal(scale=1.0 / (self.opt.d ** 2),
-                                         size=(header['num_items'], self.opt.d)).astype("float32"), order='C')
+                                         size=(header['num_items'], self.opt.d)).astype('float32'), order='C')
         self.Qb = np.abs(np.random.normal(scale=1.0 / (self.opt.d ** 2),
-                                          size=(header['num_items'], 1)).astype("float32"), order='C')
+                                          size=(header['num_items'], 1)).astype('float32'), order='C')
         if not self.opt.use_bias:
             self.Qb *= 0
         self.obj.initialize_model(self.P, self.Q, self.Qb, self.num_nnz)
@@ -200,7 +200,7 @@ def compute_loss(self):
     def _prepare_train(self):
         if self.opt.accelerator:
             vdim = self.obj.get_vdim()
-            for attr in ["P", "Q"]:
+            for attr in ['P', 'Q']:
                 F = getattr(self, attr)
                 if F.shape[1] < vdim:
                     _F = np.empty(shape=(F.shape[0], vdim), dtype=np.float32)

diff --git a/buffalo/algo/cfr.py b/buffalo/algo/cfr.py
@@ -14,10 +14,10 @@
 from buffalo.algo.options import CFROption
 from buffalo.algo.optimize import Optimizable
 from buffalo.data.buffered_data import BufferedDataMatrix
-from buffalo.algo.base import Algo, Serializable, TensorboardExtention
+from buffalo.algo.base import Algo, Serializable, TensorboardExtension
 
 
-class CFR(Algo, CFROption, Evaluable, Serializable, Optimizable, TensorboardExtention):
+class CFR(Algo, CFROption, Evaluable, Serializable, Optimizable, TensorboardExtension):
     """Python implementation for CoFactor.
 
     Reference: Factorization Meets the Item Embedding: