Skip to content

Commit

Permalink
Merge branch 'release/v1.2.2'
Browse files Browse the repository at this point in the history
  • Loading branch information
dante.l committed Jul 29, 2022
2 parents faa7f8d + 054ef1d commit 0ff776f
Show file tree
Hide file tree
Showing 36 changed files with 856 additions and 308 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ dist/
*.egg-info/
Makefile
!docs/Makefile
version.py
49 changes: 41 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,6 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -fPIC -Wall -Wextra -fopenmp

include(GNUInstallDirs)

include_directories("./include")
include_directories("./3rd/json11")
include_directories("./3rd/spdlog/include")
include_directories("./3rd/n2/include")
include_directories("./3rd/eigen3")

set(SOURCES
"./3rd/json11/json11.cpp"
)
Expand All @@ -20,8 +14,16 @@ file(GLOB CFR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/lib/algo_impl/cfr/*.cc)
file(GLOB BPR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/lib/algo_impl/bpr/*.cc)
file(GLOB WARP_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/lib/algo_impl/warp/*.cc)
file(GLOB W2V_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/lib/algo_impl/w2v/*.cc)
add_library(cbuffalo SHARED ${SOURCES} ${ALGO_SRCS} ${ALS_SRCS} ${CFR_SRCS} ${BPR_SRCS} ${WARP_SRCS} ${W2V_SRCS} ${MISC_SRCS})
include_directories(cbuffalo "/usr/local/include/eigen3")
file(GLOB PLSI_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/lib/algo_impl/plsi/*.cc)
add_library(cbuffalo SHARED ${SOURCES} ${ALGO_SRCS} ${ALS_SRCS} ${CFR_SRCS} ${BPR_SRCS} ${WARP_SRCS} ${W2V_SRCS} ${PLSI_SRCS} ${MISC_SRCS})
# Header search paths used only while compiling cbuffalo itself (PRIVATE, so
# they are not propagated to targets that link against it). Each entry is
# spelled out against the directory of this CMakeLists.txt.
target_include_directories(cbuffalo
  PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}/include"
    "${CMAKE_CURRENT_SOURCE_DIR}/3rd/json11"
    "${CMAKE_CURRENT_SOURCE_DIR}/3rd/spdlog/include"
    "${CMAKE_CURRENT_SOURCE_DIR}/3rd/n2/include"
    "${CMAKE_CURRENT_SOURCE_DIR}/3rd/eigen3"
)

# Shared-library versioning for cbuffalo: VERSION is the full build version,
# SOVERSION the ABI version.
set_target_properties(cbuffalo PROPERTIES
  VERSION 0.1.0
  SOVERSION 1
)
Expand All @@ -34,3 +36,34 @@ install(DIRECTORY "${CMAKE_SOURCE_DIR}/include/buffalo" # source directory
FILES_MATCHING # install only matched files
PATTERN "*.hpp" # select header files
)

# Build the n2 sources vendored under 3rd/n2 as a shared library.
#
# N2_DIR is anchored to this CMakeLists.txt's directory so that globbing and
# include paths do not depend on how a relative path happens to be resolved.
set(N2_DIR "${CMAKE_CURRENT_SOURCE_DIR}/3rd/n2")

# CONFIGURE_DEPENDS re-runs the glob at build time so that added or removed
# .cc files are picked up without a manual re-configure.
file(GLOB_RECURSE N2_SRC CONFIGURE_DEPENDS "${N2_DIR}/src/*.cc")
add_library(n2 SHARED ${N2_SRC})

# NOTE(review): OpenMP_CXX_FLAGS is only populated by find_package(OpenMP),
# which is not visible in this part of the file; when unset it expands to
# nothing. It is deliberately left unquoted so an empty value adds no argument.
target_compile_options(n2 PRIVATE
  ${OpenMP_CXX_FLAGS}
)

# Preprocessor definitions belong in target_compile_definitions (no -D prefix).
target_compile_definitions(n2 PRIVATE BOOST_DISABLE_ASSERTS)

# Link the platform thread library through the imported target instead of a
# hard-coded library name.
find_package(Threads REQUIRED)
target_link_libraries(n2 PRIVATE Threads::Threads)

target_include_directories(n2
  PRIVATE
    "${N2_DIR}/include"
    "${N2_DIR}/third_party/eigen"
    "${N2_DIR}/third_party/spdlog/include"
)

# Each vendored boost module ships its headers under
# third_party/boost/<module>/include; the original search order is preserved.
set(n2_boost_modules
  mpl
  bind
  core
  heap
  mp11
  assert
  config
  detail
  utility
  iterator
  parameter
  type_traits
  preprocessor
  concept_check
  static_assert
  throw_exception
)
foreach(n2_boost_module IN LISTS n2_boost_modules)
  target_include_directories(n2
    PRIVATE
      "${N2_DIR}/third_party/boost/${n2_boost_module}/include"
  )
endforeach()
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
include cuda_setup.py
include CMakeLists.txt
include requirements.txt
include pyproject.toml
include tests/res/*.json
recursive-exclude buffalo/ *.cpp
recursive-include buffalo/ *.pyx
Expand Down
18 changes: 4 additions & 14 deletions NOTICE.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,16 +114,6 @@ Copyright 2005-2018, NumPy Developers.
BSD 3-Clause "New" or "Revised" License


**pathlib**

https://bitbucket.org/pitrou/pathlib

Copyright 2012 Antoine Pitrou


MIT License


**psutil**

https://github.com/giampaolo/psutil
Expand Down Expand Up @@ -164,14 +154,14 @@ Copyright 2007-2019 by the Sphinx team (see AUTHORS file).
BSD 2-Clause "Simplified" License


**TensorFlow**
**Tensorboard**

https://github.com/tensorflow/tensorflow
https://github.com/tensorflow/tensorboard

Copyright 2019 The TensorFlow Authors.
Copyright 2017 The TensorFlow Authors.


Apache License 2.0
Apache License 2.0


**tqdm**
Expand Down
4 changes: 2 additions & 2 deletions benchmark/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def filter_seen_items(_topk, seen, topk):
HIT += hit

# ndcg, map
idcg = idcgs[min(len(_gt), len(_topk)) - 1]
idcg = idcgs[min(len(_gt), topk) - 1]
dcg = 0.0
hit, miss, ap = 0.0, 0.0, 0.0

Expand All @@ -60,7 +60,7 @@ def filter_seen_items(_topk, seen, topk):

ndcg = dcg / idcg
NDCG += ndcg
ap /= min(len(_gt), len(_topk))
ap /= min(len(_gt), topk)
AP += ap
N += 1.0
AUC += auc
Expand Down
61 changes: 61 additions & 0 deletions buffalo/algo/_plsi.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# cython: experimental_cpp_class_def=True, language_level=3
# distutils: language=c++
# -*- coding: utf-8 -*-
import cython
import numpy as np
cimport numpy as np

from libcpp.string cimport string
from libcpp cimport bool as bool_t
from libc.stdint cimport int32_t, int64_t


# Declarations of the C++ class ``plsi::CPLSI`` from
# buffalo/algo_impl/plsi/plsi.hpp. Every member is declared ``nogil`` and
# ``except +``, so a C++ exception thrown by any of them is translated into
# a Python exception by Cython.
cdef extern from "buffalo/algo_impl/plsi/plsi.hpp" namespace "plsi":
    cdef cppclass CPLSI:
        # Setup / teardown.
        bool_t init(string) nogil except +
        void release() nogil except +

        # State management.
        void swap() nogil except +
        void reset() nogil except +

        # Model hand-over: (pointer, int) pairs for two float buffers.
        void initialize_model(float*, int, float*, int) nogil except +

        # Training step over (int, int, int64 buffer, int32 buffer, float buffer).
        float partial_update(int, int, int64_t*, int32_t*, float*) nogil except +

        void normalize(float, float) nogil except +


cdef class CyPLSI:
    """Python-visible holder that owns one heap-allocated C++ ``CPLSI`` object.

    Every method forwards directly to the same-named C++ member. The C++
    object is created in ``__cinit__`` and destroyed in ``__dealloc__``.
    """
    cdef CPLSI* obj  # C-PLSI object

    def __cinit__(self):
        self.obj = new CPLSI()

    def __dealloc__(self):
        # Cython runs __dealloc__ even when __cinit__ raised (e.g. the ``new``
        # above failed), in which case obj is still NULL and must not be
        # dereferenced.
        if self.obj != NULL:
            self.obj.release()
            del self.obj
            self.obj = NULL

    def init(self, opt_path):
        """Forward ``opt_path`` to ``CPLSI.init`` and return its boolean result.

        ``opt_path`` is coerced by Cython to a C++ ``std::string``.
        NOTE(review): without a ``c_string_type`` directive this presumably
        requires a ``bytes`` object rather than ``str`` -- confirm at call sites.
        """
        return self.obj.init(opt_path)

    def swap(self):
        """Call ``CPLSI.swap`` on the wrapped object."""
        self.obj.swap()

    def release(self):
        """Call ``CPLSI.release`` on the wrapped object."""
        self.obj.release()

    def reset(self):
        """Call ``CPLSI.reset`` on the wrapped object."""
        self.obj.reset()

    def initialize_model(self, np.ndarray[np.float32_t, ndim=2] P,
                         np.ndarray[np.float32_t, ndim=2] Q):
        """Hand the float32 matrices ``P`` and ``Q`` to the C++ object.

        For each matrix the address of its first element and its row count
        (``shape[0]``) are passed.
        NOTE(review): passing raw pointers assumes both arrays are
        C-contiguous and non-empty -- confirm against callers.
        """
        self.obj.initialize_model(&P[0, 0], P.shape[0],
                                  &Q[0, 0], Q.shape[0])

    def normalize(self, alpha1, alpha2):
        """Forward both arguments (converted to C floats) to ``CPLSI.normalize``."""
        self.obj.normalize(alpha1, alpha2)

    @cython.boundscheck(False)
    @cython.wraparound(False)
    def partial_update(self, int start_x, int next_x,
                       np.ndarray[np.int64_t, ndim=1] indptr,
                       np.ndarray[np.int32_t, ndim=1] keys,
                       np.ndarray[np.float32_t, ndim=1] vals):
        """Run ``CPLSI.partial_update`` and return its float result.

        ``start_x`` and ``next_x`` are passed through unchanged; ``indptr``,
        ``keys`` and ``vals`` are passed as pointers to their first element.
        Bounds checking is disabled for this method, so the three arrays must
        be non-empty.
        """
        return self.obj.partial_update(start_x, next_x, &indptr[0], &keys[0], &vals[0])
4 changes: 2 additions & 2 deletions buffalo/algo/als.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from buffalo.algo.options import ALSOption
from buffalo.algo.optimize import Optimizable
from buffalo.data.buffered_data import BufferedDataMatrix
from buffalo.algo.base import Algo, Serializable, TensorboardExtention
from buffalo.algo.base import Algo, Serializable, TensorboardExtension

inited_CUALS = True
try:
Expand All @@ -22,7 +22,7 @@
inited_CUALS = False


class ALS(Algo, ALSOption, Evaluable, Serializable, Optimizable, TensorboardExtention):
class ALS(Algo, ALSOption, Evaluable, Serializable, Optimizable, TensorboardExtension):
"""Python implementation for C-ALS.
Implementation of Collaborative Filtering for Implicit Feedback datasets.
Expand Down
44 changes: 11 additions & 33 deletions buffalo/algo/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import abc
import json
import pickle
Expand All @@ -9,10 +8,7 @@
import datetime

import numpy as np
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
from tensorflow.keras.utils import Progbar
# what the...
import tensorboard as tb
import absl.logging
logging.root.removeHandler(absl.logging._absl_handler)
absl.logging._warn_preinit_stderr = False
Expand Down Expand Up @@ -331,18 +327,15 @@ def instantiate(cls, cls_opt, path, data_fields):
return c


class TensorboardExtention(object):
class TensorboardExtension(object):
@abc.abstractmethod
def get_evaluation_metrics(self):
raise NotImplementedError

def _get_initial_tensorboard_data(self):
tb = aux.Option({'summary_writer': None,
'name': None,
'metrics': {},
'feed_dict': {},
'merged_summary_op': None,
'session': None,
'metrics': [],
'pbar': None,
'data_root': None,
'step': 1})
Expand All @@ -352,41 +345,28 @@ def initialize_tensorboard(self, num_steps, name_prefix='', name_postfix='', met
if not self.opt.tensorboard:
if not hasattr(self, '_tb_setted'):
self.logger.debug('Cannot find tensorboard configuration.')
self.tb_setted = False
self._tb_setted = False
return
name = self.opt.tensorboard.name
name = name_prefix + name + name_postfix
dtm = datetime.datetime.now().strftime('%Y%m%d-%H.%M')
template = self.opt.tensorboard.get('name_template', '{name}.{dtm}')
self._tb = self._get_initial_tensorboard_data()
self._tb.name = template.format(name=name, dtm=dtm)
if not os.path.isdir(self.opt.tensorboard.root):
os.makedirs(self.opt.tensorboard.root)
os.makedirs(self.opt.tensorboard.root, exist_ok=True)
tb_dir = os.path.join(self.opt.tensorboard.root, self._tb.name)
self._tb.data_root = tb_dir
self._tb.summary_writer = tf.summary.FileWriter(tb_dir)
if not metrics:
metrics = self.get_evaluation_metrics()
for m in metrics:
self._tb.metrics[m] = tf.placeholder(tf.float32)
tf.summary.scalar(m, self._tb.metrics[m])
self._tb.feed_dict[self._tb.metrics[m]] = 0.0
self._tb.merged_summary_op = tf.summary.merge_all()
self._tb.session = tf.Session()
self._tb.pbar = Progbar(num_steps, stateful_metrics=self._tb.metrics, verbose=0)
self._tb.summary_writer = tb.summary.Writer(tb_dir)
self._tb.metrics = metrics if metrics is not None else self.get_evaluation_metrics()
self._tb_setted = True

def update_tensorboard_data(self, metrics):
if not self.opt.tensorboard:
return
metrics = [(m, np.float32(metrics.get(m, 0.0)))
for m in self._tb.metrics.keys()]
self._tb.feed_dict = {self._tb.metrics[k]: v
for k, v in metrics}
summary = self._tb.session.run(self._tb.merged_summary_op,
feed_dict=self._tb.feed_dict)
self._tb.summary_writer.add_summary(summary, self._tb.step)
self._tb.pbar.update(self._tb.step, metrics)
for m in self._tb.metrics:
v = metrics.get(m, 0.0)
self._tb.summary_writer.add_scalar(m, v, self._tb.step)
self._tb.summary_writer.flush()
self._tb.step += 1

def finalize_tensorboard(self):
Expand All @@ -395,6 +375,4 @@ def finalize_tensorboard(self):
with open(os.path.join(self._tb.data_root, 'opt.json'), 'w') as fout:
fout.write(json.dumps(self.opt, indent=2))
self._tb.summary_writer.close()
self._tb.session.close()
self._tb = None
tf.reset_default_graph()
14 changes: 7 additions & 7 deletions buffalo/algo/bpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from buffalo.algo.options import BPRMFOption
from buffalo.algo.optimize import Optimizable
from buffalo.data.buffered_data import BufferedDataMatrix
from buffalo.algo.base import Algo, Serializable, TensorboardExtention
from buffalo.algo.base import Algo, Serializable, TensorboardExtension

# TODO init structure of gpu modules will be abstracted to a higher module
inited_CUBPR = True
Expand All @@ -25,7 +25,7 @@
inited_CUBPR = False


class BPRMF(Algo, BPRMFOption, Evaluable, Serializable, Optimizable, TensorboardExtention):
class BPRMF(Algo, BPRMFOption, Evaluable, Serializable, Optimizable, TensorboardExtension):
"""Python implementation for C-BPRMF.
"""
def __init__(self, opt_path=None, *args, **kwargs):
Expand All @@ -41,7 +41,7 @@ def __init__(self, opt_path=None, *args, **kwargs):
self.opt, self.opt_path = self.get_option(opt_path)

if self.opt.accelerator and not inited_CUBPR:
self.logger.error(f"ImportError CuBPRMF, no cuda library exists.")
self.logger.error('ImportError CuBPRMF, no cuda library exists.')
raise RuntimeError()
self.obj = CuBPRMF() if self.opt.accelerator else CyBPRMF()

Expand Down Expand Up @@ -92,11 +92,11 @@ def init_factors(self):
for attr_name in ['P', 'Q', 'Qb']:
setattr(self, attr_name, None)
self.P = np.abs(np.random.normal(scale=1.0 / (self.opt.d ** 2),
size=(header['num_users'], self.opt.d)).astype("float32"), order='C')
size=(header['num_users'], self.opt.d)).astype('float32'), order='C')
self.Q = np.abs(np.random.normal(scale=1.0 / (self.opt.d ** 2),
size=(header['num_items'], self.opt.d)).astype("float32"), order='C')
size=(header['num_items'], self.opt.d)).astype('float32'), order='C')
self.Qb = np.abs(np.random.normal(scale=1.0 / (self.opt.d ** 2),
size=(header['num_items'], 1)).astype("float32"), order='C')
size=(header['num_items'], 1)).astype('float32'), order='C')
if not self.opt.use_bias:
self.Qb *= 0
self.obj.initialize_model(self.P, self.Q, self.Qb, self.num_nnz)
Expand Down Expand Up @@ -200,7 +200,7 @@ def compute_loss(self):
def _prepare_train(self):
if self.opt.accelerator:
vdim = self.obj.get_vdim()
for attr in ["P", "Q"]:
for attr in ['P', 'Q']:
F = getattr(self, attr)
if F.shape[1] < vdim:
_F = np.empty(shape=(F.shape[0], vdim), dtype=np.float32)
Expand Down
4 changes: 2 additions & 2 deletions buffalo/algo/cfr.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
from buffalo.algo.options import CFROption
from buffalo.algo.optimize import Optimizable
from buffalo.data.buffered_data import BufferedDataMatrix
from buffalo.algo.base import Algo, Serializable, TensorboardExtention
from buffalo.algo.base import Algo, Serializable, TensorboardExtension


class CFR(Algo, CFROption, Evaluable, Serializable, Optimizable, TensorboardExtention):
class CFR(Algo, CFROption, Evaluable, Serializable, Optimizable, TensorboardExtension):
"""Python implementation for CoFactor.
Reference: Factorization Meets the Item Embedding:
Expand Down

0 comments on commit 0ff776f

Please sign in to comment.