
Merge pull request #58 from yokomotod/tdidf-back-compat
revert dummy tdidf.py for pickle backward compatibility
Hi-king committed Jun 1, 2022
2 parents 7824381 + 43a373d commit 957aa02
Showing 27 changed files with 39 additions and 0 deletions.
@@ -12,6 +12,7 @@ class DimensionReductionModel(object):
""" Reduce the dimension of vector values with respect to its importance.
The importance is calculated by sum of squared values.
"""

def __init__(self, dimension_size: int) -> None:
self.dimension_size = dimension_size
self.top_n_indices = None
1 change: 1 addition & 0 deletions redshells/contrib/model/early_stopping.py
@@ -11,6 +11,7 @@


class EarlyStopping(object):

def __init__(self, try_count=1, learning_rate=0., decay_speed=2.0, threshold=0.001, save_directory: str = None):
self._save_path = os.path.join(save_directory, 'model.ckpt') if save_directory else None
self._try_count = try_count
2 changes: 2 additions & 0 deletions redshells/contrib/model/factorization_machine.py
@@ -18,6 +18,7 @@


class FactorizationMachineGraph(object):

def __init__(self, input_size: int, feature_kind_size: int, embedding_size: int, l2_weight: float, learning_rate: float, scope_name: str = '') -> None:

with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
@@ -59,6 +60,7 @@ class FactorizationMachine(sklearn.base.BaseEstimator):
For details of the algorithm, see "Factorization Machines" which is available at https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf
"""

def __init__(self,
embedding_size: int,
l2_weight: float,
@@ -14,6 +14,7 @@


class FeatureAggregationSimilarityDataset(tf.keras.utils.Sequence):

def __init__(self,
x_item_indices: np.ndarray,
y_item_indices: np.ndarray,
@@ -71,6 +72,7 @@ def __len__(self):


class Average(tf.keras.layers.Layer):

def __init__(self, **kwargs):
super(Average, self).__init__(**kwargs)

@@ -94,6 +96,7 @@ def from_config(cls, config):


class Clip(tf.keras.layers.Layer):

def __init__(self, **kwargs):
super(Clip, self).__init__(**kwargs)

@@ -116,6 +119,7 @@ def from_config(cls, config):


class FeatureAggregationSimilarityGraph(object):

def __init__(self,
feature_size: int,
embedding_size: int,
@@ -165,6 +169,7 @@ def clip(x):


class FeatureAggregationSimilarityModel(object):

def __init__(
self,
embedding_size: int,
4 changes: 4 additions & 0 deletions redshells/contrib/model/gcmc_dataset.py
@@ -12,6 +12,7 @@


class GcmcDataset(object):

def __init__(self,
user_ids: np.ndarray,
item_ids: np.ndarray,
@@ -28,6 +29,7 @@ def __init__(self,
self.item_features = item_features

def filter(self, user_ids: Set, item_ids: Set, ratings: Set):

def _is_valid(u, i, r):
return u in user_ids and i in item_ids and r in ratings

@@ -39,6 +41,7 @@ def _is_valid(u, i, r):


class GcmcIdMap(object):

def __init__(self,
ids: np.ndarray,
features: Optional[List[Dict[Any, np.ndarray]]] = None,
@@ -150,6 +153,7 @@ def _sort_features(cls, features: List[Dict[Any, np.ndarray]], order_map: Dict)


class GcmcGraphDataset(object):

def __init__(self, dataset: GcmcDataset, test_size: float, min_user_click_count: int = 0, max_user_click_count: int = sys.maxsize) -> None:
self._user = GcmcIdMap(dataset.user_ids, features=dataset.user_features, min_count=min_user_click_count, max_count=max_user_click_count)
self._item = GcmcIdMap(dataset.item_ids, features=dataset.item_features)
@@ -32,6 +32,7 @@ def _convert_sparse_matrix_to_sparse_tensor(x):


class GraphConvolutionalMatrixCompletionGraph(object):

def __init__(self,
n_rating: int,
n_user: int,
@@ -216,6 +217,7 @@ def _to_constant(x):


class GraphConvolutionalMatrixCompletion(object):

def __init__(self,
graph_dataset: GcmcGraphDataset,
encoder_hidden_size: int,
2 changes: 2 additions & 0 deletions redshells/contrib/model/matrix_factorization_model.py
@@ -16,6 +16,7 @@


class MatrixFactorizationGraph(object):

def __init__(self, n_items: int, n_users: int, n_latent_factors: int, n_services: int, reg_item: float, reg_user: float, scope_name: str,
use_l2_upper_regularization: bool, average: float, standard_deviation: float) -> None:
# placeholder
@@ -91,6 +92,7 @@ def __init__(self, n_items: int, n_users: int, n_latent_factors: int, n_services


class MatrixFactorization(object):

def __init__(self,
n_latent_factors: int,
learning_rate: float,
1 change: 1 addition & 0 deletions redshells/factory/optuna_param_factory.py
@@ -63,6 +63,7 @@ def _catboostclassifier_default(trial: optuna.trial.Trial):


class _OptunaParamFactory(metaclass=Singleton):

def __init__(self):
self._rules = dict()
self._rules['XGBClassifier_default'] = _xgbclassifier_default
1 change: 1 addition & 0 deletions redshells/factory/prediction_model_factory.py
@@ -4,6 +4,7 @@


class _PredictionModelFactory(metaclass=Singleton):

def __init__(self):
self._models = dict()
try:
1 change: 1 addition & 0 deletions redshells/model/lda_model.py
@@ -12,6 +12,7 @@
class LdaModel(object):
"""TopicModel is a kind of wrapper of LdaModel in gensim module.
"""

def __init__(self, n_topics: int, chunksize: int = 16, decay: float = 0.5, offset: int = 16, iterations: int = 3, eta: float = 1.0e-16) -> None:
self.n_topics = n_topics
self.chunksize = chunksize
1 change: 1 addition & 0 deletions redshells/model/scdv.py
@@ -17,6 +17,7 @@ class SCDV(object):
See https://arxiv.org/pdf/1612.06778.pdf for details
"""

def __init__(self, documents: List[List[str]], cluster_size: int, sparsity_percentage: float, gaussian_mixture_kwargs: Dict[Any, Any],
dictionary: gensim.corpora.Dictionary, w2v: Union[FastText, Word2Vec]) -> None:
"""
Expand Down
3 changes: 3 additions & 0 deletions redshells/model/tdidf.py
@@ -0,0 +1,3 @@
# for pickle backward compatibility
# https://github.com/m3dev/redshells/pull/56#discussion_r711488588
from redshells.model.tfidf import Tfidf
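
The new file above is the substance of this PR: pickle records the defining module path of a class, so objects serialized before the rename of tdidf.py to tfidf.py refer to redshells.model.tdidf.Tfidf and can no longer be unpickled once that module is gone. Re-exporting Tfidf from the old path keeps those pickles loadable. A minimal sketch of the effect, assuming a pickle produced by an older redshells (the file name is hypothetical and the snippet is not part of this diff):

    import pickle

    # An object pickled by an older redshells stores the class path
    # "redshells.model.tdidf.Tfidf"; pickle.load() re-imports that path.
    with open("old_tfidf_model.pkl", "rb") as f:  # hypothetical pre-rename pickle
        model = pickle.load(f)  # works only because redshells.model.tdidf is importable again

    # The shim makes both module paths resolve to the same class object.
    from redshells.model import tdidf, tfidf
    assert tdidf.Tfidf is tfidf.Tfidf
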
1 change: 1 addition & 0 deletions redshells/model/tfidf.py
@@ -9,6 +9,7 @@


class Tfidf(object):

def __init__(self, dictionary: gensim.corpora.Dictionary, tokens: List[List[str]]) -> None:
self.dictionary = dictionary
self.tfidf = gensim.models.TfidfModel([dictionary.doc2bow(t) for t in tokens])
1 change: 1 addition & 0 deletions redshells/train/utils/token_iterator.py
@@ -5,6 +5,7 @@


class TokenIterator(object):

def __init__(self, texts: List[str]) -> None:
self.texts = texts
self.i = 0
@@ -10,6 +10,7 @@


class TestFeatureAggregationSimilarityModel(unittest.TestCase):

def test(self):
model = FeatureAggregationSimilarityModel(embedding_size=7, learning_rate=0.001, feature_size=2, item_size=4, max_feature_index=5)
dataset = FeatureAggregationSimilarityDataset(x_item_indices=np.array([0, 1, 2]),
1 change: 1 addition & 0 deletions test/contrib/model/test_gcmc_graph_dataset.py
@@ -13,6 +13,7 @@ def _make_sparse_matrix(n, m, n_values):


class TestGcmcGraphDataset(unittest.TestCase):

def test(self):
# This tests that GraphConvolutionalMatrixCompletion runs without error, and its loss and rmse are small enough.
n_users = 101
1 change: 1 addition & 0 deletions test/contrib/model/test_gcmc_graph_dataset_map.py
@@ -9,6 +9,7 @@


class TestGcmcGraphDataset(unittest.TestCase):

def setUp(self) -> None:
dataset = GcmcDataset(user_ids=np.array([0, 1, 2]), item_ids=np.array([10, 11, 12]), ratings=np.array([100, 101, 102]))
self.graph_dataset = GcmcGraphDataset(dataset=dataset, test_size=0.1)
1 change: 1 addition & 0 deletions test/contrib/model/test_gcmc_id_map.py
@@ -7,6 +7,7 @@


class TestGcmcIdMap(unittest.TestCase):

def test_initialize(self):
ids = np.array([0, 0, 1, 2, 3, 3])
features = [{0: np.array([0]), 1: np.array([1])}]
@@ -15,6 +15,7 @@ def _make_sparse_matrix(n, m, n_values):


class GraphConvolutionalMatrixCompletionTest(unittest.TestCase):

def test_run(self):
# This tests that GraphConvolutionalMatrixCompletion runs without error, and its loss and rmse are small enough.
n_users = 101
1 change: 1 addition & 0 deletions test/contrib/train/test_gcmc_dataset.py
@@ -10,6 +10,7 @@


class TestGCMCDataset(unittest.TestCase):

def test_without_information(self):
user_ids = np.array([1, 1, 2, 2, 2])
item_ids = np.array([1, 2, 1, 2, 3])
@@ -18,6 +18,7 @@ class _DummyTask(gokart.TaskOnKart):


class TestTrainFeatureAggregationSimilarityModel(unittest.TestCase):

def setUp(self):
self.input_data = None
self.dump_data = None
1 change: 1 addition & 0 deletions test/model/test_tfidf.py
@@ -11,6 +11,7 @@


class TestTfidf(unittest.TestCase):

def test_apply_with_empty(self):
texts = [random.choices(string.ascii_letters, k=100) for _ in range(100)]
dictionary = gensim.corpora.Dictionary(texts)
1 change: 1 addition & 0 deletions test/train/test_train_doc2vec.py
@@ -12,6 +12,7 @@ class _DummyTask(luigi.Task):


class TrainDoc2VecTest(unittest.TestCase):

def setUp(self):
self.input_data = None
self.dump_data = None
1 change: 1 addition & 0 deletions test/train/test_train_fasttext.py
@@ -11,6 +11,7 @@ class _DummyTask(gokart.TaskOnKart):


class TrainFastTextTest(unittest.TestCase):

def setUp(self):
self.input_data = None
self.dump_data = None
1 change: 1 addition & 0 deletions test/train/test_train_lda_model.py
@@ -12,6 +12,7 @@ class _DummyTask(luigi.Task):


class TrainLdaModelTest(unittest.TestCase):

def setUp(self):
self.input_data = None
self.dump_data = None
1 change: 1 addition & 0 deletions test/train/test_train_pairwise_similarity_model.py
@@ -13,6 +13,7 @@ class _DummyTask(luigi.Task):


class TrainPairwiseSimilarityModelTest(unittest.TestCase):

def setUp(self):
self.input_data = dict()
self.dump_data = None
1 change: 1 addition & 0 deletions test/train/test_train_word2vec.py
@@ -12,6 +12,7 @@ class _DummyTask(luigi.Task):


class TrainWord2VecTest(unittest.TestCase):

def setUp(self):
self.input_data = None
self.dump_data = None
