Commit

merge dev to master
jasper430 committed Oct 21, 2019
2 parents 2a31cc5 + f03052b commit dd08963
Showing 38 changed files with 656 additions and 348 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -105,3 +105,6 @@ venv.bak/

# dir
logdir/

# Untitled.ipynb
Untitled.ipynb
12 changes: 6 additions & 6 deletions requirements.txt
@@ -1,9 +1,9 @@
matplotlib==3.1.1
numpy==1.17.0
pandas==0.25.0
scipy==1.3.1
scikit-learn==0.21.3
sqlalchemy==1.3.6
matplotlib>=3.1.1
numpy>=1.17.0
pandas>=0.25.0
scipy>=1.3.1
scikit-learn>=0.21.3
sqlalchemy>=1.3.6
tensorboard==1.14.0
texttable==1.6.2
torch==1.3.0+cpu
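
The switch from exact pins (==) to minimum-version pins (>=) for matplotlib, numpy, pandas, scipy, scikit-learn and sqlalchemy keeps a floor on each dependency while allowing newer releases to resolve. A small sketch of how such specifiers evaluate, using the third-party packaging library; illustration only, not part of this commit:

# Illustrative sketch: how ">=" specifiers behave compared to "==" pins.
# Uses the third-party "packaging" library; not part of this commit.
from packaging.specifiers import SpecifierSet
from packaging.version import Version

minimum_pin = SpecifierSet(">=1.17.0")   # e.g. numpy>=1.17.0 above
exact_pin = SpecifierSet("==1.14.0")     # e.g. tensorboard==1.14.0 above

print(Version("1.17.0") in minimum_pin)  # True  -> the floor itself is accepted
print(Version("1.18.5") in minimum_pin)  # True  -> newer releases are accepted
print(Version("1.16.4") in minimum_pin)  # False -> older releases are rejected
print(Version("1.14.1") in exact_pin)    # False -> only 1.14.0 satisfies "=="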
22 changes: 19 additions & 3 deletions setup.py
@@ -13,7 +13,7 @@
# Required: project name
name = "torecsys",
# Required: tag
version = "0.0.5",
version = "0.0.5.dev1",
# Optional: short description
description="Pure PyTorch Recommender System Module",
# Optional: long description
@@ -38,8 +38,24 @@
keywords = "recommendationsystem machinelearning research",
# Required: packages
packages = find_packages(exclude=["contrib", "docs", "tests"]),
# Optional: install_required
install_required = ["torch"],
# Optional: install_requires
install_requires = [
"matplotlib>=3.1.1",
"numpy>=1.17.0",
"pandas>=0.25.0",
"scipy>=1.3.1",
"scikit-learn>=0.21.3",
"sqlalchemy>=1.3.6",
"tensorboard==1.14.0",
"texttable>=1.6.2",
"torch==1.2.0",
"torchaudio==0.3.0",
"torchtext==0.4.0",
"torchvision==0.4.0",
"tqdm>=4.33.0"
],
# Optional: python_requires,
python_requires = ">=3.7",
# Optional: extras_required
# extras_required = {},
# Optional: extra project url
1 change: 1 addition & 0 deletions torecsys/__init__.py
@@ -11,5 +11,6 @@
from .losses import *
from .metrics import *
from .models import *
from .utils.training.ranking_trainer import RankingTrainer
from .utils.training.trainer import Trainer
import torecsys.utils
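
With the added re-export, both trainer classes are reachable from the package root. A minimal sketch that only exercises the imports shown above:

# Both trainers are importable from the package root after this change.
import torecsys as trs

print(trs.Trainer)         # re-exported from torecsys.utils.training.trainer
print(trs.RankingTrainer)  # re-exported from torecsys.utils.training.ranking_trainer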
101 changes: 88 additions & 13 deletions torecsys/data/negsampling/__init__.py
@@ -2,37 +2,112 @@
"""

import torch
from typing import Dict

class _NegativeSampler(object):
r"""Base Class of Negative Sampler
"""
def __init__(self):
raise NotImplementedError("")
def __init__(self,
kwargs_dict: Dict[str, Dict[str, int]]):
r"""Initialize a Negative sampler which draw samples with uniform distribution with replacement
Args:
kwargs_dict (Dict[str, Dict[str, int]]): A dictionary, where key is field's name and value, including low and high,
is a dictionary, where key is name of argument and value is value of argument.
"""
self.kwargs_dict = kwargs_dict
self.dict_size = {k: self._getlen(v) for k, v in kwargs_dict.items()}

def _getlen(self) -> int:
r"""Get length of field.
Raises:
NotImplementedError: when the function `_getlen` is not implemented.
Returns:
int: Length of field.
"""
raise NotImplementedError("_getlen is not implemented in Base Class.")

def __len__(self) -> int:
def __len__(self) -> Dict[str, int]:
r"""Return size of dictionary.
Returns:
int: total number of words in dictionary
Dict[str, int]: A dictionary, where key is field's name and value is the total number of words in that field
"""
raise self.dict_size
return self.dict_size

def __call__(self, size: int) -> torch.Tensor:
return self.generate(size)
def size(self) -> Dict[str, int]:
r"""Return size of dictionary.
Returns:
Dict[str, int]: A dictionary, where key is field's name and value is the total number of words in that field
"""
return self.__len__()

def generate(self, size: int) -> torch.Tensor:
r"""Return drawn samples.
def __call__(self, *args, **kwargs) -> Dict[str, torch.Tensor]:
"""Return drawn samples.
Args:
size (int): Number of negative samples to be drawn
pos_samples (Dict[str, T]): A dictionary of positive samples, where key is field's name and value is
the tensor of that field with shape = (N, 1) and dtype = torch.long.
size (int): Number of negative samples to be generated per positive sample.
Returns:
Dict[str, T]: A dictionary of negative samples, where key is field's name and value is the tensor of
that field with shape = (N * Nneg, 1) and dtype = torch.long.
"""
return self.generate(*args, **kwargs)

def _generate(self) -> torch.Tensor:
"""A function to generate negative samples.
Raises:
NotImplementedError: not implementated in base class
NotImplementedError: when the function `_generate` is not implemented.
Returns:
torch.Tensor, shape = (size, 1), dtype = torch.long: Drawn negative samples
T, shape = (N * Nneg, 1), dtype = torch.long: Tensor of negative samples generated by the given function.
"""
raise NotImplementedError("")
raise NotImplementedError("_generate is not implemented in Base Class.")

def generate(self,
pos_samples: Dict[str, torch.Tensor],
size: int) -> Dict[str, torch.Tensor]:
"""Return drawn samples.
Args:
pos_samples (Dict[str, T]): A dictionary of positive samples, where key is field's name and value is
the tensor of that field with shape = (N, ...) and dtype = torch.long.
size (int): Number of negative samples to be generated per positive sample.
Returns:
Dict[str, T]: A dictionary of negative samples, where key is field's name and value is the tensor of
that field with shape = (N * Nneg, ...) and dtype = torch.long.
"""
# Get the fields that are to be re-drawn by the sampler.
keys = list(self.kwargs_dict.keys())

neg_samples = {}

for k, v in pos_samples.items():
if k in keys:
# Generate negative samples with sampler.
# Get batch size of field and calculate number of samples to be generated.
batch_size = v.size(0)
num_neg = size * batch_size

# Get arguments of the field to be called in _generate.
kwargs = self.kwargs_dict[k]
kwargs["size"] = num_neg

# Generate the negative samples.
neg_samples[k] = self._generate(**kwargs)

else:
# Repeat positive samples n (i.e. size) times.
neg_samples[k] = v.repeat(1, size).view(-1, 1)

return neg_samples

from .multinomial_sampler import MultinomialSampler
from .uniform_sampler import UniformSamplerWithoutReplacement
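
Taken together, a concrete sampler only has to supply _getlen and _generate; the base class handles batching, the size bookkeeping and the repetition of fields that are not re-drawn. A minimal sketch of a custom subclass under that contract; the field names, the toy ConstantSampler class and its "value" argument are illustrative assumptions, not part of this commit:

# Toy subclass illustrating the _getlen/_generate contract of _NegativeSampler.
import torch
from typing import Dict
from torecsys.data.negsampling import _NegativeSampler

class ConstantSampler(_NegativeSampler):
    r"""Toy sampler that always draws the same id, to illustrate the contract."""
    @staticmethod
    def _getlen(v: Dict[str, int]) -> int:
        return 1  # this toy sampler's "dictionary" holds a single id

    @staticmethod
    def _generate(value: int, size: int) -> torch.Tensor:
        return torch.full((size, 1), value, dtype=torch.long)

sampler = ConstantSampler(kwargs_dict={"item_id": {"value": 0}})
pos = {
    "user_id": torch.arange(4).view(-1, 1),   # kept, only repeated
    "item_id": torch.randint(0, 10, (4, 1)),  # re-drawn by the sampler
}
neg = sampler.generate(pos, size=2)
print({k: tuple(v.shape) for k, v in neg.items()})  # both fields: (8, 1)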
34 changes: 16 additions & 18 deletions torecsys/data/negsampling/multinomial_sampler.py
@@ -1,34 +1,32 @@
from . import _NegativeSampler
import torch

from typing import Dict

class MultinomialSampler(_NegativeSampler):
r"""MutlinomialSampler is to generate negative samplers by multinomial distribution, i.e. draw samples by given probabilities
"""
def __init__(self,
weights : torch.Tensor,
with_replacement : bool = True):
r"""Initialize a Negative sampler which draw samples with multinomial distribution
@staticmethod
def _getlen(v: Dict[str, int]) -> int:
r"""Get length of field.
Args:
weights (torch.Tensor): weights (probabilities) to draw samples, with shape = (total number of words in dictionary, ).
with_replacement (bool, optional): boolean flag to control the replacement of sampling. Defaults to True.
Returns:
int: Length of field.
"""
self.with_replacement = with_replacement
if isinstance(weights, torch.Tensor):
self.weights = weights
else:
self.weights = torch.Tensor(weights)
self.dict_size = len(self.weights)
return len(v["weights"])

def generate(self, size: int) -> torch.Tensor:
r"""Return drawn samples.
def _generate(self,
weights : torch.Tensor,
with_replacement : bool,
size : int) -> torch.Tensor:
"""A function to generate negative samples with multinomial distribution.
Args:
size (int): Number of negative samples to be drawn
weights (torch.Tensor): the input tensor containing probabilities
with_replacement (bool): whether to draw with replacement or not
size (int): number of samples to draw
Returns:
torch.Tensor, shape = (size, 1), dtype = torch.long: Drawn negative samples
T, shape = (N * Nneg, 1), dtype = torch.long: Tensor of negative samples generated by multinomial distribution.
"""
samples = torch.multinomial(weights, size, replacement=with_replacement)
return samples.view(-1, 1).long()
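
A hedged usage sketch of MultinomialSampler under the per-field kwargs_dict API shown above, assuming _generate consumes the weights and with_replacement entries passed through kwargs_dict; the field names and weights are illustrative assumptions:

# Draw item negatives from a multinomial over 4 item ids; repeat the user ids.
import torch
from torecsys.data.negsampling import MultinomialSampler

item_weights = torch.tensor([0.1, 0.3, 0.2, 0.4])  # one probability per item id
sampler = MultinomialSampler(kwargs_dict={
    "item_id": {"weights": item_weights, "with_replacement": True}
})

pos = {
    "user_id": torch.randint(0, 100, (4, 1)),  # repeated, not re-drawn
    "item_id": torch.randint(0, 4, (4, 1)),    # re-drawn from the multinomial
}
neg = sampler.generate(pos, size=2)            # 2 negatives per positive row
print({k: tuple(v.shape) for k, v in neg.items()})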
Empty file.
77 changes: 36 additions & 41 deletions torecsys/data/negsampling/uniform_sampler.py
@@ -1,67 +1,62 @@
from . import _NegativeSampler
import torch

from typing import Dict

class UniformSamplerWithReplacement(_NegativeSampler):
r"""UniformSamplerWithReplacement is to generate negative samplers by uniform distribution with replacement, i.e. draw samples uniformlly with replacement
"""
def __init__(self,
low : int,
high : int):
r"""Initialize a Negative sampler which draw samples with uniform distribution with replacement
@staticmethod
def _getlen(v: Dict[str, int]) -> int:
r"""Get length of field.
Args:
low (int): minimum value (i.e. lower bound) of sampling id.
high (int): maximum value (i.e. upper bound) of sampling id.
Returns:
int: Length of field.
"""
self.low = low
self.high = high
self.dict_size = self.high - self.low

def generate(self, size: int) -> torch.Tensor:
r"""Return drawn samples.
return v["high"] - v["low"]

@staticmethod
def _generate(low : int,
high : int,
size : int) -> torch.Tensor:
r"""A function to generate negative samples with uniform distribution with replacement.
Args:
size (int): Number of negative samples to be drawn
low (int): Lowest integer to be drawn from the distribution.
high (int): One above the highest integer to be drawn from the distribution.
size (int): An integer defining the shape of the output tensor.
Returns:
torch.Tensor, shape = (size, 1), dtype = torch.long: Drawn negative samples
T, shape = (N * Nneg, 1), dtype = torch.long: Tensor of negative samples generated by uniform distribution.
"""
return torch.randint(low=self.low, high=self.high, size=(size, )).long()
return torch.randint(low=low, high=high, size=(size, 1)).long()


class UniformSamplerWithoutReplacement(_NegativeSampler):
r"""UniformSamplerWithReplacement is to generate negative samplers by uniform distribution without replacement, i.e. draw samples uniformlly without replacement
"""
def __init__(self,
low : int,
high : int):
r"""Initialize a Negative sampler which draw samples with uniform distribution without replacement
@staticmethod
def _getlen(v: Dict[str, int]) -> int:
r"""Get length of field.
Returns:
int: Length of field.
"""
return v["high"] - v["low"]

@staticmethod
def _generate(low : int,
high : int,
size : int) -> torch.Tensor:
"""A function to generate negative samples with uniform distribution without replacement.
Args:
low (int): minimum value (i.e. lower bound) of sampling id.
high (int): maximum value (i.e. upper bound) of sampling id.
"""
self.low = low
self.high = high
self.dict_size = self.high - self.low

def generate(self, size: int) -> torch.Tensor:
r"""Generate negative samples by the sampler
Args:
size (int): Number of negative samples to be drawn
Raises:
ValueError: if input size is larger than the size of dictionary (i.e. high - low)
size (int): An integer defining the shape of the output tensor.
Returns:
torch.Tensor, shape = (size, 1), dtype = torch.long: Drawn negative samples
T, shape = (N * Nneg, 1), dtype = torch.long: Tensor of negative samples generated by uniform distribution.
"""

if size >= (self.high - self.low):
raise ValueError("input size cannot be larger than size of samples.")

samples = torch.randperm(n=self.high) + self.low
samples = torch.randperm(n=high) + low
samples = samples[:size]
return samples.long()
return samples.view(-1, 1)
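
A hedged usage sketch of the uniform samplers under the kwargs_dict API shown above; the field names and the id range [0, 1000) are illustrative assumptions:

# Draw item negatives uniformly from [0, 1000); repeat the user ids.
import torch
from torecsys.data.negsampling.uniform_sampler import UniformSamplerWithReplacement

sampler = UniformSamplerWithReplacement(kwargs_dict={"item_id": {"low": 0, "high": 1000}})

pos = {
    "user_id": torch.randint(0, 100, (4, 1)),   # repeated, not re-drawn
    "item_id": torch.randint(0, 1000, (4, 1)),  # re-drawn uniformly
}
neg = sampler.generate(pos, size=2)  # N * Nneg = 4 * 2 = 8 rows per field
print({k: tuple(v.shape) for k, v in neg.items()})

UniformSamplerWithoutReplacement follows the same pattern, but since it draws from a permutation of the id range, the requested number of negatives should not exceed high - low.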
6 changes: 5 additions & 1 deletion torecsys/data/sampledata/download_data.py
@@ -3,9 +3,13 @@
import os
from pathlib import Path
import requests
from tqdm.autonotebook import tqdm
import warnings
import zipfile

# ignore import warnings from the packages below
with warnings.catch_warnings():
warnings.simplefilter("ignore")
from tqdm.autonotebook import tqdm

def download_ml_data(size : str,
dir : str = None):
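
The tqdm import is now wrapped in warnings.catch_warnings() so the autonotebook shim stays quiet at import time. A hedged usage sketch of the downloader whose signature is shown here; the size value and target directory are assumed example values, since the rest of the function body is collapsed in this diff:

# Assumed example values for size and dir; only the signature is shown above.
from torecsys.data.sampledata.download_data import download_ml_data

download_ml_data(size="latest-small", dir="./data")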
