Commit

merge dev to master
jasper430 committed Oct 21, 2019
2 parents 2a31cc5 + f03052b commit dd08963
Showing 38 changed files with 656 additions and 348 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -105,3 +105,6 @@ venv.bak/

# dir
logdir/

# Untitled.ipynb
Untitled.ipynb
12 changes: 6 additions & 6 deletions requirements.txt
@@ -1,9 +1,9 @@
matplotlib==3.1.1
numpy==1.17.0
pandas==0.25.0
scipy==1.3.1
scikit-learn==0.21.3
sqlalchemy==1.3.6
matplotlib>=3.1.1
numpy>=1.17.0
pandas>=0.25.0
scipy>=1.3.1
scikit-learn>=0.21.3
sqlalchemy>=1.3.6
tensorboard==1.14.0
texttable==1.6.2
torch==1.3.0+cpu
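
The switch from exact pins (==) to minimum-version pins (>=) for matplotlib, numpy, pandas, scipy, scikit-learn and sqlalchemy keeps a floor on each dependency while allowing newer releases to resolve. A small sketch of how such specifiers evaluate, using the third-party packaging library; illustration only, not part of this commit:

# Illustrative sketch: how ">=" specifiers behave compared to "==" pins.
# Uses the third-party "packaging" library; not part of this commit.
from packaging.specifiers import SpecifierSet
from packaging.version import Version

minimum_pin = SpecifierSet(">=1.17.0")   # e.g. numpy>=1.17.0 above
exact_pin = SpecifierSet("==1.14.0")     # e.g. tensorboard==1.14.0 above

print(Version("1.17.0") in minimum_pin)  # True  -> the floor itself is accepted
print(Version("1.18.5") in minimum_pin)  # True  -> newer releases are accepted
print(Version("1.16.4") in minimum_pin)  # False -> older releases are rejected
print(Version("1.14.1") in exact_pin)    # False -> only 1.14.0 satisfies "=="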
22 changes: 19 additions & 3 deletions setup.py
@@ -13,7 +13,7 @@
# Required: project name
name = "torecsys",
# Required: tag
version = "0.0.5",
version = "0.0.5.dev1",
# Optional: short description
description="Pure PyTorch Recommender System Module",
# Optional: long description
@@ -38,8 +38,24 @@
keywords = "recommendationsystem machinelearning research",
# Required: packages
packages = find_packages(exclude=["contrib", "docs", "tests"]),
# Optional: install_required
install_required = ["torch"],
# Optional: install_requires
install_requires = [
"matplotlib>=3.1.1",
"numpy>=1.17.0",
"pandas>=0.25.0",
"scipy>=1.3.1",
"scikit-learn>=0.21.3",
"sqlalchemy>=1.3.6",
"tensorboard==1.14.0",
"texttable>=1.6.2",
"torch==1.2.0",
"torchaudio==0.3.0",
"torchtext==0.4.0",
"torchvision==0.4.0",
"tqdm>=4.33.0"
],
# Optional: python_requires,
python_requires = ">=3.7",
# Optional: extras_required
# extras_required = {},
# Optional: extra project url
1 change: 1 addition & 0 deletions torecsys/__init__.py
@@ -11,5 +11,6 @@
from .losses import *
from .metrics import *
from .models import *
from .utils.training.ranking_trainer import RankingTrainer
from .utils.training.trainer import Trainer
import torecsys.utils
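
With the added re-export, both trainer classes are reachable from the package root. A minimal sketch that only exercises the imports shown above:

# Both trainers are importable from the package root after this change.
import torecsys as trs

print(trs.Trainer)         # re-exported from torecsys.utils.training.trainer
print(trs.RankingTrainer)  # re-exported from torecsys.utils.training.ranking_trainer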
101 changes: 88 additions & 13 deletions torecsys/data/negsampling/__init__.py
@@ -2,37 +2,112 @@
"""

import torch
from typing import Dict

class _NegativeSampler(object):
r"""Base Class of Negative Sampler
"""
def __init__(self):
raise NotImplementedError("")
def __init__(self,
kwargs_dict: Dict[str, Dict[str, int]]):
r"""Initialize a Negative sampler which draw samples with uniform distribution with replacement
Args:
kwargs_dict (Dict[str, Dict[str, int]]): A dictionary, where key is field's name and value, including low and high,
is a dictionary, where key is name of argument and value is value of argument.
"""
self.kwargs_dict = kwargs_dict
self.dict_size = {k: self._getlen(v) for k, v in kwargs_dict.items()}

def _getlen(self) -> int:
r"""Get length of field.
Raises:
NotImplementedError: when the function `_getlen` is not implemented.
Returns:
int: Length of field.
"""
raise NotImplementedError("_getlen is not implemented in Base Class.")

def __len__(self) -> int:
def __len__(self) -> Dict[str, int]:
r"""Return size of dictionary.
Returns:
int: total number of words in dictionary
Dict[str, int]: A dictionary, where key is field's name and value is the total number of words in that field
"""
raise self.dict_size
return self.dict_size

def __call__(self, size: int) -> torch.Tensor:
return self.generate(size)
def size(self) -> Dict[str, int]:
r"""Return size of dictionary.
Returns:
Dict[str, int]: A dictionary, where key is field's name and value is the total number of words in that field
"""
return self.__len__()

def generate(self, size: int) -> torch.Tensor:
r"""Return drawn samples.
def __call__(self, *args, **kwargs) -> Dict[str, torch.Tensor]:
"""Return drawn samples.
Args:
size (int): Number of negative samples to be drawn
pos_samples (Dict[str, T]): A dictionary of positive samples, where key is field's name and value is
the tensor of that field with shape = (N, 1) and dtype = torch.long.
size (int): Number of negative samples to be generated per positive sample.
Returns:
Dict[str, T]: A dictionary of negative samples, where key is field's name and value is the tensor of
that field with shape = (N * Nneg, 1) and dtype = torch.long.
"""
return self.generate(*args, **kwargs)

def _generate(self) -> torch.Tensor:
"""A function to generate negative samples.
Raises:
NotImplementedError: not implementated in base class
NotImplementedError: when the function `_generate` is not implemented.
Returns:
torch.Tensor, shape = (size, 1), dtype = torch.long: Drawn negative samples
T, shape = (N * Nneg, 1), dtype = torch.long: Tensor of negative samples generated by the given function.
"""
raise NotImplementedError("")
raise NotImplementedError("_generate is not implemented in Base Class.")

def generate(self,
pos_samples: Dict[str, torch.Tensor],
size: int) -> Dict[str, torch.Tensor]:
"""Return drawn samples.
Args:
pos_samples (Dict[str, T]): A dictionary of positive samples, where key is field's name and value is
the tensor of that field with shape = (N, ...) and dtype = torch.long.
size (int): Number of negative samples to be generated per positive sample.
Returns:
Dict[str, T]: A dictionary of negative samples, where key is field's name and value is the tensor of
that field with shape = (N * Nneg, ...) and dtype = torch.long.
"""
# Get the fields that are to be re-drawn by the sampler.
keys = list(self.kwargs_dict.keys())

neg_samples = {}

for k, v in pos_samples.items():
if k in keys:
# Generate negative samples with sampler.
# Get batch size of field and calculate number of samples to be generated.
batch_size = v.size(0)
num_neg = size * batch_size

# Get arguments of the field to be called in _generate.
kwargs = self.kwargs_dict[k]
kwargs["size"] = num_neg

# Generate the negative samples.
neg_samples[k] = self._generate(**kwargs)

else:
# Repeat positive samples n (i.e. size) times.
neg_samples[k] = v.repeat(1, size).view(-1, 1)

return neg_samples

from .multinomial_sampler import MultinomialSampler
from .uniform_sampler import UniformSamplerWithoutReplacement
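
Taken together, a concrete sampler only has to supply _getlen and _generate; the base class handles batching, the size bookkeeping and the repetition of fields that are not re-drawn. A minimal sketch of a custom subclass under that contract; the field names, the toy ConstantSampler class and its "value" argument are illustrative assumptions, not part of this commit:

# Toy subclass illustrating the _getlen/_generate contract of _NegativeSampler.
import torch
from typing import Dict
from torecsys.data.negsampling import _NegativeSampler

class ConstantSampler(_NegativeSampler):
    r"""Toy sampler that always draws the same id, to illustrate the contract."""
    @staticmethod
    def _getlen(v: Dict[str, int]) -> int:
        return 1  # this toy sampler's "dictionary" holds a single id

    @staticmethod
    def _generate(value: int, size: int) -> torch.Tensor:
        return torch.full((size, 1), value, dtype=torch.long)

sampler = ConstantSampler(kwargs_dict={"item_id": {"value": 0}})
pos = {
    "user_id": torch.arange(4).view(-1, 1),   # kept, only repeated
    "item_id": torch.randint(0, 10, (4, 1)),  # re-drawn by the sampler
}
neg = sampler.generate(pos, size=2)
print({k: tuple(v.shape) for k, v in neg.items()})  # both fields: (8, 1)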
34 changes: 16 additions & 18 deletions torecsys/data/negsampling/multinomial_sampler.py
@@ -1,34 +1,32 @@
from . import _NegativeSampler
import torch

from typing import Dict

class MultinomialSampler(_NegativeSampler):
r"""MutlinomialSampler is to generate negative samplers by multinomial distribution, i.e. draw samples by given probabilities
"""
def __init__(self,
weights : torch.Tensor,
with_replacement : bool = True):
r"""Initialize a Negative sampler which draw samples with multinomial distribution
@staticmethod
def _getlen(v: Dict[str, int]) -> int:
r"""Get length of field.
Args:
weights (torch.Tensor): weights (probabilities) to draw samples, with shape = (total number of words in dictionary, ).
with_replacement (bool, optional): boolean flag to control the replacement of sampling. Defaults to True.
Returns:
int: Length of field.
"""
self.with_replacement = with_replacement
if isinstance(weights, torch.Tensor):
self.weights = weights
else:
self.weights = torch.Tensor(weights)
self.dict_size = len(self.weights)
return len(v["weights"])

def generate(self, size: int) -> torch.Tensor:
r"""Return drawn samples.
def _generate(self,
weights : torch.Tensor,
with_replacement : bool,
size : int) -> torch.Tensor:
"""A function to generate negative samples with multinomial distribution.
Args:
size (int): Number of negative samples to be drawn
weights (torch.Tensor): the input tensor containing probabilities
with_replacement (bool): whether to draw with replacement or not
size (int): number of samples to draw
Returns:
torch.Tensor, shape = (size, 1), dtype = torch.long: Drawn negative samples
T, shape = (N * Nneg, 1), dtype = torch.long: Tensor of negative samples generated by multinomial distribution.
"""
samples = torch.multinomial(weights, size, replacement=with_replacement)
return samples.view(-1, 1).long()
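
A hedged usage sketch of MultinomialSampler under the per-field kwargs_dict API shown above, assuming _generate consumes the weights and with_replacement entries passed through kwargs_dict; the field names and weights are illustrative assumptions:

# Draw item negatives from a multinomial over 4 item ids; repeat the user ids.
import torch
from torecsys.data.negsampling import MultinomialSampler

item_weights = torch.tensor([0.1, 0.3, 0.2, 0.4])  # one probability per item id
sampler = MultinomialSampler(kwargs_dict={
    "item_id": {"weights": item_weights, "with_replacement": True}
})

pos = {
    "user_id": torch.randint(0, 100, (4, 1)),  # repeated, not re-drawn
    "item_id": torch.randint(0, 4, (4, 1)),    # re-drawn from the multinomial
}
neg = sampler.generate(pos, size=2)            # 2 negatives per positive row
print({k: tuple(v.shape) for k, v in neg.items()})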
Empty file.
77 changes: 36 additions & 41 deletions torecsys/data/negsampling/uniform_sampler.py
@@ -1,67 +1,62 @@
from . import _NegativeSampler
import torch

from typing import Dict

class UniformSamplerWithReplacement(_NegativeSampler):
r"""UniformSamplerWithReplacement is to generate negative samplers by uniform distribution with replacement, i.e. draw samples uniformlly with replacement
"""
def __init__(self,
low : int,
high : int):
r"""Initialize a Negative sampler which draw samples with uniform distribution with replacement
@staticmethod
def _getlen(v: Dict[str, int]) -> int:
r"""Get length of field.
Args:
low (int): minimum value (i.e. lower bound) of sampling id.
high (int): maximum value (i.e. upper bound) of sampling id.
Returns:
int: Length of field.
"""
self.low = low
self.high = high
self.dict_size = self.high - self.low

def generate(self, size: int) -> torch.Tensor:
r"""Return drawn samples.
return v["high"] - v["low"]

@staticmethod
def _generate(low : int,
high : int,
size : int) -> torch.Tensor:
r"""A function to generate negative samples with uniform distribution with replacement.
Args:
size (int): Number of negative samples to be drawn
low (int): Lowest integer to be drawn from the distribution.
high (int): One above the highest integer to be drawn from the distribution.
size (int): An integer defining the shape of the output tensor.
Returns:
torch.Tensor, shape = (size, 1), dtype = torch.long: Drawn negative samples
T, shape = (N * Nneg, 1), dtype = torch.long: Tensor of negative samples generated by uniform distribution.
"""
return torch.randint(low=self.low, high=self.high, size=(size, )).long()
return torch.randint(low=low, high=high, size=(size, 1)).long()


class UniformSamplerWithoutReplacement(_NegativeSampler):
r"""UniformSamplerWithReplacement is to generate negative samplers by uniform distribution without replacement, i.e. draw samples uniformlly without replacement
"""
def __init__(self,
low : int,
high : int):
r"""Initialize a Negative sampler which draw samples with uniform distribution without replacement
@staticmethod
def _getlen(v: Dict[str, int]) -> int:
r"""Get length of field.
Returns:
int: Length of field.
"""
return v["high"] - v["low"]

@staticmethod
def _generate(low : int,
high : int,
size : int) -> torch.Tensor:
"""A function to generate negative samples with uniform distribution without replacement.
Args:
low (int): minimum value (i.e. lower bound) of sampling id.
high (int): maximum value (i.e. upper bound) of sampling id.
"""
self.low = low
self.high = high
self.dict_size = self.high - self.low

def generate(self, size: int) -> torch.Tensor:
r"""Generate negative samples by the sampler
Args:
size (int): Number of negative samples to be drawn
Raises:
ValueError: if input size is larger than the size of dictionary (i.e. high - low)
size (int): An integer defining the shape of the output tensor.
Returns:
torch.Tensor, shape = (size, 1), dtype = torch.long: Drawn negative samples
T, shape = (N * Nneg, 1), dtype = torch.long: Tensor of negative samples generated by uniform distribution.
"""

if size >= (self.high - self.low):
raise ValueError("input size cannot be larger than size of samples.")

samples = torch.randperm(n=self.high) + self.low
samples = torch.randperm(n=high) + low
samples = samples[:size]
return samples.long()
return samples.view(-1, 1)
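
A hedged usage sketch of the uniform samplers under the kwargs_dict API shown above; the field names and the id range [0, 1000) are illustrative assumptions:

# Draw item negatives uniformly from [0, 1000); repeat the user ids.
import torch
from torecsys.data.negsampling.uniform_sampler import UniformSamplerWithReplacement

sampler = UniformSamplerWithReplacement(kwargs_dict={"item_id": {"low": 0, "high": 1000}})

pos = {
    "user_id": torch.randint(0, 100, (4, 1)),   # repeated, not re-drawn
    "item_id": torch.randint(0, 1000, (4, 1)),  # re-drawn uniformly
}
neg = sampler.generate(pos, size=2)  # N * Nneg = 4 * 2 = 8 rows per field
print({k: tuple(v.shape) for k, v in neg.items()})

UniformSamplerWithoutReplacement follows the same pattern, but since it draws from a permutation of the id range, the requested number of negatives should not exceed high - low.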
6 changes: 5 additions & 1 deletion torecsys/data/sampledata/download_data.py
@@ -3,9 +3,13 @@
import os
from pathlib import Path
import requests
from tqdm.autonotebook import tqdm
import warnings
import zipfile

# ignore import warnings from the packages below
with warnings.catch_warnings():
warnings.simplefilter("ignore")
from tqdm.autonotebook import tqdm

def download_ml_data(size : str,
dir : str = None):
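
The tqdm import is now wrapped in warnings.catch_warnings() so the autonotebook shim stays quiet at import time. A hedged usage sketch of the downloader whose signature is shown here; the size value and target directory are assumed example values, since the rest of the function body is collapsed in this diff:

# Assumed example values for size and dir; only the signature is shown above.
from torecsys.data.sampledata.download_data import download_ml_data

download_ml_data(size="latest-small", dir="./data")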
