Improved documentation for the base classes

kiudee · Mar 19, 2019 · 4ae6e35 · 4ae6e35
1 parent cc34269
commit 4ae6e35
Show file tree

Hide file tree

Showing 21 changed files with 217 additions and 37 deletions.
diff --git a/csrank/__init__.py b/csrank/__init__.py
@@ -1,6 +1,6 @@
 from .choicefunctions import *
+from .core import *
 from .dataset_reader import *
 from .discretechoice import *
 from .objectranking import *
-from .core import *
 from .tuning import ParameterOptimizer
diff --git a/csrank/choicefunctions/__init__.py b/csrank/choicefunctions/__init__.py
@@ -1,7 +1,7 @@
+from .baseline import AllPositive
 from .cmpnet_choice import CmpNetChoiceFunction
 from .fate_choice import FATEChoiceFunction
 from .feta_choice import FETAChoiceFunction
 from .generalized_linear_model import GeneralizedLinearModel
 from .pairwise_choice import PairwiseSVMChoiceFunction
 from .ranknet_choice import RankNetChoiceFunction
-from .baseline import AllPositive
diff --git a/csrank/choicefunctions/baseline.py b/csrank/choicefunctions/baseline.py
@@ -1,4 +1,5 @@
 import logging
+
 import numpy as np
 
 from csrank.learner import Learner
@@ -7,6 +8,12 @@
 
 class AllPositive(ChoiceFunctions, Learner):
     def __init__(self, **kwargs):
+        """
+            Baseline assigns the average number of chosen objects in the given choice sets and chooses all the objects.
+
+            :param kwargs: Keyword arguments for the algorithms
+        """
+
         self.logger = logging.getLogger(AllPositive.__name__)
 
     def fit(self, X, Y, **kwd):

diff --git a/csrank/choicefunctions/cmpnet_choice.py b/csrank/choicefunctions/cmpnet_choice.py
@@ -23,11 +23,12 @@ def __init__(self, n_object_features, n_hidden=2, n_units=8, loss_function='bina
            them.
            The outputs of the network for each pair of objects :math:`U(x_1,x_2), U(x_2,x_1)` are evaluated.
            :math:`U(x_1,x_2)` is a measure of how favorable it is for :math:`x_1` than :math:`x_2`.
-           Ranking for the given set of objects :math:`Q = \{ x_1 , \ldots , x_n \}`  is evaluted as follows:
+           Ranking for the given set of objects :math:`Q = \{ x_1 , \ldots , x_n \}`  is evaluated as follows:
 
            .. math::
-
-              ρ(Q) = \operatorname{argsort}_{i \in [n]}  \; \left\{ \\frac{1}{n-1} \sum_{j \in [n] \setminus \{i\}} U_1(x_i , x_j)\\right\}
+           
+                U(x_i) = \left\{ \\frac{1}{n-1} \sum_{j \in [n] \setminus \{i\}} U_1(x_i , x_j)\\right\} \\\\
+                c_{t}(Q) := \{x \in Q \mid U(x) > t\}
 
 
            Parameters

diff --git a/csrank/choicefunctions/ranknet_choice.py b/csrank/choicefunctions/ranknet_choice.py
@@ -66,7 +66,7 @@ def __init__(self, n_object_features, n_hidden=2, n_units=8, loss_function='bina
         self.logger.info("Initializing network with object features {}".format(self.n_object_features))
         self.threshold = 0.5
 
-    def convert_instances(self, X, Y):
+    def _convert_instances(self, X, Y):
         self.logger.debug('Creating the Dataset')
         x1, x2, garbage, garbage, y_single = generate_complete_pairwise_dataset(X, Y)
         del garbage

diff --git a/csrank/core/__init__.py b/csrank/core/__init__.py
@@ -1,5 +1,5 @@
 from .cmpnet_core import CmpNetCore
-from .ranknet_core import RankNetCore
-from .pairwise_svm import PairwiseSVM
+from .fate_network import FATENetwork, FATENetworkCore
 from .feta_network import FETANetwork
-from .fate_network import FATENetwork, FATENetworkCore
+from .pairwise_svm import PairwiseSVM
+from .ranknet_core import RankNetCore
diff --git a/csrank/core/ranknet_core.py b/csrank/core/ranknet_core.py
@@ -60,7 +60,7 @@ def _construct_layers(self, **kwargs):
 
     def fit(self, X, Y, epochs=10, callbacks=None, validation_split=0.1, verbose=0, **kwd):
 
-        X1, X2, Y_single = self.convert_instances(X, Y)
+        X1, X2, Y_single = self._convert_instances(X, Y)
 
         self.logger.debug("Instances created {}".format(X1.shape[0]))
         self.logger.debug('Creating the model')
@@ -90,7 +90,7 @@ def scoring_model(self):
             self._scoring_model = Model(inputs=[inp], outputs=output_score)
         return self._scoring_model
 
-    def convert_instances(self, X, Y):
+    def _convert_instances(self, X, Y):
         raise NotImplemented
 
     def construct_model(self):

diff --git a/csrank/dataset_reader/choicefunctions/__init__.py b/csrank/dataset_reader/choicefunctions/__init__.py
@@ -1,3 +1,3 @@
 from .choice_data_generator import ChoiceDatasetGenerator
+from .letor_ranking_choice_dataset import LetorRankingChoiceDatasetReader
 from .mnist_choice_dataset_reader import MNISTChoiceDatasetReader
-from .letor_ranking_choice_dataset import LetorRankingChoiceDatasetReader
diff --git a/csrank/discretechoice/cmpnet_discrete_choice.py b/csrank/discretechoice/cmpnet_discrete_choice.py
@@ -13,6 +13,58 @@ def __init__(self, n_object_features, n_hidden=2, n_units=8, loss_function='bina
                  batch_normalization=True, kernel_regularizer=l2(l=1e-4), kernel_initializer='lecun_normal',
                  activation='relu', optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9), metrics=['binary_accuracy'],
                  batch_size=256, random_state=None, **kwargs):
+        """
+               Create an instance of the CmpNet architecture.
+
+               CmpNet breaks the preferences in the form of rankings into pairwise comparisons and learns a pairwise
+               model for each pair of objects in the underlying set.
+               For prediction, the list of objects is converted into pairs of objects and the pairwise predicate is
+               evaluated on them.
+               The outputs of the network for each pair of objects :math:`U(x_1,x_2), U(x_2,x_1)` are evaluated.
+               :math:`U(x_1,x_2)` is a measure of how favorable :math:`x_1` is compared to :math:`x_2`.
+               The discrete choice for the given set of objects :math:`Q = \{ x_1 , \ldots , x_n \}` is evaluated as follows:
+
+               .. math::
+
+                    U(x_i) = \left\{ \\frac{1}{n-1} \sum_{j \in [n] \setminus \{i\}} U_1(x_i , x_j)\\right\} \\\\
+                    dc(Q) := \{\operatorname{argmax}_{i \in [n]}  \; U(x_i)\}
+
+
+               Parameters
+               ----------
+               n_object_features : int
+                   Number of features of the object space
+               n_hidden : int
+                   Number of hidden layers used in the scoring network
+               n_units : int
+                   Number of hidden units in each layer of the scoring network
+               loss_function : function or string
+                   Loss function to be used for the binary decision task of the
+                   pairwise comparisons
+               batch_normalization : bool
+                   Whether to use batch normalization in each hidden layer
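+               kernel_regularizer : function
+                   Regularizer function applied to all the hidden weight matrices.
+               kernel_initializer : function or string
+                   Initialization function for the weights of each hidden layer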
+               activation : function or string
+                   Type of activation function to use in each hidden layer
+               optimizer : function or string
+                   Optimizer to use during stochastic gradient descent
+               metrics : list
+                   List of metrics to evaluate during training (can be
+                   non-differentiable)
+               batch_size : int
+                   Batch size to use during training
+               random_state : int, RandomState instance or None
+                   Seed of the pseudorandom generator or a RandomState instance
+               **kwargs
+                   Keyword arguments for the algorithms
+
+               References
+               ----------
+               .. [1] Leonardo Rigutini, Tiziano Papini, Marco Maggini, and Franco Scarselli. 2011.
+                  SortNet: Learning to Rank by a Neural Preference Function.
+                  IEEE Trans. Neural Networks 22, 9 (2011), 1368–1380. https://doi.org/10.1109/TNN.2011.2160875
+        """
         super().__init__(n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units,
                          loss_function=loss_function, batch_normalization=batch_normalization,
                          kernel_regularizer=kernel_regularizer, kernel_initializer=kernel_initializer,

diff --git a/csrank/discretechoice/fate_discrete_choice.py b/csrank/discretechoice/fate_discrete_choice.py
@@ -4,8 +4,8 @@
 from keras.optimizers import SGD
 from keras.regularizers import l2
 
-from csrank.discretechoice.discrete_choice import DiscreteObjectChooser
 from csrank.core.fate_network import FATENetwork
+from csrank.discretechoice.discrete_choice import DiscreteObjectChooser
 
 
 class FATEDiscreteChoiceFunction(FATENetwork, DiscreteObjectChooser):
@@ -14,6 +14,41 @@ def __init__(self, n_object_features, n_hidden_set_layers=2, n_hidden_set_units=
                  n_hidden_joint_units=32, activation='selu', kernel_initializer='lecun_normal',
                  kernel_regularizer=l2(l=0.01), optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9), batch_size=256,
                  random_state=None, **kwargs):
+        """
+            Create a FATE-network architecture for learning the discrete choice functions.
+            Training complexity is quadratic in the number of objects and prediction complexity is only linear.
+
+            Parameters
+            ----------
+            n_object_features : int
+                Dimensionality of the feature space of each object
+            n_hidden_set_layers : int
+                Number of set layers.
+            n_hidden_set_units : int
+                Number of hidden set units.
+            n_hidden_joint_layers : int
+                Number of joint layers.
+            n_hidden_joint_units : int
+                Number of joint units.
+            activation : string or function
+                Activation function to use in the hidden units
+            kernel_initializer : function or string
+                Initialization function for the weights of each hidden layer
+            kernel_regularizer : function or string
+                Regularizer to use in the hidden units
+            optimizer : string or function
+                Stochastic gradient optimizer
+            batch_size : int
+                Batch size to use for training
+            loss_function : function
+                Differentiable loss function for the score vector
+            metrics : list
+                List of evaluation metrics (can be non-differentiable)
+            random_state : int or object
+                Numpy random state
+            **kwargs
+                Keyword arguments for the @FATENetwork
+        """
         self.loss_function = loss_function
         self.metrics = metrics
         super().__init__(n_object_features=n_object_features, n_hidden_set_layers=n_hidden_set_layers,

diff --git a/csrank/discretechoice/feta_discrete_choice.py b/csrank/discretechoice/feta_discrete_choice.py
@@ -17,8 +17,48 @@ def __init__(self, n_objects, n_object_features, n_hidden=2, n_units=8, add_zero
                  max_number_of_objects=10, num_subsample=5, loss_function='categorical_hinge',
                  batch_normalization=False, kernel_regularizer=l2(l=1e-4), kernel_initializer='lecun_normal',
                  activation='selu', optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9),
-                 metrics=['categorical_accuracy'], batch_size=256, random_state=None,
-                 **kwargs):
+                 metrics=['categorical_accuracy'], batch_size=256, random_state=None, **kwargs):
+        """
+            Create a FETA-network architecture for learning the discrete choice functions.
+            Training and prediction complexity is quadratic in the number of objects.
+
+            Parameters
+            ----------
+            n_objects : int
+                Number of objects to be ranked
+            n_object_features : int
+                Dimensionality of the feature space of each object
+            n_hidden : int
+                Number of hidden layers
+            n_units : int
+                Number of hidden units in each layer
+            add_zeroth_order_model : bool
+                True if the model should include a latent utility function
+            max_number_of_objects : int
+                The maximum number of objects to train from
+            num_subsample : int
+                Number of objects to subsample to
+            loss_function : function
+                Differentiable loss function for the score vector
+            batch_normalization : bool
+                Whether to use batch normalization in the hidden layers
+            kernel_regularizer : function
+                Regularizer to use in the hidden units
+            kernel_initializer : function or string
+                Initialization function for the weights of each hidden layer
+            activation : string or function
+                Activation function to use in the hidden units
+            optimizer : string or function
+                Stochastic gradient optimizer
+            metrics : list
+                List of evaluation metrics (can be non-differentiable)
+            batch_size : int
+                Batch size to use for training
+            random_state : int or object
+                Numpy random state
+            **kwargs
+                Keyword arguments for the hidden units
+        """
         super().__init__(n_objects=n_objects, n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units,
                          add_zeroth_order_model=add_zeroth_order_model, max_number_of_objects=max_number_of_objects,
                          num_subsample=num_subsample, loss_function=loss_function,

diff --git a/csrank/discretechoice/pairwise_discrete_choice.py b/csrank/discretechoice/pairwise_discrete_choice.py
@@ -1,14 +1,14 @@
 import logging
 
 from csrank.choicefunctions.util import generate_complete_pairwise_dataset
-from csrank.discretechoice.discrete_choice import DiscreteObjectChooser
 from csrank.core.pairwise_svm import PairwiseSVM
+from csrank.discretechoice.discrete_choice import DiscreteObjectChooser
 
 
 class PairwiseSVMDiscreteChoiceFunction(PairwiseSVM, DiscreteObjectChooser):
     def __init__(self, n_object_features, C=1.0, tol=1e-4, normalize=True,
                  fit_intercept=True, random_state=None, **kwargs):
-        """ Create an instance of the Pairwise Discrete choice model.
+        """ Create an instance of the Pairwise discrete choice model.
 
         Parameters
         ----------
@@ -19,19 +19,21 @@ def __init__(self, n_object_features, C=1.0, tol=1e-4, normalize=True,
         tol : float, optional
             Optimization tolerance
         normalize : bool, optional
-            If True, the data will be normalized before fitting.
+            If True, the data will be normalized before fitting
         fit_intercept : bool, optional
-            If True, the linear model will also fit an intercept.
+            If True, the linear model will also fit an intercept
         random_state : int, RandomState instance or None, optional
-            Seed of the pseudorandom generator or a RandomState instance
+            Seed of the pseudo-random generator or a RandomState instance
         **kwargs
             Keyword arguments for the algorithms
 
         References
         ----------
-        .. [1] Theodoros Evgeniou, Massimiliano Pontil, and Olivier Toubia. „A convex optimization approach to modeling consumer heterogeneity in conjoint estimation“.
+        .. [1] Theodoros Evgeniou, Massimiliano Pontil, and Olivier Toubia. „A convex optimization approach to modeling
+               consumer heterogeneity in conjoint estimation“.
                In: Marketing Science 26.6 (2007), pp. 805–818 (cit. on p. 18)
-           [2] Sebastián Maldonado, Ricardo Montoya, and Richard Weber. „Advanced conjoint analysis using feature selection via support vector machines“.
+        .. [2] Sebastián Maldonado, Ricardo Montoya, and Richard Weber. „Advanced conjoint analysis using feature
+               selection via support vector machines“.
                In: European Journal of Operational Research 241.2 (2015), pp. 564 –574 (cit. on pp. 19, 20).
         """
         super().__init__(n_object_features=n_object_features, C=C, tol=tol, normalize=normalize,

diff --git a/csrank/discretechoice/ranknet_discrete_choice.py b/csrank/discretechoice/ranknet_discrete_choice.py
@@ -13,6 +13,50 @@ def __init__(self, n_object_features, n_hidden=2, n_units=8, loss_function='bina
                  batch_normalization=True, kernel_regularizer=l2(l=1e-4), kernel_initializer='lecun_normal',
                  activation='relu', optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9), metrics=['binary_accuracy'],
                  batch_size=256, random_state=None, **kwargs):
+        """
+            Create an instance of the RankNet architecture.
+            RankNet breaks the preferences into pairwise comparisons using the discrete choice
+            and learns a latent utility model for the objects.
+
+            Parameters
+            ----------
+            n_object_features : int
+                Number of features of the object space
+            n_hidden : int
+                Number of hidden layers used in the scoring network
+            n_units : int
+                Number of hidden units in each layer of the scoring network
+            loss_function : function or string
+                Loss function to be used for the binary decision task of the pairwise comparisons
+            batch_normalization : bool
+                Whether to use batch normalization in each hidden layer
+            kernel_regularizer : function
+                Regularizer function applied to all the hidden weight matrices.
+            kernel_initializer : function or string
+                Initialization function for the weights of each hidden layer
+            activation : function or string
+                Type of activation function to use in each hidden layer
+            optimizer : function or string
+                Optimizer to use during stochastic gradient descent
+            metrics : list
+                List of metrics to evaluate during training (can be non-differentiable)
+            batch_size : int
+                Batch size to use during training
+            random_state : int, RandomState instance or None
+                Seed of the pseudo-random generator or a RandomState instance
+            **kwargs
+                Keyword arguments for the algorithms
+
+            References
+            ----------
+
+            .. [1] Burges, C. et al. (2005, August).
+                   "Learning to rank using gradient descent.",
+                   In Proceedings of the 22nd international conference on Machine learning (pp. 89-96). ACM.
+            .. [2] Burges, C. J. (2010).
+                   "From ranknet to lambdarank to lambdamart: An overview.",
+                   Learning, 11(23-581), 81.
+        """
         super().__init__(n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units,
                          loss_function=loss_function, batch_normalization=batch_normalization,
                          kernel_regularizer=kernel_regularizer, kernel_initializer=kernel_initializer,
@@ -21,7 +65,7 @@ def __init__(self, n_object_features, n_hidden=2, n_units=8, loss_function='bina
         self.logger = logging.getLogger(RankNetDiscreteChoiceFunction.__name__)
         self.logger.info("Initializing network with object features {}".format(self.n_object_features))
 
-    def convert_instances(self, X, Y):
+    def _convert_instances(self, X, Y):
         self.logger.debug('Creating the Dataset')
         x1, x2, garbage, garbage, y_single = generate_complete_pairwise_dataset(X, Y)
         del garbage

diff --git a/csrank/dyadranking/fate_dyad_ranker.py b/csrank/dyadranking/fate_dyad_ranker.py
@@ -1,5 +1,5 @@
-from csrank.dyadranking.contextual_ranking import ContextualRanker
 from csrank.core.fate_network import FATENetwork
+from csrank.dyadranking.contextual_ranking import ContextualRanker
 from csrank.numpy_util import scores_to_rankings