Skip to content

Commit

Permalink
Merge 9ac95f3 into c1d08fe
Browse files Browse the repository at this point in the history
  • Loading branch information
prithagupta committed Jun 4, 2019
2 parents c1d08fe + 9ac95f3 commit 4f7edf7
Show file tree
Hide file tree
Showing 28 changed files with 1,376 additions and 273 deletions.
29 changes: 16 additions & 13 deletions csrank/choicefunctions/choice_functions.py
Expand Up @@ -16,22 +16,25 @@ def learning_problem(self):
return CHOICE_FUNCTION

def predict_for_scores(self, scores, **kwargs):
""" Predict choices for scores for a given collection of sets of objects.
"""
Binary choice vector :math:`y` represents the choices amongst the objects in :math:`Q`, such that
:math:`y(k) = 1` represents that the object :math:`x_k` is chosen and :math:`y(k) = 0` represents it is not
chosen. Predict choices for the scores for a given collection of sets of objects (query sets).
Parameters
----------
scores : dict or numpy array
Dictionary with a mapping from ranking size to numpy arrays
or a single numpy array of size containing scores of each object of size:
(n_instances, n_objects)
Parameters
----------
scores : dict or numpy array
Dictionary with a mapping from query set size to numpy arrays
or a single numpy array containing the scores of each object, of size:
(n_instances, n_objects)
Returns
-------
Y : dict or numpy array
Dictionary with a mapping from ranking size to numpy arrays
or a single numpy array containing predicted ranking of size:
(n_instances, n_objects)
Returns
-------
Y : dict or numpy array
Dictionary with a mapping from query set size to numpy arrays
or a single numpy array containing predicted choice vectors of size:
(n_instances, n_objects)
"""

if isinstance(scores, dict):
Expand Down
40 changes: 38 additions & 2 deletions csrank/choicefunctions/cmpnet_choice.py
Expand Up @@ -89,8 +89,44 @@ def _convert_instances(self, X, Y):
def construct_model(self):
return super().construct_model()

def fit(self, X, Y, epochs=10, callbacks=None, validation_split=0.1, tune_size=0.1,
thin_thresholds=1, verbose=0, **kwd):
def fit(self, X, Y, epochs=10, callbacks=None, validation_split=0.1, tune_size=0.1, thin_thresholds=1, verbose=0,
**kwd):
"""
Fit a CmpNet model for learning a choice function on the provided set of queries X and preferences Y of
those objects. The provided queries and corresponding preferences are of a fixed size (numpy arrays). For
learning this network the binary cross entropy loss function for a pair of objects :math:`x_i, x_j \in Q`
is defined as:
.. math::
C_{ij} = -\\tilde{P_{ij}}(0)\\cdot \log(U(x_i,x_j)) - \\tilde{P_{ij}}(1) \\cdot \log(U(x_j,x_i)) \ ,
where :math:`\\tilde{P_{ij}}` is ground truth probability of the preference of :math:`x_i` over :math:`x_j`.
:math:`\\tilde{P_{ij}} = (1,0)` if :math:`x_i \succ x_j` else :math:`\\tilde{P_{ij}} = (0,1)`.
Parameters
----------
X : numpy array
(n_instances, n_objects, n_features)
Feature vectors of the objects
Y : numpy array
(n_instances, n_objects)
Preferences in form of Orderings or Choices for given n_objects
epochs : int
Number of epochs to run if training for a fixed query size
callbacks : list
List of callbacks to be called during optimization
validation_split : float (range : [0,1])
Percentage of instances to split off to validate on
tune_size: float (range : [0,1])
Percentage of instances to split off to tune the threshold for the choice function
thin_thresholds: int
The number of instances of scores to skip while tuning the threshold
verbose : bool
Print verbose information
**kwd :
Keyword arguments for the fit function
"""
if tune_size > 0:
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=tune_size, random_state=self.random_state)
try:
Expand Down
59 changes: 51 additions & 8 deletions csrank/choicefunctions/fate_choice.py
Expand Up @@ -17,24 +17,23 @@ def __init__(self, n_object_features, n_hidden_set_layers=2, n_hidden_set_units=
optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9), batch_size=256, metrics=None, random_state=None,
**kwargs):
"""
Create a FATE-network architecture for learning a discrete choice function. Training complexity is quadratic in
the number of objects and prediction complexity is only linear. The first-aggregate-then-evaluate approach
learns an embedding of each object and then aggregates that into a context representation
:math:`\\mu_{C(x)}`, where :math`C(x) = Q \setminus \{x\}` and then scores each object :math:`x` using a
generalized utility function :math:`U (x, \\mu_{C(x)})`.
Create a FATE-network architecture for learning a discrete choice function. The first-aggregate-then-evaluate
approach learns an embedding of each object and then aggregates that into a context representation
:math:`\\mu_{C(x)}` and then scores each object :math:`x` using a generalized utility function
:math:`U (x, \\mu_{C(x)})`.
To make it computationally efficient we take the context :math:`C(x)` as the query set :math:`Q`.
The context-representation is evaluated as:
.. math::
\\mu_{C(x)} = \\frac{1}{\\lvert C(x) \\lvert} \\sum_{y \\in C(x)} \\phi(y)
where :math:`\phi \colon \mathcal{X} \\to \mathcal{Z}` maps each object :math:`y` to an
:math:`m`-dimensional embedding space :math:`\mathcal{Z} \subseteq \mathbb{R}^m`.
To make it computationally efficient we take the context as the query set :math:`Q`.
The choice set is defined as:
.. math::
c(Q) = \{ x_i \in Q \lvert \, U (x, \\mu_{C(x)}) > t \}
c(Q) = \{ x \in Q \lvert \, U (x, \\mu_{C(x)}) > t \}
Parameters
Expand Down Expand Up @@ -103,7 +102,51 @@ def _construct_layers(self, **kwargs):
def construct_model(self, n_features, n_objects):
return super().construct_model(n_features, n_objects)

def fit(self, X, Y, tune_size=0.1, thin_thresholds=1, **kwargs):
def fit(self, X, Y, epochs=35, inner_epochs=1, callbacks=None, validation_split=0.1, verbose=0, global_lr=1.0,
global_momentum=0.9, min_bucket_size=500, refit=False, tune_size=0.1, thin_thresholds=1, **kwargs):
"""
Fit a generic FATE-network model for learning a choice function on a provided set of queries.
The provided queries can be of a fixed size (numpy arrays) or of varying sizes in which case dictionaries
are expected as input. For varying sizes a meta gradient descent is performed across the
different query sizes.
Parameters
----------
X : numpy array or dict
Feature vectors of the objects
(n_instances, n_objects, n_features) if numpy array or map from n_objects to numpy arrays
Y : numpy array or dict
Choices for given objects in the query
(n_instances, n_objects) if numpy array or map from n_objects to numpy arrays
epochs : int
Number of epochs to run if training for a fixed query size or
number of epochs of the meta gradient descent for the variadic model
inner_epochs : int
Number of epochs to train for each query size inside the variadic
model
callbacks : list
List of callbacks to be called during optimization
validation_split : float (range : [0,1])
Percentage of instances to split off to validate on
verbose : bool
Print verbose information
global_lr : float
Learning rate of the meta gradient descent (variadic model only)
global_momentum : float
Momentum for the meta gradient descent (variadic model only)
min_bucket_size : int
Restrict the training to queries of a minimum size
refit : bool
If True, create a new model object, otherwise continue fitting the
existing one if one exists.
tune_size: float (range : [0,1])
Percentage of instances to split off to tune the threshold for the choice function
thin_thresholds: int
The number of instances of scores to skip while tuning the threshold
**kwargs :
Keyword arguments for the fit function
"""
if tune_size > 0:
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=tune_size, random_state=self.random_state)
try:
Expand Down
42 changes: 40 additions & 2 deletions csrank/choicefunctions/feta_choice.py
Expand Up @@ -22,13 +22,26 @@ def __init__(self, n_objects, n_object_features, n_hidden=2, n_units=8, add_zero
metrics=['binary_accuracy'], batch_size=256, random_state=None,
**kwargs):
"""
Create a FETA-network architecture for learning the choice functions.
Create a FETA-network architecture for learning choice functions.
The first-evaluate-then-aggregate approach approximates the context-dependent utility function using the
first-order utility function :math:`U_1 \colon \mathcal{X} \\times \mathcal{X} \\rightarrow [0,1]`
and zeroth-order utility function :math:`U_0 \colon \mathcal{X} \\rightarrow [0,1]`.
It scores each object :math:`x` using a context-dependent utility function :math:`U(x, C_i)`:
.. math::
U(x_i, C_i) = U_0(x_i) + \\frac{1}{n-1} \sum_{x_j \in Q \\setminus \{x_i\}} U_1(x_i , x_j) \, .
Training and prediction complexity is quadratic in the number of objects.
The choice set is defined as:
.. math::
c(Q) = \{ x_i \in Q \lvert \, U (x_i, C_i) > t \}
Parameters
----------
n_objects : int
Number of objects to be ranked
Number of objects in each query set
n_object_features : int
Dimensionality of the feature space of each object
n_hidden : int
Expand Down Expand Up @@ -165,6 +178,31 @@ def create_input_lambda(i):

def fit(self, X, Y, epochs=10, callbacks=None, validation_split=0.1, tune_size=0.1, thin_thresholds=1, verbose=0,
**kwd):
"""
Fit a FETA-Network for learning a choice function on the provided set of queries X and preferences Y of
those objects. The provided queries and corresponding preferences are of a fixed size (numpy arrays).
Parameters
----------
X : numpy array (n_instances, n_objects, n_features)
Feature vectors of the objects
Y : numpy array (n_instances, n_objects)
Choices for given objects in the query
epochs : int
Number of epochs to run if training for a fixed query size
callbacks : list
List of callbacks to be called during optimization
validation_split : float (range : [0,1])
Percentage of instances to split off to validate on
verbose : bool
Print verbose information
tune_size: float (range : [0,1])
Percentage of instances to split off to tune the threshold for the choice function
thin_thresholds: int
The number of instances of scores to skip while tuning the threshold
**kwd :
Keyword arguments for the fit function
"""
if tune_size > 0:
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=tune_size, random_state=self.random_state)
try:
Expand Down

0 comments on commit 4f7edf7

Please sign in to comment.