Commit c0f4c0e

Merge bd8a027 into 229d5dd

timokau committed Nov 25, 2019
2 parents 229d5dd + bd8a027
Showing 114 changed files with 5,972 additions and 2,362 deletions.
41 changes: 24 additions & 17 deletions csrank/callbacks.py
@@ -43,9 +43,10 @@ def on_epoch_end(self, epoch, logs=None):
            self.best_weights = self.model.get_weights()
        if current is None:
            warnings.warn(
-               'Early stopping conditioned on metric `%s` '
-               'which is not available. Available metrics are: %s' %
-               (self.monitor, ','.join(list(logs.keys()))), RuntimeWarning
+               "Early stopping conditioned on metric `%s` "
+               "which is not available. Available metrics are: %s"
+               % (self.monitor, ",".join(list(logs.keys()))),
+               RuntimeWarning,
            )
            return
        if self.monitor_op(current - self.min_delta, self.best):
@@ -60,16 +61,20 @@ def on_epoch_end(self, epoch, logs=None):

    def on_train_end(self, logs=None):
        if self.stopped_epoch > 0:
-           self.logger.info("Setting best weights for final epoch {}".format(self.epoch))
+           self.logger.info(
+               "Setting best weights for final epoch {}".format(self.epoch)
+           )
            self.model.set_weights(self.best_weights)

    def set_tunable_parameters(self, patience=300, min_delta=2, **point):
        self.patience = patience
        self.min_delta = min_delta
        if len(point) > 0:
-           self.logger.warning('This callback does not support'
-                               ' tunable parameters'
-                               ' called: {}'.format(print_dictionary(point)))
+           self.logger.warning(
+               "This callback does not support"
+               " tunable parameters"
+               " called: {}".format(print_dictionary(point))
+           )


class weightHistory(Callback):
@@ -79,12 +84,13 @@ def on_train_begin(self, logs={}):
        self.hidden_units_used = []

    def on_batch_end(self, batch, logs={}):
-       hidden = [layer for layer in self.model.layers
-                 if layer.name == 'hidden_1']
+       hidden = [layer for layer in self.model.layers if layer.name == "hidden_1"]

        y = np.array(hidden[0].get_weights()[0])
        close = np.isclose(y, 0, atol=1e-3)
-       self.hidden_units_used.append(len(np.unique(np.where(np.logical_not(close))[1])))
+       self.hidden_units_used.append(
+           len(np.unique(np.where(np.logical_not(close))[1]))
+       )
        self.norm.append(np.abs(y).sum())
        self.zero_weights.append(close.sum())
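A quick aside on the `on_batch_end` logic kept above: it counts hidden units that still have at least one non-negligible incoming weight. A worked NumPy example of that idiom, with an invented weight matrix:

```python
import numpy as np

# Rows are inputs, columns are hidden units; columns 1 and 3 are (near) zero.
y = np.array([[0.5, 0.0, 0.2, 1e-5],
              [0.1, 1e-4, 0.0, 0.0]])
close = np.isclose(y, 0, atol=1e-3)                   # True where a weight is ~0
used = np.unique(np.where(np.logical_not(close))[1])  # columns with a surviving weight
print(len(used))        # 2 -> hidden units 0 and 2 are still in use
print(np.abs(y).sum())  # L1 norm of the weight matrix (tracked in self.norm)
print(close.sum())      # 5 near-zero weights (tracked in self.zero_weights)
```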

@@ -113,25 +119,26 @@ def set_tunable_parameters(self, epochs_drop=300, drop=0.1, **point):
        self.epochs_drop = epochs_drop
        self.drop = drop
        if len(point) > 0:
-           self.logger.warning('This callback does not support'
-                               ' tunable parameters'
-                               ' called: {}'.format(print_dictionary(point)))
+           self.logger.warning(
+               "This callback does not support"
+               " tunable parameters"
+               " called: {}".format(print_dictionary(point))
+           )


class DebugOutput(Callback):
-
    def __init__(self, delta=100, **kwargs):
        super(DebugOutput, self).__init__(**kwargs)
        self.delta = delta

    def on_train_end(self, logs=None):
-       self.logger.debug('Total number of epochs: {}'.format(self.epoch))
+       self.logger.debug("Total number of epochs: {}".format(self.epoch))

    def on_train_begin(self, logs=None):
        self.epoch = 0
-       self.logger = logging.getLogger('DebugOutput')
+       self.logger = logging.getLogger("DebugOutput")

    def on_epoch_end(self, epoch, logs=None):
        self.epoch += 1
        if self.epoch % self.delta == 0:
-           self.logger.debug('Epoch {} of the training finished.'.format(self.epoch))
+           self.logger.debug("Epoch {} of the training finished.".format(self.epoch))
4 changes: 3 additions & 1 deletion csrank/choicefunction/baseline.py
@@ -47,7 +47,9 @@ def predict_scores(self, X, Y, **kwargs):
        if isinstance(X, dict):
            scores = dict()
            for ranking_size, x in X.items():
-               scores[ranking_size] = self._predict_scores_fixed(x, Y[ranking_size], **kwargs)
+               scores[ranking_size] = self._predict_scores_fixed(
+                   x, Y[ranking_size], **kwargs
+               )

        else:
            scores = self._predict_scores_fixed(X, **kwargs)
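The hunk above shows the convention used throughout csrank for variable-size queries: `X` (and `Y`) may be dicts keyed by ranking size, and scores come back under the same keys. A small sketch of that calling convention (the shapes and the `model` name are assumptions for illustration, not from the diff):

```python
import numpy as np

# Queries grouped by size: (n_instances, ranking_size, n_features) per key.
X = {
    3: np.random.rand(10, 3, 5),
    5: np.random.rand(7, 5, 5),
}
Y = {
    3: np.random.randint(0, 2, size=(10, 3)),
    5: np.random.randint(0, 2, size=(7, 5)),
}

# scores = model.predict_scores(X, Y)
# -> dict with the same keys: scores[3].shape == (10, 3), scores[5].shape == (7, 5)
```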
11 changes: 6 additions & 5 deletions csrank/choicefunction/choice_functions.py
@@ -6,11 +6,10 @@
from csrank.metrics_np import f1_measure
from csrank.util import progress_bar

-__all__ = ['ChoiceFunctions']
+__all__ = ["ChoiceFunctions"]


class ChoiceFunctions(metaclass=ABCMeta):
-
    @property
    def learning_problem(self):
        return CHOICE_FUNCTION
@@ -60,9 +59,11 @@ def _tune_threshold(self, X_val, Y_val, thin_thresholds=1, verbose=0):
                    threshold = p
                    best = f1
                if verbose == 1:
-                   progress_bar(i, len(probabilities), status='Tuning threshold')
+                   progress_bar(i, len(probabilities), status="Tuning threshold")
        except KeyboardInterrupt:
            self.logger.info("Keyboard interrupted")
-       self.logger.info('Tuned threshold, obtained {:.2f} which achieved'
-                        ' a micro F1-measure of {:.2f}'.format(threshold, best))
+       self.logger.info(
+           "Tuned threshold, obtained {:.2f} which achieved"
+           " a micro F1-measure of {:.2f}".format(threshold, best)
+       )
        return threshold
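For readers tracing `_tune_threshold`: it scans candidate thresholds drawn from the predicted probabilities (optionally thinned) and keeps the one with the best micro F1. A self-contained sketch of that idea, using scikit-learn's `f1_score` in place of csrank's `f1_measure`:

```python
import numpy as np
from sklearn.metrics import f1_score

def tune_threshold(probabilities, Y_true, thin=1):
    """Return the probability cutoff maximizing micro-averaged F1."""
    best, threshold = -np.inf, 0.5
    # Candidate cutoffs are the observed probabilities, optionally thinned.
    for p in np.unique(probabilities)[::thin]:
        f1 = f1_score(
            Y_true.ravel(), (probabilities >= p).astype(int).ravel(), average="micro"
        )
        if f1 > best:
            best, threshold = f1, p
    return threshold

probabilities = np.random.rand(20, 4)
Y_true = (probabilities > 0.6).astype(int)  # toy ground truth
print(tune_threshold(probabilities, Y_true, thin=2))
```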
111 changes: 86 additions & 25 deletions csrank/choicefunction/cmpnet_choice.py
@@ -10,10 +10,22 @@


class CmpNetChoiceFunction(CmpNetCore, ChoiceFunctions):
-   def __init__(self, n_object_features, n_hidden=2, n_units=8, loss_function='binary_crossentropy',
-                batch_normalization=True, kernel_regularizer=l2(l=1e-4), kernel_initializer='lecun_normal',
-                activation='relu', optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9), metrics=['binary_accuracy'],
-                batch_size=256, random_state=None, **kwargs):
+   def __init__(
+       self,
+       n_object_features,
+       n_hidden=2,
+       n_units=8,
+       loss_function="binary_crossentropy",
+       batch_normalization=True,
+       kernel_regularizer=l2(l=1e-4),
+       kernel_initializer="lecun_normal",
+       activation="relu",
+       optimizer=SGD(lr=1e-4, nesterov=True, momentum=0.9),
+       metrics=["binary_accuracy"],
+       batch_size=256,
+       random_state=None,
+       **kwargs
+   ):
        """
        Create an instance of the :class:`CmpNetCore` architecture for learning a choice function.
        CmpNet breaks the preferences in the form of rankings into pairwise comparisons and learns a pairwise model for
@@ -67,32 +79,58 @@ def __init__(self, n_object_features, n_hidden=2, n_units=8, loss_function='bina
        [1] Leonardo Rigutini, Tiziano Papini, Marco Maggini, and Franco Scarselli. 2011. SortNet: Learning to Rank by a Neural Preference Function. IEEE Trans. Neural Networks 22, 9 (2011), 1368–1380. https://doi.org/10.1109/TNN.2011.2160875
        """
-       super().__init__(n_object_features=n_object_features, n_hidden=n_hidden, n_units=n_units,
-                        loss_function=loss_function, batch_normalization=batch_normalization,
-                        kernel_regularizer=kernel_regularizer, kernel_initializer=kernel_initializer,
-                        activation=activation, optimizer=optimizer, metrics=metrics, batch_size=batch_size,
-                        random_state=random_state, **kwargs)
+       super().__init__(
+           n_object_features=n_object_features,
+           n_hidden=n_hidden,
+           n_units=n_units,
+           loss_function=loss_function,
+           batch_normalization=batch_normalization,
+           kernel_regularizer=kernel_regularizer,
+           kernel_initializer=kernel_initializer,
+           activation=activation,
+           optimizer=optimizer,
+           metrics=metrics,
+           batch_size=batch_size,
+           random_state=random_state,
+           **kwargs
+       )
        self.logger = logging.getLogger(CmpNetChoiceFunction.__name__)
-       self.logger.info("Initializing network with object features {}".format(self.n_object_features))
+       self.logger.info(
+           "Initializing network with object features {}".format(
+               self.n_object_features
+           )
+       )
        self.threshold = 0.5

    def _convert_instances_(self, X, Y):
-       self.logger.debug('Creating the Dataset')
+       self.logger.debug("Creating the Dataset")
        x1, x2, garbage, y_double, garbage = generate_complete_pairwise_dataset(X, Y)
        del garbage
        if x1.shape[0] > self.threshold_instances:
-           indices = self.random_state.choice(x1.shape[0], self.threshold_instances, replace=False)
+           indices = self.random_state.choice(
+               x1.shape[0], self.threshold_instances, replace=False
+           )
            x1 = x1[indices, :]
            x2 = x2[indices, :]
            y_double = y_double[indices, :]
-       self.logger.debug('Finished the Dataset instances {}'.format(x1.shape[0]))
+       self.logger.debug("Finished the Dataset instances {}".format(x1.shape[0]))
        return x1, x2, y_double

    def construct_model(self):
        return super().construct_model()

-   def fit(self, X, Y, epochs=10, callbacks=None, validation_split=0.1, tune_size=0.1, thin_thresholds=1, verbose=0,
-           **kwd):
+   def fit(
+       self,
+       X,
+       Y,
+       epochs=10,
+       callbacks=None,
+       validation_split=0.1,
+       tune_size=0.1,
+       thin_thresholds=1,
+       verbose=0,
+       **kwd
+   ):
        """
        Fit a CmpNet model for learning a choice function on the provided set of queries X and preferences Y of
        those objects. The provided queries and corresponding preferences are of a fixed size (numpy arrays). For
@@ -130,15 +168,25 @@ def fit(self, X, Y, epochs=10, callbacks=None, validation_split=0.1, tune_size=0
        Keyword arguments for the fit function
        """
        if tune_size > 0:
-           X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=tune_size, random_state=self.random_state)
+           X_train, X_val, Y_train, Y_val = train_test_split(
+               X, Y, test_size=tune_size, random_state=self.random_state
+           )
            try:
-               super().fit(X_train, Y_train, epochs, callbacks,
-                           validation_split, verbose, **kwd)
+               super().fit(
+                   X_train,
+                   Y_train,
+                   epochs,
+                   callbacks,
+                   validation_split,
+                   verbose,
+                   **kwd
+               )
            finally:
-               self.logger.info('Fitting utility function finished. Start tuning threshold.')
+               self.logger.info(
+                   "Fitting utility function finished. Start tuning threshold."
+               )
        else:
-           super().fit(X, Y, epochs, callbacks, validation_split, verbose,
-                       **kwd)
+           super().fit(X, Y, epochs, callbacks, validation_split, verbose, **kwd)
            self.threshold = 0.5

    def _predict_scores_fixed(self, X, **kwargs):
@@ -156,7 +204,20 @@ def predict(self, X, **kwargs):
    def clear_memory(self, **kwargs):
        super().clear_memory(**kwargs)

-   def set_tunable_parameters(self, n_hidden=32, n_units=2, reg_strength=1e-4, learning_rate=1e-3, batch_size=128,
-                              **point):
-       super().set_tunable_parameters(n_hidden=n_hidden, n_units=n_units, reg_strength=reg_strength,
-                                      learning_rate=learning_rate, batch_size=batch_size, **point)
+   def set_tunable_parameters(
+       self,
+       n_hidden=32,
+       n_units=2,
+       reg_strength=1e-4,
+       learning_rate=1e-3,
+       batch_size=128,
+       **point
+   ):
+       super().set_tunable_parameters(
+           n_hidden=n_hidden,
+           n_units=n_units,
+           reg_strength=reg_strength,
+           learning_rate=learning_rate,
+           batch_size=batch_size,
+           **point
+       )
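Putting the reformatted signatures together, a hedged end-to-end sketch of the class (the toy data is invented; parameter names come from the signatures above, and the fixed-size array convention from the `fit` docstring):

```python
import numpy as np

from csrank.choicefunction.cmpnet_choice import CmpNetChoiceFunction

n_instances, n_objects, n_features = 200, 5, 3
X = np.random.rand(n_instances, n_objects, n_features)
# Toy choice labels: "choose" objects whose mean feature value is high.
Y = (X.mean(axis=-1) > 0.5).astype(int)

cf = CmpNetChoiceFunction(n_object_features=n_features, n_hidden=2, n_units=8)
# tune_size > 0 holds out data to tune the decision threshold after fitting;
# with tune_size=0 the default threshold of 0.5 is kept.
cf.fit(X, Y, epochs=10, tune_size=0.1, thin_thresholds=1, verbose=0)
chosen = cf.predict(X)  # binary choice per object at the tuned threshold
```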
