feat(backend): allow dynamic active learning strategies, i.e. …

…start with strategy x and move to strategy y
label-sleuth · Feb 20, 2023 · 3667040 · 3667040
1 parent 87894cc
commit 3667040
Show file tree

Hide file tree

Showing 7 changed files with 168 additions and 2 deletions.
diff --git a/label_sleuth/active_learning/core/active_learning_policies.py b/label_sleuth/active_learning/core/active_learning_policies.py
@@ -0,0 +1,13 @@
+from label_sleuth.active_learning.core.catalog import ActiveLearningCatalog
+from label_sleuth.active_learning.policy.static_active_learning_policy import StaticActiveLearningPolicy
+
+
+class ActiveLearningPolicies:
+    """
+    Catalog of the available active learning policies. A policy determines which active learning strategy is used:
+    it can be static, i.e. always return the same strategy, or dynamic, e.g. return a different strategy depending
+    on the current iteration number. The policies predefined below are all static.
+    """
+    STATIC_RETROSPECTIVE = StaticActiveLearningPolicy(ActiveLearningCatalog.RETROSPECTIVE)
+    STATIC_HARD_MINING = StaticActiveLearningPolicy(ActiveLearningCatalog.HARD_MINING)
+    STATIC_RANDOM = StaticActiveLearningPolicy(ActiveLearningCatalog.RANDOM)
diff --git a/label_sleuth/active_learning/policy/__init__.py b/label_sleuth/active_learning/policy/__init__.py
diff --git a/label_sleuth/active_learning/policy/active_learning_changing_policy.py b/label_sleuth/active_learning/policy/active_learning_changing_policy.py
@@ -0,0 +1,60 @@
+#
+#  Copyright (c) 2022 IBM Corp.
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+from typing import List
+
+import numpy as np
+
+from label_sleuth.active_learning.core.active_learning_api import ActiveLearningStrategy
+from label_sleuth.active_learning.policy.active_learning_policy import ActiveLearningPolicy
+
+
+class ActiveLearningChangingPolicy(ActiveLearningPolicy):
+    """
+    A dynamic active learning policy that follows a predefined pattern: it starts with the first strategy and
+    moves on to the next one once the number of iterations specified for the current strategy has passed.
+    """
+
+    def __init__(self, active_learning_strategies: List[ActiveLearningStrategy],
+                 num_iterations_per_strategy: List[int]):
+        """
+        :param active_learning_strategies: a list of the N strategies to be used by the policy, in order of use
+        :param num_iterations_per_strategy: the number of iterations for each of the first N-1 strategies; the Nth
+        strategy is used for all subsequent iterations. E.g. with strategies [A, B, C] and counts [5, 2], A is used
+        for iterations 0-4, B for iterations 5-6, and C from iteration 7 onwards.
+        :raises Exception: if the number of strategies is not len(num_iterations_per_strategy) + 1
+        """
+        if len(active_learning_strategies) != len(num_iterations_per_strategy) + 1:
+            raise Exception(
+                f"The number of strategies provided ({len(active_learning_strategies)}) does not match the provided "
+                f"list of {len(num_iterations_per_strategy)} active learning strategy switch points. "
+                f"For each active learning type, except the last one, the number of iterations for this active "
+                f"learning strategy to be used must be specified.")
+        self.active_learning_strategies = active_learning_strategies
+        self.num_iterations_per_strategy = num_iterations_per_strategy
+        self.switch_points = np.cumsum(num_iterations_per_strategy)  # iteration at which each later strategy starts
+
+    def get_active_learning_strategy(self, iteration_num: int) -> ActiveLearningStrategy:
+        for active_learning_strategy, switch_point in zip(self.active_learning_strategies, self.switch_points):
+            if iteration_num < switch_point:  # the first switch point not yet reached selects the strategy
+                return active_learning_strategy
+        return self.active_learning_strategies[-1]  # all switch points passed: use the last strategy
+
+    def get_name(self):
+        name = ""  # format: "<name>x<iterations>-...-<last name>"
+        for active_learning_strategy, n_iter in zip(self.active_learning_strategies, self.num_iterations_per_strategy):
+            name += f'{active_learning_strategy.name}x{n_iter}-'
+        name += f"{self.active_learning_strategies[-1].name}"
+        return name
diff --git a/label_sleuth/active_learning/policy/active_learning_policy.py b/label_sleuth/active_learning/policy/active_learning_policy.py
@@ -0,0 +1,41 @@
+#
+#  Copyright (c) 2022 IBM Corp.
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+import abc
+
+from label_sleuth.active_learning.core.active_learning_api import ActiveLearningStrategy
+
+
+class ActiveLearningPolicy(object, metaclass=abc.ABCMeta):
+    """
+    Abstract base class for active learning policies. A policy determines which active learning strategy will be
+    used in a given iteration. Policies can be static, i.e. always return the same active learning strategy,
+    or dynamic, i.e. return a different active learning strategy depending on the current iteration.
+    Subclasses must implement both abstract methods below.
+    """
+
+    @abc.abstractmethod
+    def get_active_learning_strategy(self, iteration_num: int) -> ActiveLearningStrategy:
+        """
+        Given *iteration_num*, return the active learning strategy to be used
+        :param iteration_num: index of the iteration for which a strategy is requested
+        :return: An instance of ActiveLearningStrategy
+        """
+
+    @abc.abstractmethod
+    def get_name(self) -> str:
+        """
+        :return: a name that describes the policy
+        """
diff --git a/label_sleuth/active_learning/policy/static_active_learning_policy.py b/label_sleuth/active_learning/policy/static_active_learning_policy.py
@@ -0,0 +1,35 @@
+#
+#  Copyright (c) 2022 IBM Corp.
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+from label_sleuth.active_learning.core.active_learning_api import ActiveLearningStrategy
+from label_sleuth.active_learning.policy.active_learning_policy import ActiveLearningPolicy
+
+
+class StaticActiveLearningPolicy(ActiveLearningPolicy):
+    """
+    A simple policy that is initialized with a specific active learning strategy and always returns this strategy,
+    regardless of the iteration number.
+    """
+
+    def __init__(self, active_learning_strategy: ActiveLearningStrategy):
+        self.static_active_learning_strategy = active_learning_strategy  # returned for every iteration
+
+    def get_active_learning_strategy(self, iteration_num: int) -> ActiveLearningStrategy:
+        """
+        Ignores *iteration_num* and returns the active learning strategy defined in the initialization
+        """
+        return self.static_active_learning_strategy
+
+    def get_name(self):
+        return f'Static-{self.static_active_learning_strategy.name}'  # "Static-" plus the strategy's name attribute
diff --git a/label_sleuth/config.py b/label_sleuth/config.py
@@ -22,7 +22,9 @@
 import dacite
 
 from label_sleuth.active_learning.core.active_learning_api import ActiveLearningStrategy
+from label_sleuth.active_learning.core.active_learning_policies import ActiveLearningPolicies
 from label_sleuth.active_learning.core.catalog import ActiveLearningCatalog
+from label_sleuth.active_learning.policy.active_learning_policy import ActiveLearningPolicy
 from label_sleuth.models.core.languages import Language, Languages
 from label_sleuth.models.core.model_policies import ModelPolicies
 from label_sleuth.models.policy.model_policy import ModelPolicy
@@ -35,11 +37,12 @@ class Configuration:
     changed_element_threshold: int
     model_policy: ModelPolicy
     training_set_selection_strategy: TrainingSetSelectionStrategy
-    active_learning_strategy: ActiveLearningStrategy
     precision_evaluation_size: int
     apply_labels_to_duplicate_texts: bool
     language: Language
     login_required: bool
+    active_learning_strategy: ActiveLearningStrategy = None
+    active_learning_policy: ActiveLearningPolicy = None
     main_panel_elements_per_page: int = 500
     sidebar_panel_elements_per_page: int = 50
     users: List[dict] = field(default_factory=list)
@@ -49,6 +52,7 @@ class Configuration:
     ModelPolicy: lambda x: getattr(ModelPolicies, x),
     TrainingSetSelectionStrategy: lambda x: getattr(TrainingSetSelectionStrategy, x),
     ActiveLearningStrategy: lambda x: getattr(ActiveLearningCatalog, x),
+    ActiveLearningPolicy: lambda x: getattr(ActiveLearningPolicies, x),
     Language: lambda x: getattr(Languages, x)
 }
 
@@ -69,4 +73,10 @@ def load_config(config_path, command_line_args=None) -> Configuration:
         data_class=Configuration, data=raw_cfg,
         config=dacite.Config(type_hooks=converters),
     )
+    if config.active_learning_strategy is None and config.active_learning_policy is None:
+        raise Exception("Either active_learning_strategy or active_learning_policy must be specified")
+
+    if config.active_learning_strategy is not None and config.active_learning_policy is not None:
+        raise Exception("Only one of active_learning_strategy or active_learning_policy can be specified")
+
     return config
diff --git a/label_sleuth/orchestrator/orchestrator_api.py b/label_sleuth/orchestrator/orchestrator_api.py
@@ -590,7 +590,14 @@ def _calculate_active_learning_recommendations(self, workspace_id, dataset_name,
         :param count:
         :param iteration_index: iteration to use
         """
-        active_learner = self.active_learning_factory.get_active_learner(self.config.active_learning_strategy)
+
+        if self.config.active_learning_strategy is not None:
+            active_learning_strategy = self.config.active_learning_strategy
+        else:
+            active_learning_strategy = self.config.active_learning_policy.get_active_learning_strategy(iteration_index)
+
+        active_learner = self.active_learning_factory.get_active_learner(active_learning_strategy)
+        logging.info(f"using active learning {active_learner}")
         # Where labels are applied to duplicate texts (the default behavior), we do not want duplicates to appear in
         # the Label Next list
         remove_duplicates = self.config.apply_labels_to_duplicate_texts