From 8424e376a34ac33ed70a5eab669a8f17f091b91d Mon Sep 17 00:00:00 2001
From: "Radionov, Maksim"
Date: Fri, 28 May 2021 12:34:56 +0300
Subject: [PATCH 1/4] Use epochs instead of batch_num. Log epoch count

---
 openfl-workspace/keras_nlp/src/nlp_dataloader.py    | 1 +
 openfl-workspace/tf_cnn_histology/plan/plan.yaml    | 3 +--
 .../workspace/plan/defaults/tasks_tensorflow.yaml   | 2 +-
 .../workspace/plan/defaults/tasks_torch.yaml        | 1 +
 openfl/federated/task/runner_keras.py               | 11 ++++++-----
 openfl/federated/task/runner_pt.py                  | 12 +++++++-----
 openfl/federated/task/runner_tf.py                  | 11 +++--------
 7 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/openfl-workspace/keras_nlp/src/nlp_dataloader.py b/openfl-workspace/keras_nlp/src/nlp_dataloader.py
index 1754c51eec..fd43d87c44 100644
--- a/openfl-workspace/keras_nlp/src/nlp_dataloader.py
+++ b/openfl-workspace/keras_nlp/src/nlp_dataloader.py
@@ -89,6 +89,7 @@ def get_valid_data_size(self):
         """
         return self.X_valid[0].shape[0]
 
+    @staticmethod
     def _batch_generator(X1, X2, y, idxs, batch_size, num_batches):
         """
         Generate batch of data.
diff --git a/openfl-workspace/tf_cnn_histology/plan/plan.yaml b/openfl-workspace/tf_cnn_histology/plan/plan.yaml
index 3fe6f8809a..52a660f949 100644
--- a/openfl-workspace/tf_cnn_histology/plan/plan.yaml
+++ b/openfl-workspace/tf_cnn_histology/plan/plan.yaml
@@ -61,5 +61,4 @@ tasks:
       batch_size: 32
       epochs: 1
       metrics:
-      - loss
-      num_batches: 1
+      - loss
\ No newline at end of file
diff --git a/openfl-workspace/workspace/plan/defaults/tasks_tensorflow.yaml b/openfl-workspace/workspace/plan/defaults/tasks_tensorflow.yaml
index 586a885b40..6d000cc618 100644
--- a/openfl-workspace/workspace/plan/defaults/tasks_tensorflow.yaml
+++ b/openfl-workspace/workspace/plan/defaults/tasks_tensorflow.yaml
@@ -18,6 +18,6 @@ train:
   function : train_batches
   kwargs :
     batch_size : 32
-    num_batches : 1
     metrics :
     - loss
+    epochs : 1
diff --git a/openfl-workspace/workspace/plan/defaults/tasks_torch.yaml b/openfl-workspace/workspace/plan/defaults/tasks_torch.yaml
index a240c2003b..f41b0c3600 100644
--- a/openfl-workspace/workspace/plan/defaults/tasks_torch.yaml
+++ b/openfl-workspace/workspace/plan/defaults/tasks_torch.yaml
@@ -17,3 +17,4 @@ train:
   kwargs :
     metrics :
     - loss
+    epochs : 1
diff --git a/openfl/federated/task/runner_keras.py b/openfl/federated/task/runner_keras.py
index 16ebfee0a7..aa21c177a1 100644
--- a/openfl/federated/task/runner_keras.py
+++ b/openfl/federated/task/runner_keras.py
@@ -59,7 +59,7 @@ def rebuild_model(self, round, input_tensor_dict, validation=False):
         else:
             self.set_tensor_dict(input_tensor_dict, with_opt_vars=False)
 
-    def train(self, col_name, round_num, input_tensor_dict, metrics, num_batches=None, **kwargs):
+    def train(self, col_name, round_num, input_tensor_dict, metrics, epochs=1, batch_size=1, **kwargs):
         """
         Perform the training for a specified number of batches.
 
@@ -80,10 +80,11 @@ def train(self, col_name, round_num, input_tensor_dict, metrics, num_batches=Non
 
         # rebuild model with updated weights
         self.rebuild_model(round_num, input_tensor_dict)
-
-        results = self.train_iteration(self.data_loader.get_train_loader(num_batches),
-                                       metrics=metrics,
-                                       **kwargs)
+        for epoch in range(epochs):
+            self.logger.info(f"Run {epoch} epoch of {round_num} round")
+            results = self.train_iteration(self.data_loader.get_train_loader(batch_size),
+                                           metrics=metrics,
+                                           **kwargs)
 
         # output metric tensors (scalar)
         origin = col_name
diff --git a/openfl/federated/task/runner_pt.py b/openfl/federated/task/runner_pt.py
index 4d9d1f1267..7e22dbd6fb 100644
--- a/openfl/federated/task/runner_pt.py
+++ b/openfl/federated/task/runner_pt.py
@@ -124,7 +124,7 @@ def validate(self, col_name, round_num, input_tensor_dict,
         return output_tensor_dict, {}
 
     def train_batches(self, col_name, round_num, input_tensor_dict,
-                      num_batches=None, use_tqdm=False, **kwargs):
+                      num_batches=None, use_tqdm=False, epochs=1, **kwargs):
         """Train batches.
 
         Train the model on the requested number of batches.
@@ -145,10 +145,12 @@ def train_batches(self, col_name, round_num, input_tensor_dict,
         # set to "training" mode
         self.train()
         self.to(self.device)
-        loader = self.data_loader.get_train_loader(num_batches)
-        if use_tqdm:
-            loader = tqdm.tqdm(loader, desc="train epoch")
-        metric = self.train_epoch(loader)
+        for epoch in range(epochs):
+            self.logger.info(f"Run {epoch} epoch of {round_num} round")
+            loader = self.data_loader.get_train_loader(num_batches)
+            if use_tqdm:
+                loader = tqdm.tqdm(loader, desc="train epoch")
+            metric = self.train_epoch(loader)
         # Output metric tensors (scalar)
         origin = col_name
         tags = ('trained',)
diff --git a/openfl/federated/task/runner_tf.py b/openfl/federated/task/runner_tf.py
index 4f39444a23..9dd2d1bd56 100644
--- a/openfl/federated/task/runner_tf.py
+++ b/openfl/federated/task/runner_tf.py
@@ -83,7 +83,7 @@ def rebuild_model(self, round, input_tensor_dict, validation=False):
         self.set_tensor_dict(input_tensor_dict, with_opt_vars=False)
 
     def train_batches(self, col_name, round_num, input_tensor_dict,
-                      num_batches, use_tqdm=False, **kwargs):
+                      epochs=1, use_tqdm=False, **kwargs):
         """
         Perform the training for a specified number of batches.
 
@@ -106,22 +106,17 @@ def train_batches(self, col_name, round_num, input_tensor_dict,
         self.rebuild_model(round_num, input_tensor_dict)
 
         tf.keras.backend.set_learning_phase(True)
 
-        losses = []
-        batch_num = 0
-        while batch_num < num_batches:
+        for epoch in range(epochs):
+            self.logger.info(f"Run {epoch} epoch of {round_num} round")
             # get iterator for batch draws (shuffling happens here)
             gen = self.data_loader.get_train_loader(batch_size)
             if use_tqdm:
                 gen = tqdm.tqdm(gen, desc="training epoch")
 
             for (X, y) in gen:
-                if batch_num >= num_batches:
-                    break
-                else:
                     losses.append(self.train_batch(X, y))
-                    batch_num += 1
 
         # Output metric tensors (scalar)
         origin = col_name

From 4205e9ee19af7dd68b11fa3763fc956c054835da Mon Sep 17 00:00:00 2001
From: "Radionov, Maksim"
Date: Fri, 28 May 2021 13:02:19 +0300
Subject: [PATCH 2/4] Fix flake8

---
 openfl/federated/task/runner_keras.py | 7 ++++---
 openfl/federated/task/runner_tf.py    | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/openfl/federated/task/runner_keras.py b/openfl/federated/task/runner_keras.py
index aa21c177a1..d37d9b09d5 100644
--- a/openfl/federated/task/runner_keras.py
+++ b/openfl/federated/task/runner_keras.py
@@ -59,7 +59,8 @@ def rebuild_model(self, round, input_tensor_dict, validation=False):
         else:
             self.set_tensor_dict(input_tensor_dict, with_opt_vars=False)
 
-    def train(self, col_name, round_num, input_tensor_dict, metrics, epochs=1, batch_size=1, **kwargs):
+    def train(self, col_name, round_num, input_tensor_dict,
+              metrics, epochs=1, batch_size=1, **kwargs):
         """
         Perform the training for a specified number of batches.
 
@@ -83,8 +84,8 @@ def train(self, col_name, round_num, input_tensor_dict, metrics, epochs=1, batch
         for epoch in range(epochs):
             self.logger.info(f"Run {epoch} epoch of {round_num} round")
             results = self.train_iteration(self.data_loader.get_train_loader(batch_size),
-                                       metrics=metrics,
-                                       **kwargs)
+                                           metrics=metrics,
+                                           **kwargs)
 
         # output metric tensors (scalar)
         origin = col_name
diff --git a/openfl/federated/task/runner_tf.py b/openfl/federated/task/runner_tf.py
index 9dd2d1bd56..6cd8da658b 100644
--- a/openfl/federated/task/runner_tf.py
+++ b/openfl/federated/task/runner_tf.py
@@ -116,7 +116,7 @@ def train_batches(self, col_name, round_num, input_tensor_dict,
                 gen = tqdm.tqdm(gen, desc="training epoch")
 
             for (X, y) in gen:
-                    losses.append(self.train_batch(X, y))
+                losses.append(self.train_batch(X, y))
 
         # Output metric tensors (scalar)
         origin = col_name

From e028fee0a3f299b08ff60581bf242cbcf6fc8c21 Mon Sep 17 00:00:00 2001
From: Ilya Trushkin
Date: Wed, 25 Aug 2021 18:16:09 +0300
Subject: [PATCH 3/4] Resolve conversations

---
 openfl/federated/task/runner_keras.py | 2 +-
 openfl/federated/task/runner_pt.py    | 7 +++----
 openfl/federated/task/runner_tf.py    | 4 ++--
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/openfl/federated/task/runner_keras.py b/openfl/federated/task/runner_keras.py
index ec40129912..93ac40b1d0 100644
--- a/openfl/federated/task/runner_keras.py
+++ b/openfl/federated/task/runner_keras.py
@@ -63,7 +63,7 @@ def rebuild_model(self, round_num, input_tensor_dict, validation=False):
     def train(self, col_name, round_num, input_tensor_dict,
               metrics, epochs=1, batch_size=1, **kwargs):
         """
-        Perform the training for a specified number of batches.
+        Perform the training.
 
         Is expected to perform draws randomly, without replacement until data is exausted.
         Then data is replaced and shuffled and draws continue.
diff --git a/openfl/federated/task/runner_pt.py b/openfl/federated/task/runner_pt.py
index 1e8a3fb790..2905f09798 100644
--- a/openfl/federated/task/runner_pt.py
+++ b/openfl/federated/task/runner_pt.py
@@ -129,7 +129,7 @@ def validate(self, col_name, round_num, input_tensor_dict,
         return output_tensor_dict, {}
 
     def train_batches(self, col_name, round_num, input_tensor_dict,
-                      num_batches=None, use_tqdm=False, epochs=1, **kwargs):
+                      use_tqdm=False, epochs=1, **kwargs):
         """Train batches.
 
         Train the model on the requested number of batches.
@@ -138,9 +138,8 @@ def train_batches(self, col_name, round_num, input_tensor_dict,
             col_name: Name of the collaborator
             round_num: What round is it
             input_tensor_dict: Required input tensors (for model)
-            num_batches: The number of batches to train on before
-                         returning
             use_tqdm (bool): Use tqdm to print a progress bar (Default=True)
+            epochs: The number of epochs to train
 
         Returns:
             global_output_dict: Tensors to send back to the aggregator
@@ -152,7 +151,7 @@ def train_batches(self, col_name, round_num, input_tensor_dict,
         self.to(self.device)
         for epoch in range(epochs):
             self.logger.info(f'Run {epoch} epoch of {round_num} round')
-            loader = self.data_loader.get_train_loader(num_batches)
+            loader = self.data_loader.get_train_loader()
             if use_tqdm:
                 loader = tqdm.tqdm(loader, desc='train epoch')
             metric = self.train_epoch(loader)
diff --git a/openfl/federated/task/runner_tf.py b/openfl/federated/task/runner_tf.py
index 9c04e74085..18c7726ce1 100644
--- a/openfl/federated/task/runner_tf.py
+++ b/openfl/federated/task/runner_tf.py
@@ -86,15 +86,15 @@ def rebuild_model(self, round_num, input_tensor_dict, validation=False):
     def train_batches(self, col_name, round_num, input_tensor_dict,
                       epochs=1, use_tqdm=False, **kwargs):
         """
-        Perform the training for a specified number of batches.
+        Perform the training.
 
         Is expected to perform draws randomly, without replacement until data is exausted.
         Then data is replaced and shuffled and draws continue.
 
         Args:
-            num_batches: Number of batches to train on
             use_tqdm (bool): True = use tqdm to print a progress bar (Default=False)
+            epochs (int): Number of epochs to train
 
         Returns:
             float: loss metric
         """

From 38a7fc40830eda3f72c98ba7399adac9192dabd2 Mon Sep 17 00:00:00 2001
From: Ilya Trushkin
Date: Wed, 25 Aug 2021 18:19:55 +0300
Subject: [PATCH 4/4] Fix linter

---
 openfl/federated/task/runner_tf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openfl/federated/task/runner_tf.py b/openfl/federated/task/runner_tf.py
index 18c7726ce1..620a0ca0dd 100644
--- a/openfl/federated/task/runner_tf.py
+++ b/openfl/federated/task/runner_tf.py
@@ -94,7 +94,7 @@ def train_batches(self, col_name, round_num, input_tensor_dict,
         Args:
             use_tqdm (bool): True = use tqdm to print a progress bar (Default=False)
-            epochs (int): Number of epochs to train
+            epochs (int): Number of epochs to train
 
         Returns:
             float: loss metric
         """
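
For reference, below is a minimal, self-contained sketch of the epoch-driven training loop that this series introduces, written outside of OpenFL so it can be run on its own. It assumes only that PyTorch is installed. SimpleDataLoader, train_epoch and train are hypothetical stand-ins invented for illustration, not OpenFL classes or API; only the for-epoch loop, the per-epoch request for a fresh loader and the log message mirror the patched runners.

# Minimal, self-contained sketch of the epoch-driven training loop that the
# patches above introduce in the OpenFL task runners. All names defined here
# (SimpleDataLoader, train_epoch, train) are hypothetical, not OpenFL API.
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class SimpleDataLoader:
    """Stand-in for a data loader: each call returns a fresh, shuffled loader."""

    def __init__(self, features, labels, batch_size=32):
        self.dataset = TensorDataset(features, labels)
        self.batch_size = batch_size

    def get_train_loader(self):
        # Shuffling happens here, so every epoch draws the data in a new order,
        # matching the "replaced and shuffled" behavior the docstrings describe.
        return DataLoader(self.dataset, batch_size=self.batch_size, shuffle=True)


def train_epoch(model, optimizer, loss_fn, loader):
    """One full pass over the loader; returns the mean batch loss."""
    model.train()
    losses = []
    for x, y in loader:
        optimizer.zero_grad()
        loss = loss_fn(model(x), y)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
    return sum(losses) / len(losses)


def train(model, optimizer, loss_fn, data_loader, round_num, epochs=1):
    """Epoch-based training task, analogous to the patched train()/train_batches()."""
    metric = None
    for epoch in range(epochs):
        # Same logging pattern the patches add to the runners.
        logger.info(f'Run {epoch} epoch of {round_num} round')
        loader = data_loader.get_train_loader()
        metric = train_epoch(model, optimizer, loss_fn, loader)
    return metric


if __name__ == '__main__':
    torch.manual_seed(0)
    x = torch.randn(256, 10)
    y = (x.sum(dim=1, keepdim=True) > 0).float()
    model = nn.Sequential(nn.Linear(10, 16), nn.ReLU(), nn.Linear(16, 1))
    final_loss = train(model, optim.SGD(model.parameters(), lr=0.1),
                       nn.BCEWithLogitsLoss(), SimpleDataLoader(x, y),
                       round_num=0, epochs=2)
    print(f'final loss: {final_loss:.4f}')

Requesting a fresh loader on every epoch keeps shuffling inside the data loader, which is what lets the runners drop the num_batches argument and control training length through epochs and batch_size alone, as the plan defaults above now do.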