[PR] Progress bar on training

rodrigo-arenas · Jun 27, 2021 · 888e665 · 888e665
2 parents b7c6332 + 86034f3
commit 888e665
Show file tree

Hide file tree

Showing 10 changed files with 133 additions and 57 deletions.
diff --git a/README.rst b/README.rst
@@ -44,16 +44,21 @@ Main Features:
 * **Plots**: Generate pre-defined plots to understand the optimization process.
 * **MLflow**: Built-in integration with mlflow to log all the hyperparameters, cv-scores and the fitted models.
 
-**Some explains of the packages capabilities**
+Demos on Features:
+##################
 
-Sampled distribution of hyperparameters:
+Visualize the progress of your training:
 
-.. image:: https://github.com/rodrigo-arenas/Sklearn-genetic-opt/blob/master/docs/images/density.png?raw=true
+.. image:: docs/images/progress_bar.gif
 
 Real time metrics visualization and comparison across runs:
 
 .. image:: https://github.com/rodrigo-arenas/Sklearn-genetic-opt/blob/master/docs/images/tensorboard_log.png?raw=true
 
+Sampled distribution of hyperparameters:
+
+.. image:: https://github.com/rodrigo-arenas/Sklearn-genetic-opt/blob/master/docs/images/density.png?raw=true
+
 Artifacts logging:
 
 .. image:: https://github.com/rodrigo-arenas/Sklearn-genetic-opt/blob/master/docs/images/mlflow_artifacts_4.png?raw=true
@@ -73,6 +78,10 @@ install all the extra packages::
 
     pip install sklearn-genetic-opt[all]
 
+The only optional dependency that the last command does not install is Tensorflow,
+as it is usually advised to check which distribution works best for you.
+
+
 Example
 #######
 
@@ -124,13 +133,6 @@ Example
    print("Stats achieved in each generation: ", evolved_estimator.history)
    print("Best k solutions: ", evolved_estimator.hof)
 
-^^^^^^^
-Results
-^^^^^^^
-
-Log controlled by verbosity
-
-.. image:: https://github.com/rodrigo-arenas/Sklearn-genetic-opt/blob/master/docs/images/log.JPG?raw=true
 
 Changelog
 #########

diff --git a/dev-requirements.txt b/dev-requirements.txt
@@ -15,3 +15,4 @@ sphinx-copybutton
 numpydoc
 nbsphinx
 tensorflow>=2.0.0
+tqdm>=4.61.1
diff --git a/docs/images/progress_bar.gif b/docs/images/progress_bar.gif
diff --git a/docs/index.rst b/docs/index.rst
@@ -18,35 +18,42 @@ Installation:
 
 Install sklearn-genetic-opt
 
-It's advised to install sklearn-genetic using a virtual env, inside the env use::
+It's advised to install sklearn-genetic using a virtual env. To install a light version,
+inside the env use::
 
    pip install sklearn-genetic-opt
 
-Or install with the extra packages to get the full functionalities::
-
-   pip install sklearn-genetic-opt[all]
-
 .. |PythonMinVersion| replace:: 3.7
 .. |ScikitLearnMinVersion| replace:: 0.21.3
 .. |NumPyMinVersion| replace:: 1.14.5
 .. |SeabornMinVersion| replace:: 0.9.0
 .. |DEAPMinVersion| replace:: 1.3.1
 .. |MLflowMinVersion| replace:: 1.17.0
+.. |TensorflowMinVersion| replace:: 2.0.0
+.. |tqdmMinVersion| replace:: 4.61.1
 
 sklearn-genetic-opt requires:
 
 - Python (>= |PythonMinVersion|)
 - scikit-learn (>= |ScikitLearnMinVersion|)
 - NumPy (>= |NumPyMinVersion|)
 - DEAP (>= |DEAPMinVersion|)
+- tqdm (>= |tqdmMinVersion|)
 
-extra requirements:
+Extra requirements:
 
-These requirements are necessary to use the
-:mod:`~sklearn_genetic.plots` and :class:`~sklearn_genetic.mlflow.MLflowConfig` modules
+These requirements are necessary to use
+:mod:`~sklearn_genetic.plots`, :class:`~sklearn_genetic.mlflow.MLflowConfig`
+and :class:`~sklearn_genetic.callbacks.TensorBoard`, respectively.
 
-- MLflow (>= |MLflowMinVersion|)
 - Seaborn (>= |SeabornMinVersion|)
+- MLflow (>= |MLflowMinVersion|)
+- Tensorflow (>= |TensorflowMinVersion|)
+
+This command will install all the extra requirements, except for Tensorflow,
+as it is usually advised to check which distribution works best for you::
+
+   pip install sklearn-genetic-opt[all]
 
 .. toctree::
    :maxdepth: 2

diff --git a/docs/notebooks/Boston_Houses_decision_tree.ipynb b/docs/notebooks/Boston_Houses_decision_tree.ipynb
diff --git a/docs/notebooks/Digits_decision_tree.ipynb b/docs/notebooks/Digits_decision_tree.ipynb
@@ -138,7 +138,7 @@
     "    crossover_probability=0.9,\n",
     "    mutation_probability=0.05,\n",
     "    param_grid=params_grid,\n",
-    "    algorithm=\"eaMuPlusLambda\",\n",
+    "    algorithm=\"eaSimple\",\n",
     "    n_jobs=-1,\n",
     "    verbose=True)"
    ],
@@ -162,19 +162,33 @@
    "cell_type": "code",
    "execution_count": 6,
    "outputs": [
+    {
+     "data": {
+      "text/plain": "  0%|          | 0/31 [00:00<?, ?it/s]",
+      "application/vnd.jupyter.widget-view+json": {
+       "version_major": 2,
+       "version_minor": 0,
+       "model_id": "89a66e843f4745ad9bdda96f44440394"
+      }
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "gen\tnevals\tfitness \tfitness_std\tfitness_max\tfitness_min\n",
-      "0  \t16    \t0.379572\t0.200348   \t0.770574   \t0.191189   \n",
-      "1  \t29    \t0.605258\t0.146615   \t0.735661   \t0.270989   \n",
-      "2  \t32    \t0.718256\t0.0139822  \t0.738986   \t0.695761   \n",
-      "3  \t32    \t0.725322\t0.01201    \t0.738986   \t0.695761   \n",
-      "4  \t31    \t0.736128\t0.00537084 \t0.742311   \t0.724855   \n",
-      "5  \t31    \t0.73883 \t0.00319545 \t0.742311   \t0.730673   \n",
-      "6  \t29    \t0.740441\t0.00451965 \t0.74813    \t0.729842   \n",
-      "7  \t30    \t0.740337\t0.00517348 \t0.74813    \t0.729842   \n",
+      "0  \t16    \t0.363259\t0.136399   \t0.639235   \t0.189526   \n",
+      "1  \t14    \t0.450592\t0.119266   \t0.620116   \t0.27847    \n",
+      "2  \t12    \t0.54707 \t0.1376     \t0.75478    \t0.26517    \n",
+      "3  \t12    \t0.625052\t0.113433   \t0.768911   \t0.346633   \n",
+      "4  \t16    \t0.667654\t0.11493    \t0.755611   \t0.400665   \n",
+      "5  \t14    \t0.727504\t0.0156019  \t0.759767   \t0.689111   \n",
+      "6  \t16    \t0.71462 \t0.0486477  \t0.758105   \t0.607648   \n",
+      "7  \t14    \t0.701164\t0.132646   \t0.764755   \t0.190357   \n",
+      "8  \t12    \t0.735661\t0.0115332  \t0.758936   \t0.715711   \n",
+      "9  \t16    \t0.735141\t0.00947264 \t0.748961   \t0.704073   \n",
       "INFO: DeltaThreshold callback met its criteria\n",
       "INFO: Stopping the algorithm\n"
      ]
@@ -200,8 +214,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'min_weight_fraction_leaf': 0.01686201756501332, 'criterion': 'entropy', 'max_depth': 6, 'max_leaf_nodes': 24}\n",
-      "accuracy score:  0.78\n"
+      "{'min_weight_fraction_leaf': 0.027793264515431237, 'criterion': 'entropy', 'max_depth': 17, 'max_leaf_nodes': 26}\n",
+      "accuracy score:  0.77\n"
      ]
     }
    ],

diff --git a/docs/release_notes.rst b/docs/release_notes.rst
@@ -22,6 +22,8 @@ Features:
   its class name to know which callbacks were responsible for the stopping.
 * Added support for extra methods coming from scikit-learn's BaseSearchCV, it is
   still partial support, missing properties like `cv_results_`, `best_index_` and `multimetric_`.
+* A tqdm progress bar is now displayed when the .fit method of `GASearchCV` is called; it shows
+  how many generations are left in the training process.
 
 ^^^^^^^^^^
 Bug Fixes:

diff --git a/setup.py b/setup.py
@@ -40,6 +40,7 @@
         "scikit-learn>=0.21.3",
         "numpy>=1.14.5",
         "deap>=1.3.1",
+        "tqdm>=4.61.1",
     ],
     extras_require={
         "mlflow": ["mlflow>=1.17.0"],

diff --git a/sklearn_genetic/algorithms.py b/sklearn_genetic/algorithms.py
@@ -1,8 +1,12 @@
+import sys
+
 from deap import tools
 from deap.algorithms import varAnd, varOr
 
 from .callbacks.validations import eval_callbacks
 
+from tqdm.auto import tqdm
+
 
 def eaSimple(
     population,
@@ -68,6 +72,8 @@ def eaSimple(
     logbook = tools.Logbook()
     logbook.header = ["gen", "nevals"] + (stats.fields if stats else [])
 
+    progress_bar = tqdm(total=ngen + 1, file=sys.stdout)
+
     # Evaluate the individuals with an invalid fitness
     invalid_ind = [ind for ind in population if not ind.fitness.valid]
     fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
@@ -82,11 +88,14 @@ def eaSimple(
     n_gen = gen = 0
     logbook.record(gen=n_gen, nevals=len(invalid_ind), **record)
 
+    progress_bar.update(1)
+
     if verbose:
         print(logbook.stream)
 
     # Check if any of the callbacks conditions are True to stop the iteration
     if eval_callbacks(callbacks, record, logbook, estimator):
+        progress_bar.close()
         print("INFO: Stopping the algorithm")
         return population, logbook, n_gen
 
@@ -114,15 +123,20 @@ def eaSimple(
         # Append the current generation statistics to the logbook
         record = stats.compile(population) if stats else {}
         logbook.record(gen=gen, nevals=len(invalid_ind), **record)
+
+        progress_bar.update(1)
+
         if verbose:
             print(logbook.stream)
 
         # Check if any of the callbacks conditions are True to stop the iteration
         if eval_callbacks(callbacks, record, logbook, estimator):
+            progress_bar.close()
             print("INFO: Stopping the algorithm")
             break
 
     n_gen = gen + 1
+    progress_bar.close()
 
     return population, logbook, n_gen
 
@@ -197,6 +211,8 @@ def eaMuPlusLambda(
     logbook = tools.Logbook()
     logbook.header = ["gen", "nevals"] + (stats.fields if stats else [])
 
+    progress_bar = tqdm(total=ngen + 1, file=sys.stdout)
+
     # Evaluate the individuals with an invalid fitness
     invalid_ind = [ind for ind in population if not ind.fitness.valid]
     fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
@@ -210,11 +226,15 @@ def eaMuPlusLambda(
 
     n_gen = gen = 0
     logbook.record(gen=n_gen, nevals=len(invalid_ind), **record)
+
+    progress_bar.update(1)
+
     if verbose:
         print(logbook.stream)
 
     # Check if any of the callbacks conditions are True to stop the iteration
     if eval_callbacks(callbacks, record, logbook, estimator):
+        progress_bar.close()
         print("INFO: Stopping the algorithm")
         return population, logbook, n_gen
 
@@ -239,14 +259,20 @@ def eaMuPlusLambda(
         # Update the statistics with the new population
         record = stats.compile(population) if stats is not None else {}
         logbook.record(gen=gen, nevals=len(invalid_ind), **record)
+
+        progress_bar.update(1)
+
         if verbose:
             print(logbook.stream)
 
         if eval_callbacks(callbacks, record, logbook, estimator):
+            progress_bar.close()
             print("INFO: Stopping the algorithm")
             break
 
     n_gen = gen + 1
+    progress_bar.close()
+
     return population, logbook, n_gen
 
 
@@ -332,15 +358,21 @@ def eaMuCommaLambda(
     logbook = tools.Logbook()
     logbook.header = ["gen", "nevals"] + (stats.fields if stats else [])
 
+    progress_bar = tqdm(total=ngen + 1, file=sys.stdout)
+
     record = stats.compile(population) if stats is not None else {}
 
     n_gen = gen = 0
     logbook.record(gen=n_gen, nevals=len(invalid_ind), **record)
+
+    progress_bar.update(1)
+
     if verbose:
         print(logbook.stream)
 
     # Check if any of the callbacks conditions are True to stop the iteration
     if eval_callbacks(callbacks, record, logbook, estimator):
+        progress_bar.close()
         print("INFO: Stopping the algorithm")
         return population, logbook, n_gen
 
@@ -365,13 +397,18 @@ def eaMuCommaLambda(
         # Update the statistics with the new population
         record = stats.compile(population) if stats is not None else {}
         logbook.record(gen=gen, nevals=len(invalid_ind), **record)
+
+        progress_bar.update(1)
+
         if verbose:
             print(logbook.stream)
 
         # Check if any of the callbacks conditions are True to stop the iteration
         if eval_callbacks(callbacks, record, logbook, estimator):
+            progress_bar.close()
             print("INFO: Stopping the algorithm")
             break
 
     n_gen = gen + 1
+    progress_bar.close()
     return population, logbook, n_gen
diff --git a/sklearn_genetic/callbacks/loggers.py b/sklearn_genetic/callbacks/loggers.py
@@ -13,7 +13,7 @@
 try:
     import tensorflow as tf
 except ModuleNotFoundError:  # noqa
-    tf = None # noqa
+    tf = None  # noqa
 
 
 class LogbookSaver(BaseCallback):