Merge pull request #257 from mlf-core/release/1.9.0

release/1.9.0
mlf-core · Feb 16, 2021 · 3d916ba · 3d916ba
2 parents a68524a + da7740f
commit 3d916ba
Show file tree

Hide file tree

Showing 44 changed files with 495 additions and 379 deletions.
diff --git a/.cookietemple.yml b/.cookietemple.yml
@@ -15,7 +15,7 @@ full_name: Lukas Heumos
 email: lukas.heumos@posteo.net
 project_name: mlf-core
 project_short_description: Reproducible machine learning pipelines using mlflow.
-version: 1.8.0
+version: 1.9.0
 license: Apache2.0
 command_line_interface: Click
 testing_library: pytest
diff --git a/.github/workflows/build_package.yml b/.github/workflows/build_package.yml
@@ -10,6 +10,8 @@ jobs:
       matrix:
         os: [macos-latest, ubuntu-latest, windows-latest]
         python: [3.8, 3.9]
+    env:
+      PYTHONIOENCODING: utf-8
 
     steps:
       - uses: actions/checkout@v2
@@ -23,6 +25,9 @@ jobs:
       - name: Build mlf-core
         run: pip install .
 
+      - name: Run mlf-core --help
+        run: mlf-core --help
+
       - name: Install required twine packaging dependencies
         run: pip install setuptools wheel twine
 

diff --git a/.github/workflows/create_package_prediction.yml b/.github/workflows/create_package_prediction.yml
@@ -23,29 +23,20 @@ jobs:
         run: |
           make install
 
-      - name: Install flake8
-        run: pip install flake8
-
       - name: Create package-prediction Pytorch Template
         run: |
           cd ..
           echo -e "\033[B\n\n\n\n\nn\n\n\n\n\nn" | mlf-core create
-          cd exploding_springfield
-          flake8
 
         # mlf-core is now already configured so need to do it again
       - name: Create package-prediction Tensorflow Template
         run: |
           cd ..
           rm -rf exploding_springfield
           echo -e "\033[B\n\n\n\n\n\n\033[B\nn" | mlf-core create
-          cd exploding_springfield
-          flake8
 
       - name: Create package-prediction XGBoost Template
         run: |
           cd ..
           rm -rf exploding_springfield
           echo -e "\033[B\n\n\n\n\n\n\033[B\033[B\nn" | mlf-core create
-          cd exploding_springfield
-          flake8
diff --git a/.github/workflows/sync_project.yml b/.github/workflows/sync_project.yml
@@ -23,7 +23,7 @@ jobs:
               token: '${{ secrets.CT_SYNC_TOKEN }}'
           name: Check out source-code repository
 
-        - uses: oleksiyrudenko/gha-git-credentials@v2
+        - uses: oleksiyrudenko/gha-git-credentials@v2.1
           with:
                name: 'zethson'
                email: 'lukas.heumos@posteo.net'

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -6,6 +6,30 @@ Changelog
 
 This project adheres to `Semantic Versioning <https://semver.org/>`_.
 
+1.9.0 (2021-02-16)
+------------------
+
+**Added**
+
+* Possibility to log input files in all templates with the MLFCore object.
+* [ALL TEMPLATES] Using new mlf-core/base:1.2.0 container, which is based on CUDA 11.2.1 and cudnn 8.1
+* [PYTORCH] Upgraded Pytorch to 1.7.1
+* [PYTORCH] Added set_deterministic
+* [ALL TEMPLATES] Using new mlflow autolog
+* [ALL TEMPLATES] Changed mlflow autolog for loss to every 1 iteration
+
+**Fixed**
+
+* mlf-core fix-artifact-paths now operates as expected.
+* [ALL TEMPLATES] fixed a path error that causes the general template linter to fail searching for
+  subprocess.call([\'conda\', \'env\', \'export\', \'--name\', \'<<project_name>>\'], stdout=conda_env_filehandler) and
+  mlflow.log_artifact(f\'{{reports_output_dir}}/<<project_name>>_conda_environment.yml\', artifact_path=\'reports\') in the project's mlf_core.py file
+
+**Dependencies**
+
+**Deprecated**
+
+
 1.8.0 (2021-02-01)
 ------------------
 

diff --git a/README.rst b/README.rst
@@ -19,6 +19,10 @@ mlf-core
 .. image:: https://img.shields.io/pypi/v/mlf-core.svg
         :target: https://pypi.python.org/pypi/mlf-core
         :alt: PyPI Status
+
+.. image:: https://static.pepy.tech/personalized-badge/mlf-core?units=international_system&left_color=grey&right_color=green&left_text=Downloads
+        :target: https://pepy.tech/project/mlf-core
+        :alt: Pepy Downloads
 
 .. image:: https://img.shields.io/discord/742367395196305489?color=passing
         :target: https://discord.gg/Mv8sAcq

diff --git a/cookietemple.cfg b/cookietemple.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.8.0
+current_version = 1.9.0
 
 [bumpversion_files_whitelisted]
 setup_file = setup.py

diff --git a/docs/bump_version.rst b/docs/bump_version.rst
@@ -39,11 +39,6 @@ The :code:`bump-version` command follows the syntax
 
 - ``PATH`` [CWD]: The path to the ``mlf_core.cfg`` file, which contains all locations, where the version should be increased.
 
-.. figure:: images/bump_version_example.png
-   :scale: 100 %
-   :alt: bump-version example
-
-   bump-version applied to a fresh cli-python project
 
 Flags
 -------

diff --git a/docs/conf.py b/docs/conf.py
@@ -53,9 +53,9 @@
 # the built documents.
 #
 # The short X.Y version.
-version = '1.8.0'
+version = '1.9.0'
 # The full version, including alpha/beta/rc tags.
-release = '1.8.0'
+release = '1.9.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

diff --git a/docs/images/mlf_core_overview.png b/docs/images/mlf_core_overview.png
diff --git a/docs/images/navigate_developer_settings.png b/docs/images/navigate_developer_settings.png
diff --git a/docs/images/navigate_settings.png b/docs/images/navigate_settings.png
diff --git a/docs/images/token_settings.png b/docs/images/token_settings.png
diff --git a/docs/index.rst b/docs/index.rst
@@ -5,6 +5,7 @@ Welcome to mlf-core's documentation!
    :caption: Contents:
 
    readme
+   tutorial
    installation
    usage
    create

diff --git a/docs/lint.rst b/docs/lint.rst
@@ -165,6 +165,7 @@ mlflow-tensorflow-2
 
 .. code-block::
     :linenos:
+
     set_general_random_seeds(dict_args["general_seed"]),
     set_tensorflow_random_seeds(dict_args["tensorflow_seed"])
     def set_tensorflow_random_seeds(seed):
@@ -210,6 +211,7 @@ mlflow-xgboost-2
 
 .. code-block::
     :linenos:
+
     set_general_random_seeds(dict_args["general_seed"]),
     set_xgboost_random_seeds(dict_args["xgboost_seed"], param)
     def set_xgboost_random_seeds(seed, param):
@@ -249,6 +251,7 @@ mlflow-xgboost_dask-2
 
 .. code-block::
     :linenos:
+    
     set_general_random_seeds(dict_args["general_seed"]),
     set_xgboost_dask_random_seeds(dict_args["xgboost_seed"], param)
     def set_xgboost_random_seeds(seed, param):

diff --git a/docs/modules.rst b/docs/modules.rst
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,4 +1,4 @@
-Sphinx==3.4.3
+Sphinx==3.5.0
 sphinx_rtd_theme==0.5.1
 sphinx-automodapi==0.13
 -r ../requirements.txt
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
@@ -0,0 +1,112 @@
+.. _tutorial:
+
+==========
+Tutorial
+==========
+
+Disclaimer
+-----------
+
+.. warning:: **This page is currently under development. Please check back later.**
+
+
+.. warning:: This document serves as a single page tutorial for mlf-core, the issue of deterministic machine learning and everything related.
+             It is **not** supposed to be used as a reference documentation for specific pieces of information.
+             Please use the remaining mlf-core or the respective tools' documentation for this purpose.
+             Although mlf-core is designed with users in mind and to be as easy to use as possible, it is inherently complex due to the nature of the issue it solves.
+             Hence, please be patient while working through this tutorial.
+
+Introduction
+-------------
+
+The fields of machine learning and artificial intelligence grew immensely in recent years.
+Nevertheless, many papers cannot be reproduced and it is difficult for scientists even after rigorous peer review to know which results to trust.
+This serious problem is known as the reproducibility crisis in machine learning.
+The reasons for this issue are manifold, but include the fact that major machine learning libraries default to the usage of non-deterministic algorithms based on atomic operations.
+Solely fixing all random seeds is not sufficient for deterministic machine learning.
+Fortunately, major machine learning libraries such as Pytorch, Tensorflow and XGBoost are aware of these issues and they are slowly providing
+more and more deterministic variants of these atomic operations based algorithms.
+We evaluated the current state of deterministic machine learning and formulated a set of requirements for fully reproducible machine learning even with several GPUs.
+Based on this evaluation we developed the mlf-core ecosystem, an intuitive software solution solving the issue of irreproducible machine learning.
+
+mlf-core Overview
+-------------------
+
+The mlf-core ecosystem consists of the primary Python packages `mlf-core <https://github.com/mlf-core/mlf-core>`_ and `system-intelligence <https://github.com/mlf-core/system-intelligence>`_,
+a set of GPU-enabled `docker containers <https://github.com/mlf-core/containers>`_ and various fully reproducible machine learning projects found in the `mlf-core Github organization <https://github.com/mlf-core>`_.
+
+.. figure:: images/mlf_core_overview.png
+   :alt: mlf-core overview
+
+   An overview of the mlf-core project.
+
+This tutorial will primarily focus on the mlf-core Python package since it is the part that users will knowingly use the most.
+Additionally, mlf-core makes heavy use of `Conda <https://docs.conda.io/en/latest/>`_, `Docker <https://www.docker.com/>`_, Github_ and `Github Actions <https://github.com/features/actions>`_.
+We **strongly** suggest that you look for tutorials on Youtube or your favorite search engine to get comfortable with these technologies before proceeding further.
+Whenever we use more advanced features of these tools we will explain them. Therefore you don't need to be an expert, but a good overview is helpful.
+
+Installation
+-------------
+
+The mlf-core Python package is available on `PyPI <https://pypi.org/project/mlf-core/>`_ and the latest version can be installed with
+
+.. code-block:: console
+
+    $ pip install mlf-core
+
+It is advised to use a virtual environment for mlf-core since it relies on explicitly pinning many requirements.
+To verify that your installation was successful run:
+
+.. code-block:: console
+
+    $ mlf-core --help
+
+Configuration
+--------------
+
+mlf-core tightly (optionally, but **strongly recommended**) integrates with Github and wants to prevent overhead when creating several projects.
+Therefore mlf-core requires a little bit of configuration before the first usage.
+To configure mlf-core run:
+
+.. code-block:: console
+
+    $ mlf-core config all
+
+Enter your full name, your email and your Github username (hit enter if not available).
+Next you will be asked whether you want to update your Github personal access token.
+mlf-core requires your Github access token to automatically create a Github repository to upload your code and to enable mlf-core's sync functionality (explained later).
+Hence, answer with **y**. Now you will be prompted for the token.
+To create a token go to Github_ and log in. Next, click on your profile avatar and navigate to 'Settings'.
+
+.. figure:: images/navigate_settings.png
+   :alt: Github settings navigation
+
+   Click on 'Settings'.
+
+Now navigate to the 'Developer settings'.
+
+.. figure:: images/navigate_developer_settings.png
+   :alt: Github settings navigation
+
+Click on 'Developer settings' in the bottom left. Then access 'Personal access token' and click 'Generate new token' in the top right.
+You should now be prompted for your password. Enter a name for the note that clearly specifies what it is for e.g. 'mlf-core token'.
+Tick all options in the following image:
+
+.. figure:: images/token_settings.png
+   :alt: Github settings navigation
+
+   Select **all** of the options that are ticked in the screenshot. No additional options are required, especially not repository deletion.
+
+Click 'Generate token' at the very bottom and copy your token into the prompt of mlf-core. Hit enter and accept the update.
+mlf-core is now configured and ready to be used!
+
+For more details including security precautions please visit :ref:`config` and :ref:`github_support`.
+
+
+.. _Github: https://github.com
+
+Creating a mlf-core project
+------------------------------
+
+mlf-core project overview
+----------------------------
diff --git a/mlf_core/__init__.py b/mlf_core/__init__.py
@@ -2,4 +2,4 @@
 
 __author__ = """Lukas Heumos"""
 __email__ = 'lukas.heumos@posteo.net'
-__version__ = '1.8.0'
+__version__ = '1.9.0'
diff --git a/mlf_core/cli.py b/mlf_core/cli.py
@@ -11,11 +11,10 @@
 from rich import traceback
 from rich import print
 
-import mlf_core
 from mlf_core.bump_version.bump_version import VersionBumper
 from mlf_core.config.config import ConfigCommand
 from mlf_core.create.create import choose_domain
-from mlf_core.custom_cli.click import HelpErrorHandling, print_project_version, CustomHelpSubcommand, CustomArg
+from mlf_core.custom_cli.click import HelpErrorHandling, print_project_version, print_mlfcore_version, CustomHelpSubcommand, CustomArg
 from mlf_core.info.info import TemplateInfo
 from mlf_core.lint.lint import lint_project
 from mlf_core.list.list import TemplateLister
@@ -48,7 +47,7 @@ def main():
 
 
 @click.group(cls=HelpErrorHandling)
-@click.version_option(mlf_core.__version__, message=click.style(f'mlf-core Version: {mlf_core.__version__}', fg='blue'))
+@click.option('--version', is_flag=True, callback=print_mlfcore_version, expose_value=False, is_eager=True, help='Print the current mlf-core version.')
 @click.option('-v', '--verbose', is_flag=True, default=False, help='Enable verbose output (print debug statements).')
 @click.option("-l", "--log-file", help="Save a verbose log to a file.")
 @click.pass_context
@@ -295,15 +294,18 @@ def config(ctx, view: bool, section: str) -> None:
 
 
 @mlf_core_cli.command(short_help='Fix artifact location path for local all mlruns.', cls=CustomHelpSubcommand)
-@click.argument('path', type=str, default='.', required=False, helpmsg='Path to the root of the mlruns folder.', cls=CustomArg)
+@click.argument('path', type=str, default=os.getcwd(), required=False, helpmsg='Path to the root of the mlruns folder.', cls=CustomArg)
 @click.pass_context
 def fix_artifact_paths(ctx, path: str) -> None:
     """
+    Ensures that the paths of all locally saved MLflow artifacts are fixed to display them on the current machine.
     """
     for meta_yaml in Path(f'{path}/mlruns').rglob('meta.yaml'):
-        if 'file' not in meta_yaml.absolute():
-            print(f'[bold yellow] Skipping path fixing for: {meta_yaml.absolute()}. Run was not saved locally.')
-        print(f'[bold blue] Fixing path for: {meta_yaml.absolute()}')
+        with open(meta_yaml.absolute()) as meta_yaml_file:
+            content = meta_yaml_file.readlines()
+            if 'file://' not in content[0]:
+                print(f'[bold yellow]Skipping path fixing for: {meta_yaml.absolute()}. Run was not saved locally.')
+        print(f'[bold blue]Fixing path for: {meta_yaml.absolute()}')
         with open(meta_yaml.absolute()) as meta_yaml_file:
             content = meta_yaml_file.readlines()
             if 'artifact_location' in content[0]: