From 8bbe3cf29e585dbe6beed79286763197b1dbe42b Mon Sep 17 00:00:00 2001 From: Dorota Jarecka Date: Wed, 1 Apr 2020 11:19:54 -0400 Subject: [PATCH 01/12] skiping for now docker tasks on windows --- pydra/engine/tests/test_dockertask.py | 43 ++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index 97abc56e5f..73c7f778a0 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -import os, shutil +import os, sys, shutil import subprocess as sp import pytest import attr @@ -19,8 +19,13 @@ shutil.which("docker") is None or sp.call(["docker", "info"]), reason="no docker within the container", ) +no_win = pytest.mark.skipif( + sys.platform.startswith("win"), + reason="docker command not adjusted for windows docker", +) +@no_win @need_docker def test_docker_1_nosubm(): """ simple command in a container, a default bindings and working directory is added @@ -42,6 +47,7 @@ def test_docker_1_nosubm(): assert "Unable to find image" in res.output.stderr +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_1(plugin): @@ -61,6 +67,7 @@ def test_docker_1(plugin): assert "Unable to find image" in res.output.stderr +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_1_dockerflag(plugin): @@ -82,6 +89,7 @@ def test_docker_1_dockerflag(plugin): assert "Unable to find image" in res.output.stderr +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_1_dockerflag_exception(plugin): @@ -94,6 +102,7 @@ def test_docker_1_dockerflag_exception(plugin): assert "container_info has to have 2 or 3 elements" in str(excinfo.value) +@no_win @need_docker def test_docker_2_nosubm(): """ a command with arguments, cmd and args given as executable @@ -113,6 +122,7 @@ def test_docker_2_nosubm(): assert "Unable to find image" in 
res.output.stderr +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_2(plugin): @@ -135,6 +145,7 @@ def test_docker_2(plugin): assert "Unable to find image" in res.output.stderr +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_2_dockerflag(plugin): @@ -159,6 +170,7 @@ def test_docker_2_dockerflag(plugin): assert "Unable to find image" in res.output.stderr +@no_win @need_docker def test_docker_2a_nosubm(): """ a command with arguments, using executable and args @@ -183,6 +195,7 @@ def test_docker_2a_nosubm(): assert "Unable to find image" in res.output.stderr +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_2a(plugin): @@ -210,6 +223,7 @@ def test_docker_2a(plugin): assert "Unable to find image" in res.output.stderr +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_3(plugin, tmpdir): @@ -233,6 +247,7 @@ def test_docker_3(plugin, tmpdir): assert "Unable to find image" in res.output.stderr +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_3_dockerflag(plugin, tmpdir): @@ -259,6 +274,7 @@ def test_docker_3_dockerflag(plugin, tmpdir): assert "Unable to find image" in res.output.stderr +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_3_dockerflagbind(plugin, tmpdir): @@ -285,6 +301,7 @@ def test_docker_3_dockerflagbind(plugin, tmpdir): assert "Unable to find image" in res.output.stderr +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_4(plugin, tmpdir): @@ -311,6 +328,7 @@ def test_docker_4(plugin, tmpdir): assert res.output.return_code == 0 +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_4_dockerflag(plugin, tmpdir): @@ -340,6 +358,7 @@ def test_docker_4_dockerflag(plugin, tmpdir): # tests with State +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_st_1(plugin): @@ -364,6 +383,7 @@ 
def test_docker_st_1(plugin): assert res[0].output.return_code == res[1].output.return_code == 0 +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_st_2(plugin): @@ -388,6 +408,7 @@ def test_docker_st_2(plugin): assert res[0].output.return_code == res[1].output.return_code == 0 +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_st_3(plugin): @@ -406,6 +427,7 @@ def test_docker_st_3(plugin): assert "Ubuntu" in res[3].output.stdout +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_st_4(plugin): @@ -448,6 +470,7 @@ def test_docker_st_4(plugin): # tests with workflows +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_wf_docker_1(plugin, tmpdir): @@ -492,6 +515,7 @@ def test_wf_docker_1(plugin, tmpdir): assert res.output.out == "message from the previous task: hello from pydra" +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_wf_docker_1_dockerflag(plugin, tmpdir): @@ -532,6 +556,7 @@ def test_wf_docker_1_dockerflag(plugin, tmpdir): assert res.output.out == "message from the previous task: hello from pydra" +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_wf_docker_2pre(plugin, tmpdir): @@ -553,6 +578,7 @@ def test_wf_docker_2pre(plugin, tmpdir): assert res.output.stdout == "/outputs/tmp.txt" +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_wf_docker_2(plugin, tmpdir): @@ -593,6 +619,7 @@ def test_wf_docker_2(plugin, tmpdir): assert res.output.out == "Hello!" 
+@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_wf_docker_3(plugin, tmpdir): @@ -636,6 +663,7 @@ def test_wf_docker_3(plugin, tmpdir): # tests with customized output_spec +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_outputspec_1(plugin, tmpdir): @@ -664,6 +692,7 @@ def test_docker_outputspec_1(plugin, tmpdir): # tests with customised input_spec +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_inputspec_1(plugin, tmpdir): @@ -705,6 +734,7 @@ def test_docker_inputspec_1(plugin, tmpdir): assert res.output.stdout == "hello from pydra" +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_inputspec_1a(plugin, tmpdir): @@ -744,6 +774,7 @@ def test_docker_inputspec_1a(plugin, tmpdir): assert res.output.stdout == "hello from pydra" +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_inputspec_2(plugin, tmpdir): @@ -792,6 +823,7 @@ def test_docker_inputspec_2(plugin, tmpdir): assert res.output.stdout == "hello from pydra\nhave a nice one" +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_inputspec_2a_except(plugin, tmpdir): @@ -843,6 +875,7 @@ def test_docker_inputspec_2a_except(plugin, tmpdir): assert res.output.stdout == "hello from pydra\nhave a nice one" +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_inputspec_2a(plugin, tmpdir): @@ -894,6 +927,7 @@ def test_docker_inputspec_2a(plugin, tmpdir): assert res.output.stdout == "hello from pydra\nhave a nice one" +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_inputspec_3(plugin, tmpdir): @@ -937,6 +971,7 @@ def test_docker_inputspec_3(plugin, tmpdir): assert cmdline == docky.cmdline +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_inputspec_3a(plugin, tmpdir): @@ -980,6 +1015,7 @@ def test_docker_inputspec_3a(plugin, tmpdir): assert "use 
field.metadata['container_path']=True" in str(excinfo.value) +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_cmd_inputspec_copyfile_1(plugin, tmpdir): @@ -1042,6 +1078,7 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmpdir): assert "hello from pydra\n" == f.read() +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_inputspec_state_1(plugin, tmpdir): @@ -1090,6 +1127,7 @@ def test_docker_inputspec_state_1(plugin, tmpdir): assert res[1].output.stdout == "have a nice one" +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_inputspec_state_1b(plugin, tmpdir): @@ -1139,6 +1177,7 @@ def test_docker_inputspec_state_1b(plugin, tmpdir): assert res[1].output.stdout == "have a nice one" +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_wf_inputspec_1(plugin, tmpdir): @@ -1190,6 +1229,7 @@ def test_docker_wf_inputspec_1(plugin, tmpdir): assert res.output.out == "hello from pydra" +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_wf_state_inputspec_1(plugin, tmpdir): @@ -1247,6 +1287,7 @@ def test_docker_wf_state_inputspec_1(plugin, tmpdir): assert res[1].output.out == "have a nice one" +@no_win @need_docker @pytest.mark.parametrize("plugin", Plugins) def test_docker_wf_ndst_inputspec_1(plugin, tmpdir): From badcd6223da6bc9a5a6c49471923e5d97272692e Mon Sep 17 00:00:00 2001 From: Dorota Jarecka Date: Wed, 1 Apr 2020 12:31:38 -0400 Subject: [PATCH 02/12] fixing paths for win --- pydra/engine/helpers_file.py | 2 +- pydra/engine/tests/test_helpers_file.py | 28 ++++++++++++++++--------- pydra/engine/tests/test_shelltask.py | 4 ++-- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 2afac73ad9..ba2399fa45 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -379,7 +379,7 @@ def get_related_files(filename, 
include_this_file=True): if this_type in type_set: for related_type in type_set: if include_this_file or related_type != this_type: - related_files.append(op.join(path, name + related_type)) + related_files.append(Path(path) / (name + related_type)) if not len(related_files): related_files = [filename] return related_files diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index 048667f9b0..7adca521a3 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -41,12 +41,12 @@ def test_split_filename(filename, split): def test_fname_presuffix(): fname = "foo.nii" pth = fname_presuffix(fname, "pre_", "_post", "/tmp") - assert pth == "/tmp/pre_foo_post.nii" + assert pth == str(Path("/tmp/pre_foo_post.nii")) fname += ".gz" pth = fname_presuffix(fname, "pre_", "_post", "/tmp") - assert pth == "/tmp/pre_foo_post.nii.gz" + assert pth == str(Path("/tmp/pre_foo_post.nii.gz")) pth = fname_presuffix(fname, "pre_", "_post", "/tmp", use_ext=False) - assert pth == "/tmp/pre_foo_post" + assert pth == str(Path("/tmp/pre_foo_post")) @pytest.fixture() @@ -56,7 +56,7 @@ def _temp_analyze_files(tmpdir): orig_hdr = tmpdir.join("orig.hdr") orig_img.open("w+").close() orig_hdr.open("w+").close() - return str(orig_img), str(orig_hdr) + return Path(orig_img), Path(orig_hdr) @pytest.fixture() @@ -66,7 +66,7 @@ def _temp_analyze_files_prime(tmpdir): orig_hdr = tmpdir.join("orig_prime.hdr") orig_img.open("w+").close() orig_hdr.open("w+").close() - return orig_img.strpath, orig_hdr.strpath + return Path(orig_img.strpath), Path(orig_hdr.strpath) def test_copyfile(_temp_analyze_files): @@ -213,11 +213,19 @@ def test_ensure_list(filename, expected): @pytest.mark.parametrize( "file, length, expected_files", [ - ("/path/test.img", 3, ["/path/test.hdr", "/path/test.img", "/path/test.mat"]), - ("/path/test.hdr", 3, ["/path/test.hdr", "/path/test.img", "/path/test.mat"]), - ("/path/test.BRIK", 2, ["/path/test.BRIK", 
"/path/test.HEAD"]), - ("/path/test.HEAD", 2, ["/path/test.BRIK", "/path/test.HEAD"]), - ("/path/foo.nii", 2, ["/path/foo.nii", "/path/foo.mat"]), + ( + "/path/test.img", + 3, + [Path("/path/test.hdr"), Path("/path/test.img"), Path("/path/test.mat")], + ), + ( + "/path/test.hdr", + 3, + [Path("/path/test.hdr"), Path("/path/test.img"), Path("/path/test.mat")], + ), + ("/path/test.BRIK", 2, [Path("/path/test.BRIK"), Path("/path/test.HEAD")]), + ("/path/test.HEAD", 2, [Path("/path/test.BRIK"), Path("/path/test.HEAD")]), + ("/path/foo.nii", 2, [Path("/path/foo.nii"), Path("/path/foo.mat")]), ], ) def test_related_files(file, length, expected_files): diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 011e83c706..dbfc0e88d0 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -57,7 +57,7 @@ def test_shell_cmd_1_strip(plugin, results_function): assert shelly.cmdline == " ".join(cmd) res = results_function(shelly, plugin) - assert res.output.stdout == str(shelly.output_dir) + assert res.output.stdout == str(Path(shelly.output_dir)) assert res.output.return_code == 0 assert res.output.stderr == "" @@ -124,7 +124,7 @@ def test_shell_cmd_3(plugin): shelly = ShellCommandTask(name="shelly", executable=cmd).split("executable") assert shelly.cmdline == ["pwd", "whoami"] res = shelly(plugin=plugin) - assert res[0].output.stdout == f"{str(shelly.output_dir[0])}\n" + assert res[0].output.stdout == f"{str(Path(shelly.output_dir[0]))}\n" if "USER" in os.environ: assert res[1].output.stdout == f"{os.environ['USER']}\n" else: From ba0b7ccfebcd26aa3dcf7715d160207567b1a66e Mon Sep 17 00:00:00 2001 From: Dorota Jarecka Date: Wed, 1 Apr 2020 13:21:11 -0400 Subject: [PATCH 03/12] fixing paths for win --- pydra/engine/helpers_file.py | 4 ++-- pydra/engine/tests/test_helpers_file.py | 6 +++--- pydra/engine/tests/test_shelltask.py | 7 ++++--- pydra/engine/tests/test_task.py | 9 ++++++++- 4 files changed, 17 
insertions(+), 9 deletions(-) diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index ba2399fa45..4a6093e448 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -94,7 +94,7 @@ def fname_presuffix(fname, prefix="", suffix="", newpath=None, use_ext=True): >>> from pydra.engine.helpers_file import fname_presuffix >>> fname = 'foo.nii.gz' >>> fname_presuffix(fname,'pre','post','/tmp') - '/tmp/prefoopost.nii.gz' + Path('/tmp/prefoopost.nii.gz') """ pth, fname, ext = split_filename(fname) @@ -104,7 +104,7 @@ def fname_presuffix(fname, prefix="", suffix="", newpath=None, use_ext=True): # No need for isdefined: bool(Undefined) evaluates to False if newpath: pth = op.abspath(newpath) - return op.join(pth, prefix + fname + suffix + ext) + return Path(pth) / (prefix + fname + suffix + ext) def hash_file(afile, chunk_len=8192, crypto=sha256, raise_notfound=True): diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index 7adca521a3..019f2447b2 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -41,12 +41,12 @@ def test_split_filename(filename, split): def test_fname_presuffix(): fname = "foo.nii" pth = fname_presuffix(fname, "pre_", "_post", "/tmp") - assert pth == str(Path("/tmp/pre_foo_post.nii")) + assert pth == Path("/tmp/pre_foo_post.nii") fname += ".gz" pth = fname_presuffix(fname, "pre_", "_post", "/tmp") - assert pth == str(Path("/tmp/pre_foo_post.nii.gz")) + assert pth == Path("/tmp/pre_foo_post.nii.gz") pth = fname_presuffix(fname, "pre_", "_post", "/tmp", use_ext=False) - assert pth == str(Path("/tmp/pre_foo_post")) + assert pth == Path("/tmp/pre_foo_post") @pytest.fixture() diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index dbfc0e88d0..d2d6936c94 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -41,7 +41,7 @@ def 
test_shell_cmd_1(plugin, results_function): assert shelly.cmdline == " ".join(cmd) res = results_function(shelly, plugin=plugin) - assert res.output.stdout == str(shelly.output_dir) + "\n" + assert Path(res.output.stdout.rstrip()) == shelly.output_dir assert res.output.return_code == 0 assert res.output.stderr == "" @@ -57,7 +57,7 @@ def test_shell_cmd_1_strip(plugin, results_function): assert shelly.cmdline == " ".join(cmd) res = results_function(shelly, plugin) - assert res.output.stdout == str(Path(shelly.output_dir)) + assert Path(res.output.stdout) == Path(shelly.output_dir) assert res.output.return_code == 0 assert res.output.stderr == "" @@ -124,7 +124,8 @@ def test_shell_cmd_3(plugin): shelly = ShellCommandTask(name="shelly", executable=cmd).split("executable") assert shelly.cmdline == ["pwd", "whoami"] res = shelly(plugin=plugin) - assert res[0].output.stdout == f"{str(Path(shelly.output_dir[0]))}\n" + assert Path(res[0].output.stdout.rstrip()) == shelly.output_dir[0] + if "USER" in os.environ: assert res[1].output.stdout == f"{os.environ['USER']}\n" else: diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index e624a39ca2..cc97a138ac 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import typing as ty -import os +import os, sys import pytest from ... 
import mark @@ -15,6 +15,11 @@ from ...utils.messenger import FileMessenger, PrintMessenger, collect_messages from .utils import gen_basic_wf +no_win = pytest.mark.skipif( + sys.platform.startswith("win"), + reason="docker/singularity command not adjusted for windows", +) + @mark.task def funaddtwo(a): @@ -372,6 +377,7 @@ def test_container_cmds(tmpdir): assert containy.cmdline +@no_win def test_docker_cmd(tmpdir): docky = DockerTask(name="docky", executable="pwd", image="busybox") assert ( @@ -393,6 +399,7 @@ def test_docker_cmd(tmpdir): ) +@no_win def test_singularity_cmd(tmpdir): # todo how this should be done? image = "library://sylabsed/linux/alpine" From 8c02892c9cfc6e673b95eee49aab486a4aee376d Mon Sep 17 00:00:00 2001 From: Dorota Jarecka Date: Wed, 1 Apr 2020 13:36:21 -0400 Subject: [PATCH 04/12] returning string in fname_presuffix --- pydra/engine/helpers_file.py | 4 ++-- pydra/engine/tests/test_helpers_file.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 4a6093e448..7285832fdf 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -94,7 +94,7 @@ def fname_presuffix(fname, prefix="", suffix="", newpath=None, use_ext=True): >>> from pydra.engine.helpers_file import fname_presuffix >>> fname = 'foo.nii.gz' >>> fname_presuffix(fname,'pre','post','/tmp') - Path('/tmp/prefoopost.nii.gz') + str(Path('/tmp/prefoopost.nii.gz')) """ pth, fname, ext = split_filename(fname) @@ -104,7 +104,7 @@ def fname_presuffix(fname, prefix="", suffix="", newpath=None, use_ext=True): # No need for isdefined: bool(Undefined) evaluates to False if newpath: pth = op.abspath(newpath) - return Path(pth) / (prefix + fname + suffix + ext) + return str(Path(pth) / (prefix + fname + suffix + ext)) def hash_file(afile, chunk_len=8192, crypto=sha256, raise_notfound=True): diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index 
019f2447b2..7adca521a3 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -41,12 +41,12 @@ def test_split_filename(filename, split): def test_fname_presuffix(): fname = "foo.nii" pth = fname_presuffix(fname, "pre_", "_post", "/tmp") - assert pth == Path("/tmp/pre_foo_post.nii") + assert pth == str(Path("/tmp/pre_foo_post.nii")) fname += ".gz" pth = fname_presuffix(fname, "pre_", "_post", "/tmp") - assert pth == Path("/tmp/pre_foo_post.nii.gz") + assert pth == str(Path("/tmp/pre_foo_post.nii.gz")) pth = fname_presuffix(fname, "pre_", "_post", "/tmp", use_ext=False) - assert pth == Path("/tmp/pre_foo_post") + assert pth == str(Path("/tmp/pre_foo_post")) @pytest.fixture() From 0c37c9a85a5dcba9b55dc99ecd8e4902ff3fe79d Mon Sep 17 00:00:00 2001 From: Dorota Jarecka Date: Wed, 1 Apr 2020 14:17:09 -0400 Subject: [PATCH 05/12] fixing doctest --- pydra/engine/helpers_file.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 7285832fdf..ed18ca118e 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -91,10 +91,12 @@ def fname_presuffix(fname, prefix="", suffix="", newpath=None, use_ext=True): Examples -------- + >>> import pytest, sys + >>> if sys.platform.startswith('win'): pytest.skip() >>> from pydra.engine.helpers_file import fname_presuffix >>> fname = 'foo.nii.gz' >>> fname_presuffix(fname,'pre','post','/tmp') - str(Path('/tmp/prefoopost.nii.gz')) + '/tmp/prefoopost.nii.gz' """ pth, fname, ext = split_filename(fname) From 914f2df1584d8ff1785509425a079a17bd7ecb16 Mon Sep 17 00:00:00 2001 From: Nicol Lo Date: Fri, 3 Apr 2020 17:48:32 +0800 Subject: [PATCH 06/12] add scikit-learn workflow tutorial --- tutorial/notebooks/ml_workflow.ipynb | 388 +++++++++++++++++++++++++++ 1 file changed, 388 insertions(+) create mode 100644 tutorial/notebooks/ml_workflow.ipynb diff --git a/tutorial/notebooks/ml_workflow.ipynb 
b/tutorial/notebooks/ml_workflow.ipynb new file mode 100644 index 0000000000..d492dd6db1 --- /dev/null +++ b/tutorial/notebooks/ml_workflow.ipynb @@ -0,0 +1,388 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pydra\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import sklearn" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: sklearn in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (0.0)\n", + "Requirement already satisfied: scikit-learn in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from sklearn) (0.22.2.post1)\n", + "Requirement already satisfied: numpy>=1.11.0 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from scikit-learn->sklearn) (1.18.2)\n", + "Requirement already satisfied: scipy>=0.17.0 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from scikit-learn->sklearn) (1.4.1)\n", + "Requirement already satisfied: joblib>=0.11 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from scikit-learn->sklearn) (0.14.1)\n", + "Requirement already satisfied: matplotlib in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (3.2.1)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from matplotlib) (0.10.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from matplotlib) (1.1.0)\n", + "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from matplotlib) (2.4.6)\n", + "Requirement already satisfied: numpy>=1.11 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from matplotlib) (1.18.2)\n", + "Requirement 
already satisfied: python-dateutil>=2.1 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from matplotlib) (2.8.1)\n", + "Requirement already satisfied: six in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from cycler>=0.10->matplotlib) (1.14.0)\n", + "Requirement already satisfied: setuptools in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib) (46.1.1.post20200323)\n" + ] + } + ], + "source": [ + "# load data\n", + "!pip install sklearn\n", + "!pip install matplotlib" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Machine Learning Workflow" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hyperparameter tuning is an important step in developing machine learning models. \n", + "XXX You learned how to run multiple tasks within a pipeline with *pydra* `Workflow` in the `intro_workflow` tutorial. \n", + "\n", + "\n", + "Let's initiate a `Workflow` with the scikit-learn handwritten **digits** dataset (8x8 images) as input: " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import datasets\n", + "\n", + "digits = datasets.load_digits(n_class=10, return_X_y=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0., 0., 0., 12., 13., 5., 0., 0.],\n", + " [ 0., 0., 0., 11., 16., 9., 0., 0.],\n", + " [ 0., 0., 3., 15., 16., 6., 0., 0.],\n", + " [ 0., 7., 15., 16., 16., 2., 0., 0.],\n", + " [ 0., 0., 1., 16., 16., 3., 0., 0.],\n", + " [ 0., 0., 1., 16., 16., 6., 0., 0.],\n", + " [ 0., 0., 1., 16., 16., 6., 0., 0.],\n", + " [ 0., 0., 0., 11., 16., 10., 0., 0.]])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "digits.images[1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { +
"cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD0CAYAAAB3sfb1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAa/0lEQVR4nO3df5DU9Z3n8ec7TEFWQvhhyOLOTGTaQRQowNgn5q4qalwDuqW4tSwZk9tFNhbZNdlLvPsj5iwTkzMX6uoqGk/jxpMAniWYNSmHOy+QGIPWppbFIf5ilqjMMLPMmOwioKfJgoy+74/+ztBMd3+/35nu6Z7uz+tR1UV/v5/Pt/vbL7797u7v9zPfr7k7IiIShvfVegVERKR6VPRFRAKioi8iEhAVfRGRgKjoi4gEREVfRCQgDVn0zezHZrau0n3rmTIppEyKUy6FGioTd58UN+DtvNt7wL/mTX+m1utXgde3COgCjke3J4FFIWcSvcazgO8CrwNvAs+EnAkwH/BRr/P2FMs1dC6jXutXo4z+MPRMxvr+cXeaxvdRUXnu/oHh+2bWB9zk7k+O7mdmTe4+VM11q5DXgDVAP7lfWJ8HtgNLSy0QQCYADwBNwIXAMWB5XOdAMgGYNZb1DyUXMzsP+FPg10l9A8lkTO8fqIPdO2Z2uZkNmNmXzew3wGYzm21m/8fMjpjZ8eh+S94yu83spuj+jWb2d2b236O+h8zs6nH2bTOzZ8zsLTN70szuM7OH07wOd3/D3fs89/FswLtAe8iZmNkFwHXABnc/4u7vuvu+kDOptAbM5T7gy8A7oWcy3vfPpC/6kXnAHOBcYAO59d4cTX+E3M+2e2OWXwG8DHwI+G/AJjOzcfR9BNgLnA3cAfxZ/oJm9qKZfTruhZjZG8AJ4H8A/zWub4JGyOQScr98vm5mr5vZS2b2JzHrnKQRMhnWHxWmzWb2oYS+SRoiFzP7U+Cku//fmHVNqxEyGd/7p9b7pErsp+oj2l8HXE7uU/39Mf2XA8fzpneT+ykHcCNwcNQ+MAfmjaUvuQ1hCDgrr/1h4OFxvL7pwM3AH4WcCfCfo8e6A5gKXEZuf+uFAWfyASBL7if77wOPAbtCf/8AM4BXgfmjX2PAmYzr/VMv3/SPuPuJ4QkzO8vMvmdm/Wb2/4BngFlmNqXE8r8ZvuPuv4vufmCMff8AOJY3D+DwGF/H8OP+Fvgb4CEz+/B4HoPGyORfgVPAne7+jrs/Dfwc+OQYHiNf3Wfi7m+7e5e7D7n7PwNfAD5pZjPSPkYRdZ8LucL2v9y9bwzLxGmETMb1/qmXoj/6VKD/CVgIrHD3DwIfj+aX+nlVCb8G5pjZWXnzWst4vPeR+9RvHufyjZDJi0XmlXPa10bIZLTh11TOe7URcrkS+A9m9ptoP3wr8AMz+/I416cRMhnX+6deiv5oM8h9yr1hZnOAr030E7p7P7khl3eY2VQz+xhwbdrlzewqM7vIzKaY2QeBb5MbunmgQqtYd5mQ+zb1T8BXzKzJzP4dcAWwq0KrWHeZmNkKM1toZu8zs7OBe4Dd7v5mBVez7nIhV/SXkNvtspzcaLjPkTuwWwn1mMm43j/1WvTvBn6P3NjUPcDOKj3vZ4CPAUeBO4FHgZPDjWbWbWafKbHsLGAbubG0PcB5wKr8n5hlqrtM3P0UsBq4hlwu/xP4c3f/VYXWre4yATLk1vMtYH+03A0VXr+6y8Xdj7r7b4Zv5Ea/HXf3tyu0bvWYybjePxYdEJBxMLNHgV+5+4R/K6gXyqSQMilOuRSqRib1+k2/Jszs35jZedFP71XkPmUfr/V61ZIyKaRMilMuhWqRyaT5i9w6MQ/4EbkxtQPAX7n7c7VdpZpTJ
oWUSXHKpVDVM9HuHRGRgGj3johIQFT0RUQCMhn36Y9rf9Pdd9897vb+/v6SbcuWLSvZtnv37tjnnDVrVlzzWP/oY0L2w914440l2954442SbY8/PmHHmsaSy7gyWb48/kSEfX19Jdsuv/zykm1xeSVsC0l5TngmSeK29euvv75k2/PPP1+ybf78+WWs0eR4/2zZsmVcbUm1owyJueibvohIQFT0RUQCoqIvIhIQFX0RkYCo6IuIBERFX0QkIJNxyGZJX/rSl0q2xQ2PSlo2boje1q1bJ+Q5qyVpHeJe3+bNmyu9OlUTNyTuhRdeiF125syZJds6OztLtsUN700aJlrOcM9qiNuO4oZeljkss+bi/r8B1q9fX7LtrrvuqvTqVIS+6YuIBERFX0QkICr6IiIBUdEXEQmIir6ISEBU9EVEAjIZL6JScoXihk9ddtllsQ8aN+xtvMPK4s4gmPScVOksgUnD/eJee9LrmyAVOaNk3LpfdNFFsQ/6xS9+sWRb0tlcJ8iEn2Uz6f86LrO4ob1xZ3GNa4P4rGfNmlWV90/cWVUhfijuZN1W9E1fRCQgKvoiIgFR0RcRCYiKvohIQFT0RUQCoqIvIhIQFX0RkYDU1amVV69ePe5l406129/fX7Itbvz/ZDjlbZKkdUw65W+9intdSX/T8Z3vfGdcz1mjcdkVkXQK7rjMrr/++pJtcZk8/vjjsc9ZrTzjTmv99NNPxy57yy23VHp1Jpy+6YuIBERFX0QkICr6IiIBUdEXEQmIir6ISEBU9EVEAlJXp1YuR19fX8m2tra2km3Lli0r2VbmqYcrdmrYuCFnSaeNjhvaGHda2bjT4o73VNWRCT+NcJK41x23HcVtD2UO761IJnGnJo8bdgnw3HPPlWyL24bitoWk0xZv2bIlrrkq75/Zs2fHPui6detKtsVtK3GZJQ2fTXh/6dTKIiJymoq+iEhAVPRFRAKioi8iEhAVfRGRgKjoi4gEJJghm3HihoatX7++ZNtdd90V+7gJQ68qNuQsTtKwuKSzCI5H3DA2iD974qxZsyoyPDFuGF6SuO0h7qyKhw4dKtlWrWGsfX19JTMZ79BkSB7SWcrXv/71km1xw0Ah8QywVXn/JJ2FNi6XuGHNL7zwQsm2r33ta7HPmTBUXEM2RUTkNBV9EZGAqOiLiARERV9EJCAq+iIiAVHRFxEJSF1dGD1OOWemG+8ZEOPOojdZJA21ixuymXQB8VK2bt0a2x43lC1piGlaccNCd+/eHbvseLeHcoaJVkrCmSlLihtCmKa9lLihoEnDISeDpLoSd3H3uO0oLpekrOOGbKbJVN/0RUQCoqIvIhIQFX0RkYCo6IuIBERFX0QkICr6IiIBUdEXEQlIw5xaubOzM7Y97nS/b775Zsm21atXl2y74447Yp9zMpwaNkncOOS4Md9xry3u9MMQnyljy2Vcp1ZO+luAuHHScX+7kDT+vwwVyaQccX/3EPf/vXnz5pJtcX+vkcKkeP/E1YC48fTljLWP+7+YP3++Tq0sIiKnqeiLiARERV9EJCAq+iIiAVHRFxEJiIq+iEhAJuOQTRERmSD6pi8iEhAVfRGRgKjoi4gEREVfRCQgKvoiIgFR0RcRCYiKvohIQFT0RUQCoqIvIhIQFX0RkYCo6IuIBERFX0QkICr6IiIBUdEXEQmIir6ISEBU9EVEAqKiLyISEBV9EZGAqOiLiARERV9EJCAq+iIiAVHRFxEJiIq+iEhAVPRFRAKioi8iEhAVfRGRgKjoi4gEREVfRCQgKvoiIgFR0RcRCYiKvohIQFT0RUQCoqIvIhIQFX0RkYCo6IuIBCSx6JvZ983sX8xsf4l2M7N7zOygmb1oZh/Na1tnZq9Gt3WVXPFaUy6FlEkhZVKccqkhd4+9AR8HPgrsL9F+DfBjwIBLgX+I5s8BeqN/Z0f3Zyc9X73clIsyUSbKpR5vid/03f0Z4FhMl9XAQ56zB5hlZucAK4Gfuvsxdz8O/
BRYlfR89UK5FFImhZRJccqldiqxT78ZOJw3PRDNKzU/FMqlkDIppEyKUy4TpKnWKwBgZhuADQDTp0+/+IILLqjxGqWzZMkSDh48SDab9dFtM2fOZN68eX+UzWYBmDFjBs3Nzc++9dZbuDtm9kV3nxv3+PWYizIpVE4m2Wz2v+zbt+914J5Sj1+PmYC2lUrbt2/f60mZAMn79D23H20+pfe9fQ+4IW/6ZeAc4Abge6X6lbpdfPHFXi8OHTrkixcvLtq2YcMGf+SRR0amzz//fH/ttdf8kUce8Q0bNjjQ5Q2YizIpVE4m7u5AV6Nl4q5tpdKGM0m6VWL3zg7gz6Oj7ZcCb7r7r4FdwCfNbLaZzQY+Gc0LwnXXXcdDDz2Eu7Nnzx5mzpzJOeecw8qVK/nJT34CMCW0XJRJoaRMjh8/DjCFgDIBbSsTKulTAdgG/Bo4RW7/2WeBvwT+Mmo34D6gB3gJyOYt+xfAwei2Ps2nUL18Ind0dPi8efO8qanJm5ub/cEHH/T777/f77//fnd3f++99/zmm2/2TCbjS5Ys8WeffXZk2U2bNjlwotFyUSaFys3kvPPOG86lYTJx17YyEUj5Td9yfSePbDbrXV1dtV6NCWdm+9w9m7Z/CLkok+LGkosyKS6EXNJmor/IFREJiIq+iEhAVPRFRAKioi8iEhAVfRGRgKjoi4gEREVfRCQgKvoiIgFR0RcRCYiKvohIQFT0RUQCoqIvIhIQFX0RkYCo6IuIBERFX0QkICr6IiIBSVX0zWyVmb1sZgfN7NYi7XeZ2fPR7RUzeyOv7d28th2VXPla2rlzJwsXLqS9vZ2NGzcWtN9yyy0sX76c5cuXc/755zNr1qyRtilTpgAsUiaNnwkol2KUSQ0lXVqL3PU5e4AMMBV4AVgU0/+vge/nTb+d5hJew7d6uKzZ0NCQZzIZ7+np8ZMnT/rSpUu9u7u7ZP977rnH169fPzI9ffr01Jc28zrJRZkUV+1clEn95lKutJmk+aZ/CXDQ3Xvd/R1gO7A6pv8N5K6r27D27t1Le3s7mUyGqVOn0tHRQWdnZ8n+27Zt44YbbqjiGlafMilOuRRSJrWVpug3A4fzpgeieQXM7FygDXgqb/b7zazLzPaY2fXjXtNJZHBwkNbW1pHplpYWBgcHi/bt7+/n0KFDfOITnxiZd+LECYALlUljZwLKpRhlUluVPpDbATzm7u/mzTvXcxfr/TRwt5mdN3ohM9sQfTB0HTlypMKrVFvbt29nzZo1w/shgdyGDBwgJhNo3FyUSXHjzUWZhLetlCNN0R8EWvOmW6J5xXQwateOuw9G//YCu4GLRi/k7g+4e9bds3Pnzk2xSrXV3NzM4cOnf/wMDAzQ3Fz0xw/bt28v+Gk63Dcuk6i9bnJRJsVVIxdl0hjbStUk7fQHmoBecrtthg/kLi7S7wKgD7C8ebOBadH9DwGvEnMQ2OvkgMupU6e8ra3Ne3t7Rw5E7d+/v6DfgQMH/Nxzz/X33ntvZN6xY8f8xIkTDnSlzcTrIBdlUly1c1Em9ZtLuajUgVx3HwK+AOwi95PqB+7ebWbfMLPr8rp2ANujJx92IdBlZi8APwc2uvs/pvs4mryampq49957WblyJRdeeCFr165l8eLFfPWrX2XHjtMjyLZv305HRwdmNjLvwIEDZLNZgEUoE6BxMwHlUowyqS07s0bXXjab9a6urlqvxoQzs32eO9aRSgi5KJPixpKLMikuhFzSZqK/yBURCYiKvohIQFT0RUQCoqIvIhIQFX0RkYCo6IuIBERFX0QkICr6IiIBUdEXEQmIir6ISEBU9EVEAqKiLyISEBV9EZGAqOiLiARERV9EJCAq+iIiAUlV9M1slZm9bGYHzezWIu03mtkRM3s+ut2U17bOzF6NbusqufK1tHPnThYuXEh7ezsbN24saN+yZQtz585l+fLlLF++nAcffHCkbevWrQBLlEnjZwLKpRhlUkNJ11MEpgA9QIbT18hdN
KrPjcC9RZadQ+76unPIXS+3F5gd93z1cC3LoaEhz2Qy3tPTM3KNz+7u7jP6bN682T//+c8XLHv06FFva2tz4Lm0mXgd5KJMiqt2LsqkfnMpF5W6Ri5wCXDQ3Xvd/R1gO7A65WfKSuCn7n7M3Y8DPwVWpVx20tq7dy/t7e1kMhmmTp1KR0cHnZ2dqZbdtWsXV111FcC7yiSnUTMB5VKMMqmtNEW/GTicNz0QzRvtT8zsRTN7zMxax7KsmW0wsy4z6zpy5EjKVa+dwcFBWltbR6ZbWloYHBws6PfDH/6QpUuXsmbNGg4fPlx0WUrnWVe5KJPiqpGLMmmMbaVaKnUg938D8919KblP3q1jWdjdH3D3rLtn586dW6FVqq1rr72Wvr4+XnzxRa666irWrRv7rsdGy0WZFFduLsqkuEbMpRLSFP1BIP+jtSWaN8Ldj7r7yWjyQeDitMvWo+bm5pFvHgADAwM0N5/5ZePss89m2rRpANx0003s27ev6LIok4bNBJRLMcqkxpJ2+gNN5A6WtHH6QO7iUX3Oybv/x8AeP30g9xC5Ay6zo/tz4p6vHg64nDp1ytva2ry3t3fkQNT+/fvP6PPaa6+N3P/Rj37kK1ascPfcgaj58+fnH4hKzMTrIBdlUly1c1Em9ZtLuUh5IDexQ+6xuAZ4hdwontuied8Arovufwvojj4Qfg5ckLfsXwAHo9v6pOeql/+cJ554whcsWOCZTMbvvPNOd3e//fbbvbOz093db731Vl+0aJEvXbrUL7/8cj9w4MDIsps2bXLgRNpMvE5yUSbFVTMXZVLfuZQjbdG3XN/JI5vNeldXV61XY8KZ2T53z6btH0IuyqS4seSiTIoLIZe0megvckVEAqKiLyISEBV9EZGAqOiLiARERV9EJCAq+iIiAVHRFxEJiIq+iEhAVPRFRAKioi8iEhAVfRGRgKjoi4gEREVfRCQgKvoiIgFR0RcRCUiqom9mq8zsZTM7aGa3Fmn/j2b2j9GF0X9mZufmtb1rZs9Htx2VXPla2rlzJwsXLqS9vZ2NGzcWtH/7299m0aJFLF26lCuvvJL+/v6RtilTpgAsUiaNnwkol2KUSQ0lXWUFmELuilkZTl8ucdGoPlcAZ0X3/wp4NK/t7TRXcxm+1cMVboaGhjyTyXhPT8/I5d66u7vP6PPUU0/5b3/7W3d3/+53v+tr164daZs+fXrqq9x4neSiTIqrdi7KpH5zKVfaTNJ8078EOOjuve7+DrAdWD3qg+Pn7v67aHIPuYsVN6y9e/fS3t5OJpNh6tSpdHR00NnZeUafK664grPOOguASy+9lIGBgVqsatUok+KUSyFlUltpin4zkH/5+YFoXimfBX6cN/1+M+sysz1mdv041nHSGRwcpLW1dWS6paWFwcHBkv03bdrE1VdfPTJ94sQJgAuVSWNnAsqlGGVSW02VfDAz+/dAFrgsb/a57j5oZhngKTN7yd17Ri23AdgA8JGPfKSSq1RzDz/8MF1dXTz99NMj8/r7+2lpaTkAfJoSmUDj5qJMihtvLsokvG2lHGm+6Q8CrXnTLdG8M5jZHwK3Ade5+8nh+e4+GP3bC+wGLhq9rLs/4O5Zd8/OnTt3TC+gFpqbmzl8+PSPn4GBAZqbC3/8PPnkk3zzm99kx44dTJs27YzlIT6TqL1uclEmxVUjF2XSGNtK1STt9Cf3a6AXaOP0gdzFo/pcRO5g74JR82cD06L7HwJeZdRB4NG3ejjgcurUKW9ra/Pe3t6RA1H79+8/o88vf/lLz2Qy/sorr5wx/9ixY37ixAkHutJm4nWQizIprtq5KJP6zaVcpDyQm+rIN3AN8EpU2G+L5n2D3Ld6gCeBfwaej247ovn/Fngp+qB4Cfhs0nPVy3/OE0884QsWLPBMJuN33nmnu7vffvvt3tnZ6e7uV155pX/4wx/2ZcuW+bJly/zaa691d/df/OIXvmTJEgd+lzYTr5NclElx1cxFmdR3LuVIW/Qt1
3fyyGaz3tXVVevVmHBmts/ds2n7h5CLMiluLLkok+JCyCVtJvqLXBGRgKjoi4gEREVfRCQgKvoiIgFR0RcRCYiKvohIQFT0RUQCoqIvIhIQFX0RkYCo6IuIBERFX0QkICr6IiIBUdEXEQmIir6ISEBU9EVEAqKiLyISkFRF38xWmdnLZnbQzG4t0j7NzB6N2v/BzObntX0lmv+yma2s3KrX1s6dO1m4cCHt7e1s3LixoP3kyZN86lOfor29nRUrVtDX1zfS9q1vfQtgSaNlAuXlAszTthLGtqJMaijp0lrAFHKXScxw+hq5i0b1uRn4m+h+B/BodH9R1H8auWvs9gBT4p6vHi5rNjQ05JlMxnt6ekau8dnd3X1Gn/vuu88/97nPubv7tm3bfO3ate7u3t3d7UuXLnVgX9pMPJBcyF0CT9tKGduKMqnfXMpFysslpvmmfwlw0N173f0dYDuwelSf1cDW6P5jwJVmZtH87e5+0t0PAQejx6tre/fupb29nUwmw9SpU+no6KCzs/OMPp2dnaxbtw6ANWvW8LOf/Qx3p7Ozk46ODgBvpEyg/FyAY9pWGn9bUSa1laboNwOH86YHonlF+7j7EPAmcHbKZevO4OAgra2tI9MtLS0MDg6W7NPU1MTMmTM5evRowbI0SCZQfi7AO3ldGyIXbSuFlEltJV4Y3czWAKvc/aZo+s+AFe7+hbw++6M+A9F0D7ACuAPY4+4PR/M3AT9298dGPccGYEM0uQTYX/5Lm1CzgQ8C/dH0HOADwD/l9VkMvAKciqaXAL8C/gB4G5jr7jNKZQJB5jLD3X8PtK0whm1FmTTM+6dcC919RmKvpP0/wMeAXXnTXwG+MqrPLuBj0f0m4HXARvfN7xfzfKn2S9XyVolMhl9nmkwCymVA20p524oyqd9cKpBrxfbpPwssMLM2M5tK7kDtjlF9dgDrovtrgKc8txY7gI5odE8bsADYm+I5J7uyMwGswTKB8nOZo20liG1FmdRSyk+Qa8j91OoBbovmfQO4Lrr/fuBvyR1U2Qtk8pa9LVruZeDqRvlErkAmJ9JmElAuA9pWyttWlEl951JmpqleY+I+/Wozsw3u/kCt12OijfV1hpCLMiluLK9TmVSmfz1K+xonXdEXEZGJo9MwiIgEZFIV/aTTPTQCM/u+mf1LNMw1TX9lUti/4TMB5VKMMik01kxqfvAh7yBE4ukeGuEGfBz4KLBfmSgT5aJMqpmJe7ohm9WS5nQPdc/dnwGOpeyuTAoFkQkol2KUSaExZjKpin5DnrKhTMqkkDIpTrkUUiZFTKaiLyIiE2wyFf1BIP9MSi3RvJApk0LKpDjlUkiZFDGZin6aP80OjTIppEyKUy6FlEkRk6boe+6UzF8gdwKlA8AP3L27tmtVeWa2Dfh7YKGZDZjZZ0v1VSaFQskElEsxyqTQWDIB/UWuiEhQJs03fRERmXgq+iIiAVHRFxEJiIq+iEhAVPRFRAKioi8iEhAVfRGRgKjoi4gE5P8DNy6pHb5QsgEAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import random\n", + "random.seed(10)\n", + "\n", + "_, axes = plt.subplots(2, 5)\n", + "images_and_labels = list(zip(digits.images, digits.target))\n", + "for ax, (image, label) in zip(axes[0, :], random.sample(images_and_labels, 5)):\n", + " ax.set_axis_off()\n", + " ax.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')\n", + " ax.set_title('Training: %i' % label)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "## Support Vector Machine" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's build a workflow with one model only to start. Here we use SVM (Support Vector Machine) for classification" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define `FunctionTask`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# \n" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [], + "source": [ + "import typing as ty\n", + "\n", + "# https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html#sklearn.svm.LinearSVC\n", + "# linear SVM\n", + "@pydra.mark.task\n", + "#@pydra.mark.annotate({\"return\": {\"clf\": ty.Any}})\n", + "def fit_SVM(X_tr, y_tr, C=1, kernel='rbf', gamma=1):\n", + " from sklearn.svm import SVC\n", + " clf = SVC(C=C, kernel=kernel, gamma=gamma)\n", + " clf.fit(X_tr, y_tr)\n", + " return(404)\n", + "\n", + "# metrics\n", + "@pydra.mark.task\n", + "#@pydra.mark.annotate({\"return\": {\"score\": ty.Any}})\n", + "def metric_score(clf, X_tt, y_tt, metric):\n", + " \n", + " y_pred = clf.predict(X_tt)\n", + " \n", + " from sklearn import metrics\n", + " if metric == 'accuracy':\n", + " score = metrics.accuracy_score(y_tt, y_pred)\n", + " elif metric == 'precision':\n", + " score = 
metrics.precision_score(y_tt, y_pred)\n", + " else:\n", + " score = metrics.recall_score(y_tt, y_pred)\n", + " return(score)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,\n", + " decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',\n", + " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", + " tol=0.001, verbose=False)\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.svm import SVC\n", + "n_samples = len(digits.images)\n", + "data = digits.images.reshape((n_samples, -1))\n", + "\n", + "# Split data into train and test subsets\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " data, digits.target, test_size=0.5, shuffle=False)\n", + "\n", + "data[1]\n", + "\n", + "clf = SVC()\n", + "clf.fit(X_train, y_train)\n", + "\n", + "print(clf)" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(899, 64)" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_test.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create workflow" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "input_grid = {'X_tr': X_train,\n", + " 'X_tt': X_test,\n", + " 'y_tr': y_train,\n", + " 'y_tt': y_test,\n", + " 'C': [0.1,1, 10, 100], \n", + " 'gamma': [1,0.1,0.01,0.001], #'gamma': ['scale', 'auto'],\n", + " 'kernel': ['rbf', 'poly', 'sigmoid']}" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [], + "source": [ + "# for fit_SVM\n", + "input_grid = {'X_tr': X_train,\n", + " #'X_tt': X_test,\n", + " 'y_tr': y_train,\n", 
+ " #'y_tt': y_test,\n", + " 'C': 1, \n", + " 'gamma': [1,0.1,0.01,0.001], #'gamma': ['scale', 'auto'],\n", + " 'kernel': 'rbf',\n", + " 'metric': ['accuracy', 'precision', 'recall']}\n", + "\n", + "# for metrics\n", + "#metric_list = ['accuracy', 'precision', 'recall']" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [], + "source": [ + "wf1 = pydra.Workflow(name=\"svm1\", \n", + " input_spec=list(input_grid.keys()), **input_grid)\n", + "wf1.split('gamma')\n", + "wf1.add(fit_SVM(name='svm', **input_grid))\n", + "wf1.set_output([(\"out\", wf1.svm.lzout.out)])\n", + "\n", + "#wf1.add(metric_score(name='metric', clf=wf1.svm.lzout.out,\n", + "# X_tt=wf1.lzin.X_tt, y_tt=wf1.lzin.y_tt, metric=wf1.lzin.metric))\n", + "\n", + "#wf1.set_output([(\"clf\", wf1.svm.lzout.out),\n", + "# (\"score\", wf1.metric.lzout.out)\n", + "# ])\n", + "\n", + "\n", + "#with pydra.Submitter(plugin=\"cf\") as sub:\n", + "# sub(wf1)\n", + "\n", + "wf1.result()" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [], + "source": [ + "wf1 = pydra.Workflow(name=\"svm1\", \n", + " input_spec=list(input_grid.keys()), **input_grid)" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wf1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 
df80532cffe5dc13b1e45a1fa3f3725958e07dae Mon Sep 17 00:00:00 2001 From: Nicol Lo Date: Fri, 3 Apr 2020 23:16:59 +0800 Subject: [PATCH 07/12] add azure pipeline --- .DS_Store | Bin 0 -> 6148 bytes .azure-pipelines/windows.yml | 39 +++++++++++++++++++++++++++++++++++ .gitignore | 2 ++ azure-pipelines.yml | 12 +++++++++++ 4 files changed, 53 insertions(+) create mode 100644 .DS_Store create mode 100644 .azure-pipelines/windows.yml create mode 100644 azure-pipelines.yml diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 Date: Fri, 3 Apr 2020 23:39:27 +0800 Subject: [PATCH 08/12] windows.yml pytest command fix --- .azure-pipelines/windows.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.azure-pipelines/windows.yml b/.azure-pipelines/windows.yml index f36a740b12..ee262d3f11 100644 --- a/.azure-pipelines/windows.yml +++ b/.azure-pipelines/windows.yml @@ -33,7 +33,6 @@ jobs: python -m pip install .[$(CHECK_TYPE)] displayName: 'Install pydra' - script: | - mkdir for_testing - cd for_testing - pytest + pytest -vs -n auto --cov pydra --cov-config .coveragerc --cov-report xml:cov.xml --doctest-modules pydra displayName: 'Pytest tests' + From 7bc5835113a7cd09237a87145107fbf37dbc13b7 Mon Sep 17 00:00:00 2001 From: Nicol Lo Date: Wed, 15 Apr 2020 14:44:21 +0800 Subject: [PATCH 09/12] skip shell task tests for windows --- pydra/engine/tests/test_helpers_file.py | 1 + pydra/engine/tests/test_node_task.py | 2 ++ pydra/engine/tests/test_shelltask.py | 6 +++++- pydra/engine/tests/test_task.py | 1 + pydra/engine/tests/test_workflow.py | 2 ++ 5 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py 
index b13f5e3be2..389ea34827 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -38,6 +38,7 @@ def test_split_filename(filename, split): assert res == split +@pytest.mark.skipif(sys.platform.startswith("win"), reason="windows drive not known in advance",) def test_fname_presuffix(): fname = "foo.nii" pth = fname_presuffix(fname, "pre_", "_post", "/tmp") diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index ff49ea79a2..fb815f9665 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -538,6 +538,8 @@ def test_task_nostate_cachedir(plugin, tmpdir): assert results.output.out == 5 +# TODO: fix path for windows +@pytest.mark.skipif(sys.platform.startswith("win"), 'windows cache paths are saved differently') @pytest.mark.parametrize("plugin", Plugins) def test_task_nostate_cachedir_relativepath(tmpdir, plugin): """ task with provided cache_dir as relative path""" diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index d2d6936c94..01fb1bf66c 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -12,6 +12,10 @@ from ..core import Workflow from ..specs import ShellOutSpec, ShellSpec, SpecInfo, File + +if not sys.platform.startswith("win"): + pytest.skip("SLURM not available in windows", allow_module_level=True) + if bool(shutil.which("sbatch")): Plugins = ["cf", "slurm"] else: @@ -154,7 +158,7 @@ def test_shell_cmd_4(plugin): assert res[1].output.stdout == "pydra\n" assert res[0].output.return_code == res[1].output.return_code == 0 - assert res[0].output.stderr == res[1].output.stderr == "" + assert res[0].output.stderr == res[1].output.stderr == "" @pytest.mark.parametrize("plugin", Plugins) diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index cc97a138ac..83e723ecf3 100644 --- a/pydra/engine/tests/test_task.py +++ 
b/pydra/engine/tests/test_task.py @@ -350,6 +350,7 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] assert (tmpdir / funky.checksum / "messages.jsonld").exists() +@pytest.mark.skipif(sys.platform.startswith("win"), "skip shell tasks for windows") def test_shell_cmd(tmpdir): cmd = ["echo", "hail", "pydra"] diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index 46cfee660c..abe8d912fb 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -1981,6 +1981,8 @@ def test_wf_nostate_cachedir(plugin, tmpdir): shutil.rmtree(cache_dir) +# TODO: fix path for windows +@pytest.mark.skipif(sys.platform.startswith("win"), "failing for windows, something to do with paths?") @pytest.mark.parametrize("plugin", Plugins) def test_wf_nostate_cachedir_relativepath(tmpdir, plugin): """ wf with provided cache_dir as relative path""" From 4e53c480c30c7dd4ff3bc51ce29d99f3d4e77a61 Mon Sep 17 00:00:00 2001 From: nlo Date: Wed, 15 Apr 2020 18:06:57 +0800 Subject: [PATCH 10/12] fix tests that test relative cache paths --- pydra/engine/tests/test_helpers_file.py | 2 +- pydra/engine/tests/test_node_task.py | 4 ++-- pydra/engine/tests/test_shelltask.py | 4 ++-- pydra/engine/tests/test_task.py | 1 - pydra/engine/tests/test_workflow.py | 3 ++- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index 389ea34827..bb550836e0 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -1,4 +1,4 @@ -import os +import os, sys import time import warnings import pytest diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index fb815f9665..9611d467b7 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -538,13 +538,13 @@ def test_task_nostate_cachedir(plugin, tmpdir): assert 
results.output.out == 5 -# TODO: fix path for windows -@pytest.mark.skipif(sys.platform.startswith("win"), 'windows cache paths are saved differently') @pytest.mark.parametrize("plugin", Plugins) def test_task_nostate_cachedir_relativepath(tmpdir, plugin): """ task with provided cache_dir as relative path""" cwd = tmpdir.chdir() cache_dir = "test_task_nostate" + tmpdir.mkdir(cache_dir) + nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) assert np.allclose(nn.inputs.a, [3]) assert nn.state is None diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 01fb1bf66c..1d12c9f423 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -2,7 +2,7 @@ import attr import typing as ty -import os, shutil +import os, sys, shutil import pytest from pathlib import Path @@ -13,7 +13,7 @@ from ..specs import ShellOutSpec, ShellSpec, SpecInfo, File -if not sys.platform.startswith("win"): +if sys.platform.startswith("win"): pytest.skip("SLURM not available in windows", allow_module_level=True) if bool(shutil.which("sbatch")): diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 83e723ecf3..cc97a138ac 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -350,7 +350,6 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] assert (tmpdir / funky.checksum / "messages.jsonld").exists() -@pytest.mark.skipif(sys.platform.startswith("win"), "skip shell tasks for windows") def test_shell_cmd(tmpdir): cmd = ["echo", "hail", "pydra"] diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index abe8d912fb..a7736d04e1 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -1982,12 +1982,13 @@ def test_wf_nostate_cachedir(plugin, tmpdir): # TODO: fix path for windows -@pytest.mark.skipif(sys.platform.startswith("win"), "failing for windows, something to do 
with paths?") +#@pytest.mark.skipif(sys.platform.startswith("win"), reason="failing for windows, something to do with paths?") @pytest.mark.parametrize("plugin", Plugins) def test_wf_nostate_cachedir_relativepath(tmpdir, plugin): """ wf with provided cache_dir as relative path""" tmpdir.chdir() cache_dir = "test_wf_cache_2" + tmpdir.mkdir(cache_dir) wf = Workflow(name="wf_2", input_spec=["x", "y"], cache_dir=cache_dir) wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) From b8ad09b2311a7e657ee5d06fd400504d0e1c1ef3 Mon Sep 17 00:00:00 2001 From: nlo Date: Wed, 15 Apr 2020 23:37:07 +0800 Subject: [PATCH 11/12] delete unused functions and fix style --- .azure-pipelines/windows.yml | 2 +- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- .github/ISSUE_TEMPLATE/question.md | 2 +- .github/PULL_REQUEST_TEMPLATE.md | 8 +-- .travis.yml | 1 - MANIFEST.in | 2 +- docs/conf.py | 66 +++++++++++-------------- docs/sphinxext/github_link.py | 31 ++++++------ pydra/engine/helpers_file.py | 43 ---------------- pydra/engine/tests/test_helpers_file.py | 15 +----- pydra/engine/tests/test_shelltask.py | 2 +- pydra/engine/tests/test_task.py | 1 + pydra/engine/tests/test_workflow.py | 2 - 13 files changed, 55 insertions(+), 122 deletions(-) diff --git a/.azure-pipelines/windows.yml b/.azure-pipelines/windows.yml index ee262d3f11..f2907a5db1 100644 --- a/.azure-pipelines/windows.yml +++ b/.azure-pipelines/windows.yml @@ -35,4 +35,4 @@ jobs: - script: | pytest -vs -n auto --cov pydra --cov-config .coveragerc --cov-report xml:cov.xml --doctest-modules pydra displayName: 'Pytest tests' - + diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 35815363b9..0601678af5 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -24,4 +24,4 @@ Like this: ``` ``` ---> \ No newline at end of file +--> diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md index be7a18089d..684dc972f8 100644 --- 
a/.github/ISSUE_TEMPLATE/question.md +++ b/.github/ISSUE_TEMPLATE/question.md @@ -12,4 +12,4 @@ Include the following: ------------------------ What are you trying to accomplish? What have you tried? ---> \ No newline at end of file +--> diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 1ecbe1cf19..b2cb3a760e 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -12,10 +12,10 @@ - [ ] All tests passing - [ ] I have added tests to cover my changes - [ ] I have updated documentation (if necessary) -- [ ] My code follows the code style of this project -(we are using `black`: you can `pip install pre-commit`, -run `pre-commit install` in the `pydra` directory +- [ ] My code follows the code style of this project +(we are using `black`: you can `pip install pre-commit`, +run `pre-commit install` in the `pydra` directory and `black` will be run automatically with each commit) ## Acknowledgment -- [ ] I acknowledge that this contribution will be available under the Apache 2 license. \ No newline at end of file +- [ ] I acknowledge that this contribution will be available under the Apache 2 license. 
diff --git a/.travis.yml b/.travis.yml index 911bdac53a..0466ad546a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -57,7 +57,6 @@ matrix: allow_failures: - python: 3.7 env: INSTALL_DEPENDS="pip==10.0.1 setuptools==30.3.0" - - os: windows before_install: diff --git a/MANIFEST.in b/MANIFEST.in index 0e3be1c3f1..01c2de4725 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,3 @@ include versioneer.py include pydra/_version.py -include pydra/schema/context.jsonld \ No newline at end of file +include pydra/schema/context.jsonld diff --git a/docs/conf.py b/docs/conf.py index 9e3145f275..c155320f0f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,17 +13,18 @@ import sys from pathlib import Path from packaging.version import Version + sys.path.insert(0, str(Path(__file__).parent.parent.absolute())) -sys.path.insert(1, str(Path(__file__).parent / 'sphinxext')) +sys.path.insert(1, str(Path(__file__).parent / "sphinxext")) from pydra import __version__ from github_link import make_linkcode_resolve # -- Project information ----------------------------------------------------- -project = 'Pydra: A simple dataflow engine with scalable semantics' -copyright = '2019 - 2020, The Nipype Developers team' -author = 'The Nipype Developers team' +project = "Pydra: A simple dataflow engine with scalable semantics" +copyright = "2019 - 2020, The Nipype Developers team" +author = "The Nipype Developers team" # The full version, including alpha/beta/rc tags release = __version__ @@ -36,25 +37,25 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', - 'sphinx.ext.coverage', - 'sphinx.ext.mathjax', - 'sphinx.ext.ifconfig', - 'sphinx.ext.linkcode', - 'sphinx.ext.githubpages', - 'sphinxcontrib.apidoc', - 'sphinxcontrib.napoleon' + "sphinx.ext.autodoc", + "sphinx.ext.doctest", + "sphinx.ext.intersphinx", + "sphinx.ext.coverage", + "sphinx.ext.mathjax", + "sphinx.ext.ifconfig", + "sphinx.ext.linkcode", + "sphinx.ext.githubpages", + "sphinxcontrib.apidoc", + "sphinxcontrib.napoleon", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'api/pydra.rst'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "api/pydra.rst"] # -- Options for HTML output ------------------------------------------------- @@ -62,27 +63,22 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] +html_static_path = ["_static"] # -- Options for extensions --------------------------------------------------- # Autodoc -autodoc_mock_imports = [ - 'cloudpickle', - 'matplotlib', - 'numpy', - 'psutil', -] -apidoc_module_dir = '../pydra' -apidoc_output_dir = 'api' -apidoc_excluded_paths = ['conftest.py', '*/tests/*', 'tests/*', 'data/*'] +autodoc_mock_imports = ["cloudpickle", "matplotlib", "numpy", "psutil"] +apidoc_module_dir = "../pydra" +apidoc_output_dir = "api" +apidoc_excluded_paths = ["conftest.py", "*/tests/*", "tests/*", "data/*"] apidoc_separate_modules = True -apidoc_extra_args = ['--module-first', '-d 1', '-T'] +apidoc_extra_args = ["--module-first", "-d 1", "-T"] # Napoleon # Accept custom section names to be parsed for numpy-style docstrings @@ -90,21 +86,17 @@ # Requires pinning sphinxcontrib-napoleon to a specific commit while # https://github.com/sphinx-contrib/napoleon/pull/10 is merged. napoleon_use_param = False -napoleon_custom_sections = [ - ('Inputs', 'Parameters'), - ('Outputs', 'Parameters'), -] +napoleon_custom_sections = [("Inputs", "Parameters"), ("Outputs", "Parameters")] # Intersphinx -intersphinx_mapping = { - 'https://docs.python.org/': None, -} +intersphinx_mapping = {"https://docs.python.org/": None} # Linkcode # The following is used by sphinx.ext.linkcode to provide links to github linkcode_resolve = make_linkcode_resolve( - 'pydra', 'https://github.com/nipype/pydra/blob/{revision}/' - '{package}/{path}#L{lineno}') + "pydra", + "https://github.com/nipype/pydra/blob/{revision}/" "{package}/{path}#L{lineno}", +) # Sphinx-versioning scv_show_banner = True diff --git a/docs/sphinxext/github_link.py b/docs/sphinxext/github_link.py index 84f4b90b6d..d30186c70c 100644 --- a/docs/sphinxext/github_link.py +++ b/docs/sphinxext/github_link.py @@ -9,16 +9,16 @@ import sys from functools import partial -REVISION_CMD = 'git rev-parse --short HEAD' +REVISION_CMD = "git rev-parse --short HEAD" def 
_get_git_revision(): try: revision = subprocess.check_output(REVISION_CMD.split()).strip() except (subprocess.CalledProcessError, OSError): - print('Failed to execute git to get revision') + print("Failed to execute git to get revision") return None - return revision.decode('utf-8') + return revision.decode("utf-8") def _linkcode_resolve(domain, info, package, url_fmt, revision): @@ -38,18 +38,18 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): if revision is None: return - if domain not in ('py', 'pyx'): + if domain not in ("py", "pyx"): return - if not info.get('module') or not info.get('fullname'): + if not info.get("module") or not info.get("fullname"): return - class_name = info['fullname'].split('.')[0] + class_name = info["fullname"].split(".")[0] if type(class_name) != str: # Python 2 only - class_name = class_name.encode('utf-8') - module = __import__(info['module'], fromlist=[class_name]) + class_name = class_name.encode("utf-8") + module = __import__(info["module"], fromlist=[class_name]) try: - obj = attrgetter(info['fullname'])(module) + obj = attrgetter(info["fullname"])(module) except AttributeError: return @@ -65,14 +65,12 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): if not fn: return - fn = os.path.relpath(fn, - start=os.path.dirname(__import__(package).__file__)) + fn = os.path.relpath(fn, start=os.path.dirname(__import__(package).__file__)) try: lineno = inspect.getsourcelines(obj)[1] except Exception: - lineno = '' - return url_fmt.format(revision=revision, package=package, - path=fn, lineno=lineno) + lineno = "" + return url_fmt.format(revision=revision, package=package, path=fn, lineno=lineno) def make_linkcode_resolve(package, url_fmt): @@ -87,5 +85,6 @@ def make_linkcode_resolve(package, url_fmt): '{path}#L{lineno}') """ revision = _get_git_revision() - return partial(_linkcode_resolve, revision=revision, package=package, - url_fmt=url_fmt) + return partial( + _linkcode_resolve, revision=revision, 
package=package, url_fmt=url_fmt + ) diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 065986e36c..6b114c2d1d 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -66,49 +66,6 @@ def split_filename(fname): return pth, fname, ext -def fname_presuffix(fname, prefix="", suffix="", newpath=None, use_ext=True): - """ - Manipulate path and name of input filename. - - Parameters - ---------- - fname : :obj:`str` - A filename (may or may not include path) - prefix : :obj:`str` - Characters to prepend to the filename - suffix : :obj:`str` - Characters to append to the filename - newpath : :obj:`str` - Path to replace the path of the input fname - use_ext : :obj:`bool` - If True (default), appends the extension of the original file - to the output name. - - Return - ------ - path : :obj:`str` - Absolute path of the modified filename - - Examples - -------- - >>> import pytest, sys - >>> if sys.platform.startswith('win'): pytest.skip() - >>> from pydra.engine.helpers_file import fname_presuffix - >>> fname = 'foo.nii.gz' - >>> fname_presuffix(fname,'pre','post','/tmp') - '/tmp/prefoopost.nii.gz' - - """ - pth, fname, ext = split_filename(fname) - if not use_ext: - ext = "" - - # No need for isdefined: bool(Undefined) evaluates to False - if newpath: - pth = op.abspath(newpath) - return str(Path(pth) / (prefix + fname + suffix + ext)) - - def hash_file(afile, chunk_len=8192, crypto=sha256, raise_notfound=True): """Compute hash of a file using 'crypto' module.""" from .specs import LazyField diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index bb550836e0..56cc8000c6 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -1,4 +1,4 @@ -import os, sys +import os import time import warnings import pytest @@ -6,7 +6,6 @@ from ..helpers_file import ( split_filename, - fname_presuffix, copyfile, copyfiles, on_cifs, @@ -38,18 +37,6 @@ 
def test_split_filename(filename, split): assert res == split -@pytest.mark.skipif(sys.platform.startswith("win"), reason="windows drive not known in advance",) -def test_fname_presuffix(): - fname = "foo.nii" - pth = fname_presuffix(fname, "pre_", "_post", "/tmp") - assert pth == str(Path("/tmp/pre_foo_post.nii")) - fname += ".gz" - pth = fname_presuffix(fname, "pre_", "_post", "/tmp") - assert pth == str(Path("/tmp/pre_foo_post.nii.gz")) - pth = fname_presuffix(fname, "pre_", "_post", "/tmp", use_ext=False) - assert pth == str(Path("/tmp/pre_foo_post")) - - @pytest.fixture() def _temp_analyze_files(tmpdir): """Generate temporary analyze file pair.""" diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 1d12c9f423..671c65a31e 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -158,7 +158,7 @@ def test_shell_cmd_4(plugin): assert res[1].output.stdout == "pydra\n" assert res[0].output.return_code == res[1].output.return_code == 0 - assert res[0].output.stderr == res[1].output.stderr == "" + assert res[0].output.stderr == res[1].output.stderr == "" @pytest.mark.parametrize("plugin", Plugins) diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index cc97a138ac..f7f4182ef6 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -350,6 +350,7 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] assert (tmpdir / funky.checksum / "messages.jsonld").exists() +@no_win def test_shell_cmd(tmpdir): cmd = ["echo", "hail", "pydra"] diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index a7736d04e1..091feba54a 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -1981,8 +1981,6 @@ def test_wf_nostate_cachedir(plugin, tmpdir): shutil.rmtree(cache_dir) -# TODO: fix path for windows 
-#@pytest.mark.skipif(sys.platform.startswith("win"), reason="failing for windows, something to do with paths?") @pytest.mark.parametrize("plugin", Plugins) def test_wf_nostate_cachedir_relativepath(tmpdir, plugin): """ wf with provided cache_dir as relative path""" From 208ab548b3a7aeda4bc313e634051f3ff78c7f83 Mon Sep 17 00:00:00 2001 From: nlo Date: Fri, 17 Apr 2020 17:36:43 +0800 Subject: [PATCH 12/12] Revert "add scikit-learn workflow tutorial" This reverts commit 914f2df1584d8ff1785509425a079a17bd7ecb16. --- tutorial/notebooks/ml_workflow.ipynb | 388 --------------------------- 1 file changed, 388 deletions(-) delete mode 100644 tutorial/notebooks/ml_workflow.ipynb diff --git a/tutorial/notebooks/ml_workflow.ipynb b/tutorial/notebooks/ml_workflow.ipynb deleted file mode 100644 index d492dd6db1..0000000000 --- a/tutorial/notebooks/ml_workflow.ipynb +++ /dev/null @@ -1,388 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pydra\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import sklearn" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: sklearn in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (0.0)\n", - "Requirement already satisfied: scikit-learn in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from sklearn) (0.22.2.post1)\n", - "Requirement already satisfied: numpy>=1.11.0 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from scikit-learn->sklearn) (1.18.2)\n", - "Requirement already satisfied: scipy>=0.17.0 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from scikit-learn->sklearn) (1.4.1)\n", - "Requirement already satisfied: joblib>=0.11 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from scikit-learn->sklearn) (0.14.1)\n", - 
"Requirement already satisfied: matplotlib in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (3.2.1)\n", - "Requirement already satisfied: cycler>=0.10 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from matplotlib) (0.10.0)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from matplotlib) (1.1.0)\n", - "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from matplotlib) (2.4.6)\n", - "Requirement already satisfied: numpy>=1.11 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from matplotlib) (1.18.2)\n", - "Requirement already satisfied: python-dateutil>=2.1 in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from matplotlib) (2.8.1)\n", - "Requirement already satisfied: six in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from cycler>=0.10->matplotlib) (1.14.0)\n", - "Requirement already satisfied: setuptools in /Users/gablab/anaconda3/envs/pydra/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib) (46.1.1.post20200323)\n" - ] - } - ], - "source": [ - "# load data\n", - "!pip install sklearn\n", - "!pip install matplotlib" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Machine Learning Workflow" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Hyperparameter tuning is an important step in developing machine learning models. \n", - "XXX You learned how to run multiple tasks within a pipeline with *pydra* `Workflow` in the `intro_workflow` tutorial. 
\n", - "\n", - "\n", - "Let's initiate a `Workflow` with **MNIST** data as input: " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn import datasets\n", - "\n", - "digits = datasets.load_digits(n_class=10, return_X_y=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[ 0., 0., 0., 12., 13., 5., 0., 0.],\n", - " [ 0., 0., 0., 11., 16., 9., 0., 0.],\n", - " [ 0., 0., 3., 15., 16., 6., 0., 0.],\n", - " [ 0., 7., 15., 16., 16., 2., 0., 0.],\n", - " [ 0., 0., 1., 16., 16., 3., 0., 0.],\n", - " [ 0., 0., 1., 16., 16., 6., 0., 0.],\n", - " [ 0., 0., 1., 16., 16., 6., 0., 0.],\n", - " [ 0., 0., 0., 11., 16., 10., 0., 0.]])" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "digits.images[1]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD0CAYAAAB3sfb1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAa/0lEQVR4nO3df5DU9Z3n8ec7TEFWQvhhyOLOTGTaQRQowNgn5q4qalwDuqW4tSwZk9tFNhbZNdlLvPsj5iwTkzMX6uoqGk/jxpMAniWYNSmHOy+QGIPWppbFIf5ilqjMMLPMmOwioKfJgoy+74/+ztBMd3+/35nu6Z7uz+tR1UV/v5/Pt/vbL7797u7v9zPfr7k7IiIShvfVegVERKR6VPRFRAKioi8iEhAVfRGRgKjoi4gEREVfRCQgDVn0zezHZrau0n3rmTIppEyKUy6FGioTd58UN+DtvNt7wL/mTX+m1utXgde3COgCjke3J4FFIWcSvcazgO8CrwNvAs+EnAkwH/BRr/P2FMs1dC6jXutXo4z+MPRMxvr+cXeaxvdRUXnu/oHh+2bWB9zk7k+O7mdmTe4+VM11q5DXgDVAP7lfWJ8HtgNLSy0QQCYADwBNwIXAMWB5XOdAMgGYNZb1DyUXMzsP+FPg10l9A8lkTO8fqIPdO2Z2uZkNmNmXzew3wGYzm21m/8fMjpjZ8eh+S94yu83spuj+jWb2d2b236O+h8zs6nH2bTOzZ8zsLTN70szuM7OH07wOd3/D3fs89/FswLtAe8iZmNkFwHXABnc/4u7vuvu+kDOptAbM5T7gy8A7oWcy3vfPpC/6kXnAHOBcYAO59d4cTX+E3M+2e2OWXwG8DHwI+G/AJjOzcfR9BNgLnA3cAfxZ/oJm9qKZfTruhZjZG8AJ4H8A/zWub4JGyOQScr98vm5mr5vZS2b2JzHrnKQRMhnWHxWmzWb2oYS+SRoiFzP7U+Cku//fmHVNqxEyGd/7p9b7pErsp+oj2l8HXE7uU/39Mf2XA8fzpneT+ykHcCNwcNQ+MAfmjaUvuQ1hCDgrr/1h4OFxvL7pwM3AH4WcCfCfo8e6A5gKXEZuf+uFAWfyASBL7if77wOPAbtCf/8AM4BXgfmjX2PAmYzr/VMv3/SPuPuJ4QkzO8vMvmdm/Wb2/4BngFlmNqXE8r8ZvuPuv4vufmCMff8AOJY3D+DwGF/H8OP+Fvgb4CEz+/B4HoPGyORfgVPAne7+jrs/Dfwc+OQYHiNf3Wfi7m+7e5e7D7n7PwNfAD5pZjPSPkYRdZ8LucL2v9y9bwzLxGmETMb1/qmXoj/6VKD/CVgIrHD3DwIfj+aX+nlVCb8G5pjZWXnzWst4vPeR+9RvHufyjZDJi0XmlXPa10bIZLTh11TOe7URcrkS+A9m9ptoP3wr8AMz+/I416cRMhnX+6deiv5oM8h9yr1hZnOAr030E7p7P7khl3eY2VQz+xhwbdrlzewqM7vIzKaY2QeBb5MbunmgQqtYd5mQ+zb1T8BXzKzJzP4dcAWwq0KrWHeZmNkKM1toZu8zs7OBe4Dd7v5mBVez7nIhV/SXkNvtspzcaLjPkTuwWwn1mMm43j/1WvTvBn6P3NjUPcDOKj3vZ4CPAUeBO4FHgZPDjWbWbWafKbHsLGAbubG0PcB5wKr8n5hlqrtM3P0UsBq4hlwu/xP4c3f/VYXWre4yATLk1vMtYH+03A0VXr+6y8Xdj7r7b4Zv5Ea/HXf3tyu0bvWYybjePxYdEJBxMLNHgV+5+4R/K6gXyqSQMilOuRSqRib1+k2/Jszs35jZedFP71XkPmUfr/V61ZIyKaRMilMuhWqRyaT5i9w6MQ/4EbkxtQPAX7n7c7VdpZpTJoWUSXHKpVDVM9HuHRGRgGj3johIQFT0RUQCMhn36Y9rf9Pdd9897vb+/v6SbcuWLSvZtnv37tjnnDVrVlzzWP/oY0L2w914440l2954442SbY8
/PmHHmsaSy7gyWb48/kSEfX19Jdsuv/zykm1xeSVsC0l5TngmSeK29euvv75k2/PPP1+ybf78+WWs0eR4/2zZsmVcbUm1owyJueibvohIQFT0RUQCoqIvIhIQFX0RkYCo6IuIBERFX0QkIJNxyGZJX/rSl0q2xQ2PSlo2boje1q1bJ+Q5qyVpHeJe3+bNmyu9OlUTNyTuhRdeiF125syZJds6OztLtsUN700aJlrOcM9qiNuO4oZeljkss+bi/r8B1q9fX7LtrrvuqvTqVIS+6YuIBERFX0QkICr6IiIBUdEXEQmIir6ISEBU9EVEAjIZL6JScoXihk9ddtllsQ8aN+xtvMPK4s4gmPScVOksgUnD/eJee9LrmyAVOaNk3LpfdNFFsQ/6xS9+sWRb0tlcJ8iEn2Uz6f86LrO4ob1xZ3GNa4P4rGfNmlWV90/cWVUhfijuZN1W9E1fRCQgKvoiIgFR0RcRCYiKvohIQFT0RUQCoqIvIhIQFX0RkYDU1amVV69ePe5l406129/fX7Itbvz/ZDjlbZKkdUw65W+9intdSX/T8Z3vfGdcz1mjcdkVkXQK7rjMrr/++pJtcZk8/vjjsc9ZrTzjTmv99NNPxy57yy23VHp1Jpy+6YuIBERFX0QkICr6IiIBUdEXEQmIir6ISEBU9EVEAlJXp1YuR19fX8m2tra2km3Lli0r2VbmqYcrdmrYuCFnSaeNjhvaGHda2bjT4o73VNWRCT+NcJK41x23HcVtD2UO761IJnGnJo8bdgnw3HPPlWyL24bitoWk0xZv2bIlrrkq75/Zs2fHPui6detKtsVtK3GZJQ2fTXh/6dTKIiJymoq+iEhAVPRFRAKioi8iEhAVfRGRgKjoi4gEJJghm3HihoatX7++ZNtdd90V+7gJQ68qNuQsTtKwuKSzCI5H3DA2iD974qxZsyoyPDFuGF6SuO0h7qyKhw4dKtlWrWGsfX19JTMZ79BkSB7SWcrXv/71km1xw0Ah8QywVXn/JJ2FNi6XuGHNL7zwQsm2r33ta7HPmTBUXEM2RUTkNBV9EZGAqOiLiARERV9EJCAq+iIiAVHRFxEJSF1dGD1OOWemG+8ZEOPOojdZJA21ixuymXQB8VK2bt0a2x43lC1piGlaccNCd+/eHbvseLeHcoaJVkrCmSlLihtCmKa9lLihoEnDISeDpLoSd3H3uO0oLpekrOOGbKbJVN/0RUQCoqIvIhIQFX0RkYCo6IuIBERFX0QkICr6IiIBUdEXEQlIw5xaubOzM7Y97nS/b775Zsm21atXl2y74447Yp9zMpwaNkncOOS4Md9xry3u9MMQnyljy2Vcp1ZO+luAuHHScX+7kDT+vwwVyaQccX/3EPf/vXnz5pJtcX+vkcKkeP/E1YC48fTljLWP+7+YP3++Tq0sIiKnqeiLiARERV9EJCAq+iIiAVHRFxEJiIq+iEhAJuOQTRERmSD6pi8iEhAVfRGRgKjoi4gEREVfRCQgKvoiIgFR0RcRCYiKvohIQFT0RUQCoqIvIhIQFX0RkYCo6IuIBERFX0QkICr6IiIBUdEXEQmIir6ISEBU9EVEAqKiLyISEBV9EZGAqOiLiARERV9EJCAq+iIiAVHRFxEJiIq+iEhAVPRFRAKioi8iEhAVfRGRgKjoi4gEREVfRCQgKvoiIgFR0RcRCYiKvohIQFT0RUQCoqIvIhIQFX0RkYCo6IuIBCSx6JvZ983sX8xsf4l2M7N7zOygmb1oZh/Na1tnZq9Gt3WVXPFaUy6FlEkhZVKccqkhd4+9AR8HPgrsL9F+DfBjwIBLgX+I5s8BeqN/Z0f3Zyc9X73clIsyUSbKpR5vid/03f0Z4FhMl9XAQ56zB5hlZucAK4Gfuvsxdz8O/BRYlfR89UK5FFImhZRJccqldiqxT78ZOJw3PRDNKzU/FMqlkDIppEyKUy4TpKnWKwBgZhuADQDTp0+/+IILLqjxGqWzZMkSDh48SDab9dFtM2f
OZN68eX+UzWYBmDFjBs3Nzc++9dZbuDtm9kV3nxv3+PWYizIpVE4m2Wz2v+zbt+914J5Sj1+PmYC2lUrbt2/f60mZAMn79D23H20+pfe9fQ+4IW/6ZeAc4Abge6X6lbpdfPHFXi8OHTrkixcvLtq2YcMGf+SRR0amzz//fH/ttdf8kUce8Q0bNjjQ5Q2YizIpVE4m7u5AV6Nl4q5tpdKGM0m6VWL3zg7gz6Oj7ZcCb7r7r4FdwCfNbLaZzQY+Gc0LwnXXXcdDDz2Eu7Nnzx5mzpzJOeecw8qVK/nJT34CMCW0XJRJoaRMjh8/DjCFgDIBbSsTKulTAdgG/Bo4RW7/2WeBvwT+Mmo34D6gB3gJyOYt+xfAwei2Ps2nUL18Ind0dPi8efO8qanJm5ub/cEHH/T777/f77//fnd3f++99/zmm2/2TCbjS5Ys8WeffXZk2U2bNjlwotFyUSaFys3kvPPOG86lYTJx17YyEUj5Td9yfSePbDbrXV1dtV6NCWdm+9w9m7Z/CLkok+LGkosyKS6EXNJmor/IFREJiIq+iEhAVPRFRAKioi8iEhAVfRGRgKjoi4gEREVfRCQgKvoiIgFR0RcRCYiKvohIQFT0RUQCoqIvIhIQFX0RkYCo6IuIBERFX0QkICr6IiIBSVX0zWyVmb1sZgfN7NYi7XeZ2fPR7RUzeyOv7d28th2VXPla2rlzJwsXLqS9vZ2NGzcWtN9yyy0sX76c5cuXc/755zNr1qyRtilTpgAsUiaNnwkol2KUSQ0lXVqL3PU5e4AMMBV4AVgU0/+vge/nTb+d5hJew7d6uKzZ0NCQZzIZ7+np8ZMnT/rSpUu9u7u7ZP977rnH169fPzI9ffr01Jc28zrJRZkUV+1clEn95lKutJmk+aZ/CXDQ3Xvd/R1gO7A6pv8N5K6r27D27t1Le3s7mUyGqVOn0tHRQWdnZ8n+27Zt44YbbqjiGlafMilOuRRSJrWVpug3A4fzpgeieQXM7FygDXgqb/b7zazLzPaY2fXjXtNJZHBwkNbW1pHplpYWBgcHi/bt7+/n0KFDfOITnxiZd+LECYALlUljZwLKpRhlUluVPpDbATzm7u/mzTvXcxfr/TRwt5mdN3ohM9sQfTB0HTlypMKrVFvbt29nzZo1w/shgdyGDBwgJhNo3FyUSXHjzUWZhLetlCNN0R8EWvOmW6J5xXQwateOuw9G//YCu4GLRi/k7g+4e9bds3Pnzk2xSrXV3NzM4cOnf/wMDAzQ3Fz0xw/bt28v+Gk63Dcuk6i9bnJRJsVVIxdl0hjbStUk7fQHmoBecrtthg/kLi7S7wKgD7C8ebOBadH9DwGvEnMQ2OvkgMupU6e8ra3Ne3t7Rw5E7d+/v6DfgQMH/Nxzz/X33ntvZN6xY8f8xIkTDnSlzcTrIBdlUly1c1Em9ZtLuajUgVx3HwK+AOwi95PqB+7ebWbfMLPr8rp2ANujJx92IdBlZi8APwc2uvs/pvs4mryampq49957WblyJRdeeCFr165l8eLFfPWrX2XHjtMjyLZv305HRwdmNjLvwIEDZLNZgEUoE6BxMwHlUowyqS07s0bXXjab9a6urlqvxoQzs32eO9aRSgi5KJPixpKLMikuhFzSZqK/yBURCYiKvohIQFT0RUQCoqIvIhIQFX0RkYCo6IuIBERFX0QkICr6IiIBUdEXEQmIir6ISEBU9EVEAqKiLyISEBV9EZGAqOiLiARERV9EJCAq+iIiAUlV9M1slZm9bGYHzezWIu03mtkRM3s+ut2U17bOzF6NbusqufK1tHPnThYuXEh7ezsbN24saN+yZQtz585l+fLlLF++nAcffHCkbevWrQBLlEnjZwLKpRhlUkNJ11MEpgA9QIbT18hdNKrPjcC9RZadQ+76unPIXS+3F5gd93z1cC3LoaEhz2Qy3tPTM3KNz+7u7jP6bN682T//+c8XLHv06FFva2tz4Lm0mXgd5KJMiqt2LsqkfnMpF5W
6Ri5wCXDQ3Xvd/R1gO7A65WfKSuCn7n7M3Y8DPwVWpVx20tq7dy/t7e1kMhmmTp1KR0cHnZ2dqZbdtWsXV111FcC7yiSnUTMB5VKMMqmtNEW/GTicNz0QzRvtT8zsRTN7zMxax7KsmW0wsy4z6zpy5EjKVa+dwcFBWltbR6ZbWloYHBws6PfDH/6QpUuXsmbNGg4fPlx0WUrnWVe5KJPiqpGLMmmMbaVaKnUg938D8919KblP3q1jWdjdH3D3rLtn586dW6FVqq1rr72Wvr4+XnzxRa666irWrRv7rsdGy0WZFFduLsqkuEbMpRLSFP1BIP+jtSWaN8Ldj7r7yWjyQeDitMvWo+bm5pFvHgADAwM0N5/5ZePss89m2rRpANx0003s27ev6LIok4bNBJRLMcqkxpJ2+gNN5A6WtHH6QO7iUX3Oybv/x8AeP30g9xC5Ay6zo/tz4p6vHg64nDp1ytva2ry3t3fkQNT+/fvP6PPaa6+N3P/Rj37kK1ascPfcgaj58+fnH4hKzMTrIBdlUly1c1Em9ZtLuUh5IDexQ+6xuAZ4hdwontuied8Arovufwvojj4Qfg5ckLfsXwAHo9v6pOeql/+cJ554whcsWOCZTMbvvPNOd3e//fbbvbOz093db731Vl+0aJEvXbrUL7/8cj9w4MDIsps2bXLgRNpMvE5yUSbFVTMXZVLfuZQjbdG3XN/JI5vNeldXV61XY8KZ2T53z6btH0IuyqS4seSiTIoLIZe0megvckVEAqKiLyISEBV9EZGAqOiLiARERV9EJCAq+iIiAVHRFxEJiIq+iEhAVPRFRAKioi8iEhAVfRGRgKjoi4gEREVfRCQgKvoiIgFR0RcRCUiqom9mq8zsZTM7aGa3Fmn/j2b2j9GF0X9mZufmtb1rZs9Htx2VXPla2rlzJwsXLqS9vZ2NGzcWtH/7299m0aJFLF26lCuvvJL+/v6RtilTpgAsUiaNnwkol2KUSQ0lXWUFmELuilkZTl8ucdGoPlcAZ0X3/wp4NK/t7TRXcxm+1cMVboaGhjyTyXhPT8/I5d66u7vP6PPUU0/5b3/7W3d3/+53v+tr164daZs+fXrqq9x4neSiTIqrdi7KpH5zKVfaTNJ8078EOOjuve7+DrAdWD3qg+Pn7v67aHIPuYsVN6y9e/fS3t5OJpNh6tSpdHR00NnZeUafK664grPOOguASy+9lIGBgVqsatUok+KUSyFlUltpin4zkH/5+YFoXimfBX6cN/1+M+sysz1mdv041nHSGRwcpLW1dWS6paWFwcHBkv03bdrE1VdfPTJ94sQJgAuVSWNnAsqlGGVSW02VfDAz+/dAFrgsb/a57j5oZhngKTN7yd17Ri23AdgA8JGPfKSSq1RzDz/8MF1dXTz99NMj8/r7+2lpaTkAfJoSmUDj5qJMihtvLsokvG2lHGm+6Q8CrXnTLdG8M5jZHwK3Ade5+8nh+e4+GP3bC+wGLhq9rLs/4O5Zd8/OnTt3TC+gFpqbmzl8+PSPn4GBAZqbC3/8PPnkk3zzm99kx44dTJs27YzlIT6TqL1uclEmxVUjF2XSGNtK1STt9Cf3a6AXaOP0gdzFo/pcRO5g74JR82cD06L7HwJeZdRB4NG3ejjgcurUKW9ra/Pe3t6RA1H79+8/o88vf/lLz2Qy/sorr5wx/9ixY37ixAkHutJm4nWQizIprtq5KJP6zaVcpDyQm+rIN3AN8EpU2G+L5n2D3Ld6gCeBfwaej247ovn/Fngp+qB4Cfhs0nPVy3/OE0884QsWLPBMJuN33nmnu7vffvvt3tnZ6e7uV155pX/4wx/2ZcuW+bJly/zaa691d/df/OIXvmTJEgd+lzYTr5NclElx1cxFmdR3LuVIW/Qt13fyyGaz3tXVVevVmHBmts/ds2n7h5CLMiluLLkok+JCyCVtJvqLXBGRgKjoi4gEREVfRCQgKvoiIgFR0RcRCYiKvohIQFT0RUQCoqIvIhIQFX0
RkYCo6IuIBERFX0QkICr6IiIBUdEXEQmIir6ISEBU9EVEAqKiLyISkFRF38xWmdnLZnbQzG4t0j7NzB6N2v/BzObntX0lmv+yma2s3KrX1s6dO1m4cCHt7e1s3LixoP3kyZN86lOfor29nRUrVtDX1zfS9q1vfQtgSaNlAuXlAszTthLGtqJMaijp0lrAFHKXScxw+hq5i0b1uRn4m+h+B/BodH9R1H8auWvs9gBT4p6vHi5rNjQ05JlMxnt6ekau8dnd3X1Gn/vuu88/97nPubv7tm3bfO3ate7u3t3d7UuXLnVgX9pMPJBcyF0CT9tKGduKMqnfXMpFysslpvmmfwlw0N173f0dYDuwelSf1cDW6P5jwJVmZtH87e5+0t0PAQejx6tre/fupb29nUwmw9SpU+no6KCzs/OMPp2dnaxbtw6ANWvW8LOf/Qx3p7Ozk46ODgBvpEyg/FyAY9pWGn9bUSa1laboNwOH86YHonlF+7j7EPAmcHbKZevO4OAgra2tI9MtLS0MDg6W7NPU1MTMmTM5evRowbI0SCZQfi7AO3ldGyIXbSuFlEltJV4Y3czWAKvc/aZo+s+AFe7+hbw++6M+A9F0D7ACuAPY4+4PR/M3AT9298dGPccGYEM0uQTYX/5Lm1CzgQ8C/dH0HOADwD/l9VkMvAKciqaXAL8C/gB4G5jr7jNKZQJB5jLD3X8PtK0whm1FmTTM+6dcC919RmKvpP0/wMeAXXnTXwG+MqrPLuBj0f0m4HXARvfN7xfzfKn2S9XyVolMhl9nmkwCymVA20p524oyqd9cKpBrxfbpPwssMLM2M5tK7kDtjlF9dgDrovtrgKc8txY7gI5odE8bsADYm+I5J7uyMwGswTKB8nOZo20liG1FmdRSyk+Qa8j91OoBbovmfQO4Lrr/fuBvyR1U2Qtk8pa9LVruZeDqRvlErkAmJ9JmElAuA9pWyttWlEl951JmpqleY+I+/Wozsw3u/kCt12OijfV1hpCLMiluLK9TmVSmfz1K+xonXdEXEZGJo9MwiIgEZFIV/aTTPTQCM/u+mf1LNMw1TX9lUti/4TMB5VKMMik01kxqfvAh7yBE4ukeGuEGfBz4KLBfmSgT5aJMqpmJe7ohm9WS5nQPdc/dnwGOpeyuTAoFkQkol2KUSaExZjKpin5DnrKhTMqkkDIpTrkUUiZFTKaiLyIiE2wyFf1BIP9MSi3RvJApk0LKpDjlUkiZFDGZin6aP80OjTIppEyKUy6FlEkRk6boe+6UzF8gdwKlA8AP3L27tmtVeWa2Dfh7YKGZDZjZZ0v1VSaFQskElEsxyqTQWDIB/UWuiEhQJs03fRERmXgq+iIiAVHRFxEJiIq+iEhAVPRFRAKioi8iEhAVfRGRgKjoi4gE5P8DNy6pHb5QsgEAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "import random\n", - "random.seed(10)\n", - "\n", - "_, axes = plt.subplots(2, 5)\n", - "images_and_labels = list(zip(digits.images, digits.target))\n", - "for ax, (image, label) in zip(axes[0, :], random.sample(images_and_labels, 5)):\n", - " ax.set_axis_off()\n", - " ax.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')\n", - " ax.set_title('Training: %i' % label)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "## Support Vector Machine" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's build a workflow with one model only to start. Here we use SVM (Support Vector Machine) for classification" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Define `FunctionTask`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# \n" - ] - }, - { - "cell_type": "code", - "execution_count": 101, - "metadata": {}, - "outputs": [], - "source": [ - "import typing as ty\n", - "\n", - "# https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html#sklearn.svm.LinearSVC\n", - "# linear SVM\n", - "@pydra.mark.task\n", - "#@pydra.mark.annotate({\"return\": {\"clf\": ty.Any}})\n", - "def fit_SVM(X_tr, y_tr, C=1, kernel='rbf', gamma=1):\n", - " from sklearn.svm import SVC\n", - " clf = SVC(C=C, kernel=kernel, gamma=gamma)\n", - " clf.fit(X_tr, y_tr)\n", - " return(404)\n", - "\n", - "# metrics\n", - "@pydra.mark.task\n", - "#@pydra.mark.annotate({\"return\": {\"score\": ty.Any}})\n", - "def metric_score(clf, X_tt, y_tt, metric):\n", - " \n", - " y_pred = clf.predict(X_tt)\n", - " \n", - " from sklearn import metrics\n", - " if metric == 'accuracy':\n", - " score = metrics.accuracy_score(y_tt, y_pred)\n", - " elif metric == 'precision':\n", - " score = 
metrics.precision_score(y_tt, y_pred)\n", - " else:\n", - " score = metrics.recall_score(y_tt, y_pred)\n", - " return(score)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 99, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,\n", - " decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',\n", - " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", - " tol=0.001, verbose=False)\n" - ] - } - ], - "source": [ - "from sklearn.model_selection import train_test_split\n", - "from sklearn.svm import SVC\n", - "n_samples = len(digits.images)\n", - "data = digits.images.reshape((n_samples, -1))\n", - "\n", - "# Split data into train and test subsets\n", - "X_train, X_test, y_train, y_test = train_test_split(\n", - " data, digits.target, test_size=0.5, shuffle=False)\n", - "\n", - "data[1]\n", - "\n", - "clf = SVC()\n", - "clf.fit(X_train, y_train)\n", - "\n", - "print(clf)" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(899, 64)" - ] - }, - "execution_count": 100, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X_test.shape" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Create workflow" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [], - "source": [ - "input_grid = {'X_tr': X_train,\n", - " 'X_tt': X_test,\n", - " 'y_tr': y_train,\n", - " 'y_tt': y_test,\n", - " 'C': [0.1,1, 10, 100], \n", - " 'gamma': [1,0.1,0.01,0.001], #'gamma': ['scale', 'auto'],\n", - " 'kernel': ['rbf', 'poly', 'sigmoid']}" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "metadata": {}, - "outputs": [], - "source": [ - "# for fit_SVM\n", - "input_grid = {'X_tr': X_train,\n", - " #'X_tt': X_test,\n", - " 'y_tr': y_train,\n", 
- " #'y_tt': y_test,\n", - " 'C': 1, \n", - " 'gamma': [1,0.1,0.01,0.001], #'gamma': ['scale', 'auto'],\n", - " 'kernel': 'rbf',\n", - " 'metric': ['accuracy', 'precision', 'recall']}\n", - "\n", - "# for metrics\n", - "#metric_list = ['accuracy', 'precision', 'recall']" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "metadata": {}, - "outputs": [], - "source": [ - "wf1 = pydra.Workflow(name=\"svm1\", \n", - " input_spec=list(input_grid.keys()), **input_grid)\n", - "wf1.split('gamma')\n", - "wf1.add(fit_SVM(name='svm', **input_grid))\n", - "wf1.set_output([(\"out\", wf1.svm.lzout.out)])\n", - "\n", - "#wf1.add(metric_score(name='metric', clf=wf1.svm.lzout.out,\n", - "# X_tt=wf1.lzin.X_tt, y_tt=wf1.lzin.y_tt, metric=wf1.lzin.metric))\n", - "\n", - "#wf1.set_output([(\"clf\", wf1.svm.lzout.out),\n", - "# (\"score\", wf1.metric.lzout.out)\n", - "# ])\n", - "\n", - "\n", - "#with pydra.Submitter(plugin=\"cf\") as sub:\n", - "# sub(wf1)\n", - "\n", - "wf1.result()" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "metadata": {}, - "outputs": [], - "source": [ - "wf1 = pydra.Workflow(name=\"svm1\", \n", - " input_spec=list(input_grid.keys()), **input_grid)" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "wf1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}