Merge branch 'kenzab/refactor_tiles_plotting' of https://github.com/microsoft/hi-ml into kenzab/refactor_tiles_plotting
microsoft · Jun 1, 2022 · 1d7dec9 · 1d7dec9
2 parents 65d9bab + ec74e5b
commit 1d7dec9
Show file tree

Hide file tree

Showing 161 changed files with 2,483 additions and 1,243 deletions.
diff --git a/docs/source/examples/1/sample.py b/docs/source/examples/1/sample.py
@@ -28,7 +28,8 @@ def sieve(n: int) -> List[int]:
 
 def main() -> None:
     parser = ArgumentParser()
-    parser.add_argument("-n", "--count", type=int, default=100, required=False, help="Maximum value (not included)")
+    parser.add_argument("-n", "--count", type=int, default=100,
+                        required=False, help="Maximum value (not included)")
     args = parser.parse_args()
 
     primes = sieve(args.count)

diff --git a/docs/source/examples/10/inputs.py b/docs/source/examples/10/inputs.py
@@ -26,7 +26,8 @@ def main() -> None:
 
     print("Creating splits")
     num_cross_validation_splits = 5
-    k_folds = KFold(n_splits=num_cross_validation_splits, shuffle=True, random_state=0)
+    k_folds = KFold(n_splits=num_cross_validation_splits,
+                    shuffle=True, random_state=0)
     splits = np.array(list(k_folds.split(X)))
     indices_train_splits, indices_test_splits = [], []
     for split in splits:
@@ -36,16 +37,19 @@ def main() -> None:
     train_splits_file = inputs / "iris_data_splits.csv"
     target_splits_file = inputs / "iris_target_splits.csv"
 
-    np.savetxt(str(train_splits_file), np.vstack(indices_train_splits), delimiter=",")
-    np.savetxt(str(target_splits_file), np.vstack(indices_test_splits), delimiter=",")
+    np.savetxt(str(train_splits_file), np.vstack(
+        indices_train_splits), delimiter=",")
+    np.savetxt(str(target_splits_file), np.vstack(
+        indices_test_splits), delimiter=",")
 
     ws = get_workspace()
     datastore = get_datastore(workspace=ws,
                               datastore_name="himldatasets")
 
     dataset_name = 'himl_kfold_split_iris'
     datastore.upload_files(
-        [str(train_data_file), str(targets_file), str(train_splits_file), str(target_splits_file)],
+        [str(train_data_file), str(targets_file), str(
+            train_splits_file), str(target_splits_file)],
         relative_root=str(inputs),
         target_path=dataset_name,
         overwrite=True,

diff --git a/docs/source/examples/10/sample.py b/docs/source/examples/10/sample.py
@@ -47,7 +47,8 @@ def main() -> None:
                         help='Penalty parameter of the error term')
     parser.add_argument('--cross_validation_split_index', help="An index denoting which split of the dataset this"
                                                                "run represents in k-fold cross-validation")
-    parser.add_argument("--num_splits", help="The total number of splits being used for k-fol cross validation")
+    parser.add_argument(
+        "--num_splits", help="The total number of splits being used for k-fol cross validation")
 
     args = parser.parse_args()
     run.log('Kernel type', args.kernel)
@@ -59,7 +60,8 @@ def main() -> None:
     targets_file = input_folder / "iris_targets.csv"
 
     X = np.loadtxt(fname=train_data_file, delimiter=',').astype(float)
-    y = np.loadtxt(fname=targets_file, dtype='str', delimiter=',').astype(float)
+    y = np.loadtxt(fname=targets_file, dtype='str',
+                   delimiter=',').astype(float)
 
     # training a linear SVM classifier
     from sklearn.svm import SVC
@@ -70,8 +72,10 @@ def main() -> None:
     train_splits_file = str(input_folder / "iris_data_splits.csv")
     test_splits_file = str(input_folder / "iris_target_splits.csv")
 
-    train_splits_indices = np.loadtxt(fname=train_splits_file, delimiter=",").astype(int)
-    test_splits_indices = np.loadtxt(fname=test_splits_file, delimiter=",").astype(int)
+    train_splits_indices = np.loadtxt(
+        fname=train_splits_file, delimiter=",").astype(int)
+    test_splits_indices = np.loadtxt(
+        fname=test_splits_file, delimiter=",").astype(int)
 
     fold = int(args.cross_validation_split_index)
     fold_train_idx = train_splits_indices[fold]
@@ -80,7 +84,8 @@ def main() -> None:
     X_train, X_test = X[fold_train_idx], X[fold_test_idx]
     y_train, y_test = y[fold_train_idx], y[fold_test_idx]
 
-    svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
+    svm_model_linear = SVC(
+        kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
     svm_predictions = svm_model_linear.predict(X_test)
     lb = LabelBinarizer()
     y_pred = lb.fit_transform(svm_predictions)

diff --git a/docs/source/examples/2/sample.py b/docs/source/examples/2/sample.py
@@ -35,7 +35,8 @@ def main() -> None:
         wait_for_completion_show_output=True)
 
     parser = ArgumentParser()
-    parser.add_argument("-n", "--count", type=int, default=100, required=False, help="Maximum value (not included)")
+    parser.add_argument("-n", "--count", type=int, default=100,
+                        required=False, help="Maximum value (not included)")
     args = parser.parse_args()
 
     primes = sieve(args.count)

diff --git a/docs/source/examples/3/sample.py b/docs/source/examples/3/sample.py
@@ -35,8 +35,10 @@ def main() -> None:
         wait_for_completion_show_output=True)
 
     parser = ArgumentParser()
-    parser.add_argument("-n", "--count", type=int, default=100, required=False, help="Maximum value (not included)")
-    parser.add_argument("-o", "--output", type=str, default="primes.txt", required=False, help="Output file name")
+    parser.add_argument("-n", "--count", type=int, default=100,
+                        required=False, help="Maximum value (not included)")
+    parser.add_argument("-o", "--output", type=str,
+                        default="primes.txt", required=False, help="Output file name")
     args = parser.parse_args()
 
     primes = sieve(args.count)

diff --git a/docs/source/examples/4/sample.py b/docs/source/examples/4/sample.py
@@ -38,12 +38,15 @@ def main() -> None:
         wait_for_completion_show_output=True)
 
     parser = ArgumentParser()
-    parser.add_argument("-n", "--count", type=int, default=100, required=False, help="Maximum value (not included)")
-    parser.add_argument("-o", "--output", type=str, default="primes.txt", required=False, help="Output file name")
+    parser.add_argument("-n", "--count", type=int, default=100,
+                        required=False, help="Maximum value (not included)")
+    parser.add_argument("-o", "--output", type=str,
+                        default="primes.txt", required=False, help="Output file name")
     args = parser.parse_args()
 
     primes = sieve(args.count)
-    output_folder = run_info.output_datasets[0] or Path("outputs") / "himl_sample4_output"
+    output_folder = run_info.output_datasets[0] or Path(
+        "outputs") / "himl_sample4_output"
     output_folder.mkdir(parents=True, exist_ok=True)
     output = output_folder / args.output
     output.write_text("\n".join(map(str, primes)))

diff --git a/docs/source/examples/5/sample.py b/docs/source/examples/5/sample.py
@@ -28,14 +28,16 @@ def main() -> None:
     # X -> features, y -> label
     input_folder = Path("dataset")
     X = np.loadtxt(fname=input_folder / "X.csv", delimiter=',', skiprows=1)
-    y = np.loadtxt(fname=input_folder / "y.csv", dtype='str', delimiter=',', skiprows=1)
+    y = np.loadtxt(fname=input_folder / "y.csv",
+                   dtype='str', delimiter=',', skiprows=1)
 
     # dividing X, y into train and test data
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
 
     # training a linear SVM classifier
     from sklearn.svm import SVC
-    svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
+    svm_model_linear = SVC(
+        kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
     svm_predictions = svm_model_linear.predict(X_test)
 
     # model accuracy for X_test

diff --git a/docs/source/examples/6/sample.py b/docs/source/examples/6/sample.py
@@ -36,14 +36,16 @@ def main() -> None:
     # X -> features, y -> label
     input_folder = run_info.input_datasets[0] or Path("dataset")
     X = np.loadtxt(fname=input_folder / "X.csv", delimiter=',', skiprows=1)
-    y = np.loadtxt(fname=input_folder / "y.csv", dtype='str', delimiter=',', skiprows=1)
+    y = np.loadtxt(fname=input_folder / "y.csv",
+                   dtype='str', delimiter=',', skiprows=1)
 
     # dividing X, y into train and test data
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
 
     # training a linear SVM classifier
     from sklearn.svm import SVC
-    svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
+    svm_model_linear = SVC(
+        kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
     svm_predictions = svm_model_linear.predict(X_test)
 
     # model accuracy for X_test

diff --git a/docs/source/examples/7/sample.py b/docs/source/examples/7/sample.py
@@ -36,14 +36,16 @@ def main() -> None:
     # X -> features, y -> label
     input_folder = run_info.input_datasets[0] or Path("dataset")
     X = np.loadtxt(fname=input_folder / "X.csv", delimiter=',', skiprows=1)
-    y = np.loadtxt(fname=input_folder / "y.csv", dtype='str', delimiter=',', skiprows=1)
+    y = np.loadtxt(fname=input_folder / "y.csv",
+                   dtype='str', delimiter=',', skiprows=1)
 
     # dividing X, y into train and test data
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
 
     # training a linear SVM classifier
     from sklearn.svm import SVC
-    svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
+    svm_model_linear = SVC(
+        kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
     svm_predictions = svm_model_linear.predict(X_test)
 
     # model accuracy for X_test

diff --git a/docs/source/examples/8/sample.py b/docs/source/examples/8/sample.py
@@ -58,14 +58,16 @@ def main() -> None:
     # X -> features, y -> label
     input_folder = run_info.input_datasets[0] or Path("inputs")
     X = np.loadtxt(fname=input_folder / "X.csv", delimiter=',', skiprows=1)
-    y = np.loadtxt(fname=input_folder / "y.csv", dtype='str', delimiter=',', skiprows=1)
+    y = np.loadtxt(fname=input_folder / "y.csv",
+                   dtype='str', delimiter=',', skiprows=1)
 
     # dividing X, y into train and test data
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
 
     # training a linear SVM classifier
     from sklearn.svm import SVC
-    svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
+    svm_model_linear = SVC(
+        kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
     svm_predictions = svm_model_linear.predict(X_test)
 
     # model accuracy for X_test

diff --git a/docs/source/examples/9/aml_sample.py b/docs/source/examples/9/aml_sample.py
@@ -13,7 +13,8 @@ def main() -> None:
         script="pytorch_sample.py",
         compute_target="<name of compute target>"
     )
-    env = Environment.from_conda_specification("TensorboardTestEnv", "tensorboard_env.yml")
+    env = Environment.from_conda_specification(
+        "TensorboardTestEnv", "tensorboard_env.yml")
     config.run_config.environment = env
 
     run = experiment.submit(config)

diff --git a/docs/source/examples/modify_checkpoint/modify_checkpoint.py b/docs/source/examples/modify_checkpoint/modify_checkpoint.py
@@ -38,7 +38,8 @@
         state_dict = checkpoint['state_dict']
         # Here we modify the checkpoints: They reference weights from an older version of the code, delete any
         # such weights
-        linear_head_states = [name for name in state_dict.keys() if name.startswith("non_linear_evaluator")]
+        linear_head_states = [name for name in state_dict.keys(
+        ) if name.startswith("non_linear_evaluator")]
         print(linear_head_states)
         if linear_head_states:
             print(f"Removing linear head from {file}")
@@ -47,9 +48,12 @@
             torch.save(checkpoint, file)
 
     # Create a new AzureML run in the same experiment. The run will get a new unique ID
-    new_run = create_aml_run_object(experiment_name=experiment_name, workspace_config_path=workspace_config_json)
-    new_run.upload_folder(name=checkpoint_folder, path=str(download_folder / checkpoint_folder))
+    new_run = create_aml_run_object(
+        experiment_name=experiment_name, workspace_config_path=workspace_config_json)
+    new_run.upload_folder(name=checkpoint_folder, path=str(
+        download_folder / checkpoint_folder))
     new_run.complete()
 
-    print(f"Uploaded the modified checkpoints to this run: {new_run.get_portal_url()}")
+    print(
+        f"Uploaded the modified checkpoints to this run: {new_run.get_portal_url()}")
     print(f"Use this RunID to download the modified checkpoints: {new_run.id}")
diff --git a/hi-ml-azure/run_pytest.py b/hi-ml-azure/run_pytest.py
@@ -16,7 +16,8 @@ def add_to_sys_path(folder: Path) -> None:
         sys.path.insert(0, str(folder))
 
 
-folders_to_add = [himl_root / "hi-ml" / "src", himl_root / "hi-ml-azure" / "src"]
+folders_to_add = [himl_root / "hi-ml" /
+                  "src", himl_root / "hi-ml-azure" / "src"]
 for folder in folders_to_add:
     add_to_sys_path(folder)
 
@@ -36,13 +37,15 @@ def add_to_sys_path(folder: Path) -> None:
 
 
 class RunPytestConfig(param.Parameterized):
-    mark: str = param.String(default="", doc="The value to pass to pytest for the -m (mark) argument.")
+    mark: str = param.String(
+        default="", doc="The value to pass to pytest for the -m (mark) argument.")
     folder: str = param.String(
         default="",
         doc="The file or folder of tests that should be run. This value is used as the first argument to start "
         "pytest, so it can also be a specific test like 'my_test.py::any_test'",
     )
-    cluster: str = param.String(default="", doc="The name of the AzureML compute cluster where the script should run.")
+    cluster: str = param.String(
+        default="", doc="The name of the AzureML compute cluster where the script should run.")
     conda_env: str = param.String(
         default="", doc="The path to the Conda environment file that should be used when starting pytest in AzureML."
     )
@@ -76,7 +79,8 @@ def run_pytest(folder_to_test: str, pytest_mark: str) -> None:
     logging.info(f"Starting pytest with these args: {pytest_args}")
     status_code = pytest.main(pytest_args)
     if status_code == ExitCode.NO_TESTS_COLLECTED:
-        raise ValueError(f"PyTest did not find any tests to run, when restricting with this mark: {pytest_mark}")
+        raise ValueError(
+            f"PyTest did not find any tests to run, when restricting with this mark: {pytest_mark}")
     if status_code != ExitCode.OK:
         raise ValueError(f"PyTest failed with exit code: {status_code}")
 

diff --git a/hi-ml-azure/setup.py b/hi-ml-azure/setup.py
@@ -19,7 +19,8 @@
 here = pathlib.Path(__file__).parent.resolve()
 
 # Get the long description from the README file
-long_description = (here / 'package_description.md').read_text(encoding='utf-8')
+long_description = (
+    here / 'package_description.md').read_text(encoding='utf-8')
 
 version = ''
 

diff --git a/hi-ml-azure/src/health_azure/datasets.py b/hi-ml-azure/src/health_azure/datasets.py
@@ -32,7 +32,8 @@ def get_datastore(workspace: Workspace, datastore_name: str) -> Datastore:
         if len(existing_stores) == 1:
             return datastores[existing_stores[0]]
         datastore = workspace.get_default_datastore()
-        logging.info(f"Using the workspace default datastore {datastore.name} to access datasets.")
+        logging.info(
+            f"Using the workspace default datastore {datastore.name} to access datasets.")
         return datastore
     if datastore_name in datastores:
         return datastores[datastore_name]
@@ -54,12 +55,15 @@ def get_or_create_dataset(workspace: Workspace, datastore_name: str, dataset_nam
         azureml_dataset = Dataset.get_by_name(workspace, name=dataset_name)
         logging.info("Dataset found.")
     except Exception:
-        logging.info(f"Retrieving datastore '{datastore_name}' from AzureML workspace")
+        logging.info(
+            f"Retrieving datastore '{datastore_name}' from AzureML workspace")
         datastore = get_datastore(workspace, datastore_name)
-        logging.info(f"Creating a new dataset from data in folder '{dataset_name}' in the datastore")
+        logging.info(
+            f"Creating a new dataset from data in folder '{dataset_name}' in the datastore")
         # Ensure that there is a / at the end of the file path, otherwise folder that share a prefix could create
         # trouble (for example, folders foo and foo_bar exist, and I'm trying to create a dataset from "foo")
-        azureml_dataset = Dataset.File.from_files(path=(datastore, dataset_name + "/"))
+        azureml_dataset = Dataset.File.from_files(
+            path=(datastore, dataset_name + "/"))
         logging.info("Registering the dataset for future use.")
         azureml_dataset.register(workspace, name=dataset_name)
     return azureml_dataset
@@ -107,15 +111,17 @@ def __init__(self,
         # documentation tools in the editor work nicer.
         name = name.strip()
         if not name:
-            raise ValueError("The name of the dataset must be a non-empty string.")
+            raise ValueError(
+                "The name of the dataset must be a non-empty string.")
         self.name = name
         self.datastore = datastore
         self.version = version
         self.use_mounting = use_mounting
         # If target_folder is "" then convert to None
         self.target_folder = Path(target_folder) if target_folder else None
         if str(self.target_folder) == ".":
-            raise ValueError("Can't mount or download a dataset to the current working directory.")
+            raise ValueError(
+                "Can't mount or download a dataset to the current working directory.")
         self.local_folder = Path(local_folder) if local_folder else None
 
     def to_input_dataset_local(self, workspace: Optional[Workspace]) -> Tuple[Path, Optional[MountContext]]:
@@ -152,7 +158,8 @@ def to_input_dataset_local(self, workspace: Optional[Workspace]) -> Tuple[Path,
         else:
             status += f"downloaded to {target_path}"
             print(status)
-            azureml_dataset.download(target_path=str(target_path), overwrite=False)
+            azureml_dataset.download(
+                target_path=str(target_path), overwrite=False)
             result = target_path, None
         return result
 
@@ -170,10 +177,12 @@ def to_input_dataset(self,
         azureml_dataset = get_or_create_dataset(workspace=workspace,
                                                 dataset_name=self.name,
                                                 datastore_name=self.datastore)
-        named_input = azureml_dataset.as_named_input(_input_dataset_key(index=dataset_index))
+        named_input = azureml_dataset.as_named_input(
+            _input_dataset_key(index=dataset_index))
         # If running on windows then self.target_folder may be a WindowsPath, make sure it is
         # in posix format for Azure.
-        path_on_compute = self.target_folder.as_posix() if self.target_folder is not None else None
+        path_on_compute = self.target_folder.as_posix(
+        ) if self.target_folder is not None else None
         use_mounting = False if self.use_mounting is None else self.use_mounting
         if use_mounting:
             status += "mounted at "
@@ -311,7 +320,8 @@ def find_workspace_for_local_datasets(aml_workspace: Optional[Workspace],
             workspace = get_workspace(aml_workspace, workspace_config_path)
             logging.info(f"Found workspace for datasets: {workspace.name}")
         except Exception as ex:
-            logging.info(f"Could not find workspace for datasets. Exception: {ex}")
+            logging.info(
+                f"Could not find workspace for datasets. Exception: {ex}")
     return workspace
 
 
@@ -332,7 +342,8 @@ def setup_local_datasets(aml_workspace: Optional[Workspace],
     :param dataset_configs: List of DatasetConfig describing the input datasets.
     :return: Pair of: list of optional paths to the input datasets, list of mountcontexts, one for each mounted dataset.
     """
-    workspace = find_workspace_for_local_datasets(aml_workspace, workspace_config_path, dataset_configs)
+    workspace = find_workspace_for_local_datasets(
+        aml_workspace, workspace_config_path, dataset_configs)
 
     mounted_input_datasets: List[Optional[Path]] = []
     mount_contexts: List[MountContext] = []

diff --git a/hi-ml-azure/src/health_azure/examples/elevate_this.py b/hi-ml-azure/src/health_azure/examples/elevate_this.py
@@ -29,7 +29,8 @@ def main() -> None:
         wait_for_completion_show_output=True)
 
     parser = ArgumentParser()
-    parser.add_argument("-m", "--message", type=str, required=True, help="The message to print out")
+    parser.add_argument("-m", "--message", type=str,
+                        required=True, help="The message to print out")
     args = parser.parse_args()
 
     print(f"The message was: {args.message}")