Skip to content

Commit

Permalink
Merge branch 'kenzab/refactor_tiles_plotting' of https://github.com/microsoft/hi-ml into kenzab/refactor_tiles_plotting
Browse files Browse the repository at this point in the history
  • Loading branch information
kenza-bouzid committed Jun 1, 2022
2 parents 65d9bab + ec74e5b commit 1d7dec9
Show file tree
Hide file tree
Showing 161 changed files with 2,483 additions and 1,243 deletions.
3 changes: 2 additions & 1 deletion docs/source/examples/1/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ def sieve(n: int) -> List[int]:

def main() -> None:
parser = ArgumentParser()
parser.add_argument("-n", "--count", type=int, default=100, required=False, help="Maximum value (not included)")
parser.add_argument("-n", "--count", type=int, default=100,
required=False, help="Maximum value (not included)")
args = parser.parse_args()

primes = sieve(args.count)
Expand Down
12 changes: 8 additions & 4 deletions docs/source/examples/10/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ def main() -> None:

print("Creating splits")
num_cross_validation_splits = 5
k_folds = KFold(n_splits=num_cross_validation_splits, shuffle=True, random_state=0)
k_folds = KFold(n_splits=num_cross_validation_splits,
shuffle=True, random_state=0)
splits = np.array(list(k_folds.split(X)))
indices_train_splits, indices_test_splits = [], []
for split in splits:
Expand All @@ -36,16 +37,19 @@ def main() -> None:
train_splits_file = inputs / "iris_data_splits.csv"
target_splits_file = inputs / "iris_target_splits.csv"

np.savetxt(str(train_splits_file), np.vstack(indices_train_splits), delimiter=",")
np.savetxt(str(target_splits_file), np.vstack(indices_test_splits), delimiter=",")
np.savetxt(str(train_splits_file), np.vstack(
indices_train_splits), delimiter=",")
np.savetxt(str(target_splits_file), np.vstack(
indices_test_splits), delimiter=",")

ws = get_workspace()
datastore = get_datastore(workspace=ws,
datastore_name="himldatasets")

dataset_name = 'himl_kfold_split_iris'
datastore.upload_files(
[str(train_data_file), str(targets_file), str(train_splits_file), str(target_splits_file)],
[str(train_data_file), str(targets_file), str(
train_splits_file), str(target_splits_file)],
relative_root=str(inputs),
target_path=dataset_name,
overwrite=True,
Expand Down
15 changes: 10 additions & 5 deletions docs/source/examples/10/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ def main() -> None:
help='Penalty parameter of the error term')
parser.add_argument('--cross_validation_split_index', help="An index denoting which split of the dataset this"
"run represents in k-fold cross-validation")
parser.add_argument("--num_splits", help="The total number of splits being used for k-fol cross validation")
parser.add_argument(
"--num_splits", help="The total number of splits being used for k-fol cross validation")

args = parser.parse_args()
run.log('Kernel type', args.kernel)
Expand All @@ -59,7 +60,8 @@ def main() -> None:
targets_file = input_folder / "iris_targets.csv"

X = np.loadtxt(fname=train_data_file, delimiter=',').astype(float)
y = np.loadtxt(fname=targets_file, dtype='str', delimiter=',').astype(float)
y = np.loadtxt(fname=targets_file, dtype='str',
delimiter=',').astype(float)

# training a linear SVM classifier
from sklearn.svm import SVC
Expand All @@ -70,8 +72,10 @@ def main() -> None:
train_splits_file = str(input_folder / "iris_data_splits.csv")
test_splits_file = str(input_folder / "iris_target_splits.csv")

train_splits_indices = np.loadtxt(fname=train_splits_file, delimiter=",").astype(int)
test_splits_indices = np.loadtxt(fname=test_splits_file, delimiter=",").astype(int)
train_splits_indices = np.loadtxt(
fname=train_splits_file, delimiter=",").astype(int)
test_splits_indices = np.loadtxt(
fname=test_splits_file, delimiter=",").astype(int)

fold = int(args.cross_validation_split_index)
fold_train_idx = train_splits_indices[fold]
Expand All @@ -80,7 +84,8 @@ def main() -> None:
X_train, X_test = X[fold_train_idx], X[fold_test_idx]
y_train, y_test = y[fold_train_idx], y[fold_test_idx]

svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
svm_model_linear = SVC(
kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
svm_predictions = svm_model_linear.predict(X_test)
lb = LabelBinarizer()
y_pred = lb.fit_transform(svm_predictions)
Expand Down
3 changes: 2 additions & 1 deletion docs/source/examples/2/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ def main() -> None:
wait_for_completion_show_output=True)

parser = ArgumentParser()
parser.add_argument("-n", "--count", type=int, default=100, required=False, help="Maximum value (not included)")
parser.add_argument("-n", "--count", type=int, default=100,
required=False, help="Maximum value (not included)")
args = parser.parse_args()

primes = sieve(args.count)
Expand Down
6 changes: 4 additions & 2 deletions docs/source/examples/3/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@ def main() -> None:
wait_for_completion_show_output=True)

parser = ArgumentParser()
parser.add_argument("-n", "--count", type=int, default=100, required=False, help="Maximum value (not included)")
parser.add_argument("-o", "--output", type=str, default="primes.txt", required=False, help="Output file name")
parser.add_argument("-n", "--count", type=int, default=100,
required=False, help="Maximum value (not included)")
parser.add_argument("-o", "--output", type=str,
default="primes.txt", required=False, help="Output file name")
args = parser.parse_args()

primes = sieve(args.count)
Expand Down
9 changes: 6 additions & 3 deletions docs/source/examples/4/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,15 @@ def main() -> None:
wait_for_completion_show_output=True)

parser = ArgumentParser()
parser.add_argument("-n", "--count", type=int, default=100, required=False, help="Maximum value (not included)")
parser.add_argument("-o", "--output", type=str, default="primes.txt", required=False, help="Output file name")
parser.add_argument("-n", "--count", type=int, default=100,
required=False, help="Maximum value (not included)")
parser.add_argument("-o", "--output", type=str,
default="primes.txt", required=False, help="Output file name")
args = parser.parse_args()

primes = sieve(args.count)
output_folder = run_info.output_datasets[0] or Path("outputs") / "himl_sample4_output"
output_folder = run_info.output_datasets[0] or Path(
"outputs") / "himl_sample4_output"
output_folder.mkdir(parents=True, exist_ok=True)
output = output_folder / args.output
output.write_text("\n".join(map(str, primes)))
Expand Down
6 changes: 4 additions & 2 deletions docs/source/examples/5/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,16 @@ def main() -> None:
# X -> features, y -> label
input_folder = Path("dataset")
X = np.loadtxt(fname=input_folder / "X.csv", delimiter=',', skiprows=1)
y = np.loadtxt(fname=input_folder / "y.csv", dtype='str', delimiter=',', skiprows=1)
y = np.loadtxt(fname=input_folder / "y.csv",
dtype='str', delimiter=',', skiprows=1)

# dividing X, y into train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# training a linear SVM classifier
from sklearn.svm import SVC
svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
svm_model_linear = SVC(
kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
svm_predictions = svm_model_linear.predict(X_test)

# model accuracy for X_test
Expand Down
6 changes: 4 additions & 2 deletions docs/source/examples/6/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,16 @@ def main() -> None:
# X -> features, y -> label
input_folder = run_info.input_datasets[0] or Path("dataset")
X = np.loadtxt(fname=input_folder / "X.csv", delimiter=',', skiprows=1)
y = np.loadtxt(fname=input_folder / "y.csv", dtype='str', delimiter=',', skiprows=1)
y = np.loadtxt(fname=input_folder / "y.csv",
dtype='str', delimiter=',', skiprows=1)

# dividing X, y into train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# training a linear SVM classifier
from sklearn.svm import SVC
svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
svm_model_linear = SVC(
kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
svm_predictions = svm_model_linear.predict(X_test)

# model accuracy for X_test
Expand Down
6 changes: 4 additions & 2 deletions docs/source/examples/7/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,16 @@ def main() -> None:
# X -> features, y -> label
input_folder = run_info.input_datasets[0] or Path("dataset")
X = np.loadtxt(fname=input_folder / "X.csv", delimiter=',', skiprows=1)
y = np.loadtxt(fname=input_folder / "y.csv", dtype='str', delimiter=',', skiprows=1)
y = np.loadtxt(fname=input_folder / "y.csv",
dtype='str', delimiter=',', skiprows=1)

# dividing X, y into train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# training a linear SVM classifier
from sklearn.svm import SVC
svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
svm_model_linear = SVC(
kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
svm_predictions = svm_model_linear.predict(X_test)

# model accuracy for X_test
Expand Down
6 changes: 4 additions & 2 deletions docs/source/examples/8/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,16 @@ def main() -> None:
# X -> features, y -> label
input_folder = run_info.input_datasets[0] or Path("inputs")
X = np.loadtxt(fname=input_folder / "X.csv", delimiter=',', skiprows=1)
y = np.loadtxt(fname=input_folder / "y.csv", dtype='str', delimiter=',', skiprows=1)
y = np.loadtxt(fname=input_folder / "y.csv",
dtype='str', delimiter=',', skiprows=1)

# dividing X, y into train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# training a linear SVM classifier
from sklearn.svm import SVC
svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
svm_model_linear = SVC(
kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
svm_predictions = svm_model_linear.predict(X_test)

# model accuracy for X_test
Expand Down
3 changes: 2 additions & 1 deletion docs/source/examples/9/aml_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ def main() -> None:
script="pytorch_sample.py",
compute_target="<name of compute target>"
)
env = Environment.from_conda_specification("TensorboardTestEnv", "tensorboard_env.yml")
env = Environment.from_conda_specification(
"TensorboardTestEnv", "tensorboard_env.yml")
config.run_config.environment = env

run = experiment.submit(config)
Expand Down
12 changes: 8 additions & 4 deletions docs/source/examples/modify_checkpoint/modify_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
state_dict = checkpoint['state_dict']
# Here we modify the checkpoints: they reference weights from an older version of the code, so delete any
# such weights.
linear_head_states = [name for name in state_dict.keys() if name.startswith("non_linear_evaluator")]
linear_head_states = [name for name in state_dict.keys(
) if name.startswith("non_linear_evaluator")]
print(linear_head_states)
if linear_head_states:
print(f"Removing linear head from {file}")
Expand All @@ -47,9 +48,12 @@
torch.save(checkpoint, file)

# Create a new AzureML run in the same experiment. The run will get a new unique ID
new_run = create_aml_run_object(experiment_name=experiment_name, workspace_config_path=workspace_config_json)
new_run.upload_folder(name=checkpoint_folder, path=str(download_folder / checkpoint_folder))
new_run = create_aml_run_object(
experiment_name=experiment_name, workspace_config_path=workspace_config_json)
new_run.upload_folder(name=checkpoint_folder, path=str(
download_folder / checkpoint_folder))
new_run.complete()

print(f"Uploaded the modified checkpoints to this run: {new_run.get_portal_url()}")
print(
f"Uploaded the modified checkpoints to this run: {new_run.get_portal_url()}")
print(f"Use this RunID to download the modified checkpoints: {new_run.id}")
12 changes: 8 additions & 4 deletions hi-ml-azure/run_pytest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ def add_to_sys_path(folder: Path) -> None:
sys.path.insert(0, str(folder))


folders_to_add = [himl_root / "hi-ml" / "src", himl_root / "hi-ml-azure" / "src"]
folders_to_add = [himl_root / "hi-ml" /
"src", himl_root / "hi-ml-azure" / "src"]
for folder in folders_to_add:
add_to_sys_path(folder)

Expand All @@ -36,13 +37,15 @@ def add_to_sys_path(folder: Path) -> None:


class RunPytestConfig(param.Parameterized):
mark: str = param.String(default="", doc="The value to pass to pytest for the -m (mark) argument.")
mark: str = param.String(
default="", doc="The value to pass to pytest for the -m (mark) argument.")
folder: str = param.String(
default="",
doc="The file or folder of tests that should be run. This value is used as the first argument to start "
"pytest, so it can also be a specific test like 'my_test.py::any_test'",
)
cluster: str = param.String(default="", doc="The name of the AzureML compute cluster where the script should run.")
cluster: str = param.String(
default="", doc="The name of the AzureML compute cluster where the script should run.")
conda_env: str = param.String(
default="", doc="The path to the Conda environment file that should be used when starting pytest in AzureML."
)
Expand Down Expand Up @@ -76,7 +79,8 @@ def run_pytest(folder_to_test: str, pytest_mark: str) -> None:
logging.info(f"Starting pytest with these args: {pytest_args}")
status_code = pytest.main(pytest_args)
if status_code == ExitCode.NO_TESTS_COLLECTED:
raise ValueError(f"PyTest did not find any tests to run, when restricting with this mark: {pytest_mark}")
raise ValueError(
f"PyTest did not find any tests to run, when restricting with this mark: {pytest_mark}")
if status_code != ExitCode.OK:
raise ValueError(f"PyTest failed with exit code: {status_code}")

Expand Down
3 changes: 2 additions & 1 deletion hi-ml-azure/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
here = pathlib.Path(__file__).parent.resolve()

# Get the long description from the README file
long_description = (here / 'package_description.md').read_text(encoding='utf-8')
long_description = (
here / 'package_description.md').read_text(encoding='utf-8')

version = ''

Expand Down
33 changes: 22 additions & 11 deletions hi-ml-azure/src/health_azure/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ def get_datastore(workspace: Workspace, datastore_name: str) -> Datastore:
if len(existing_stores) == 1:
return datastores[existing_stores[0]]
datastore = workspace.get_default_datastore()
logging.info(f"Using the workspace default datastore {datastore.name} to access datasets.")
logging.info(
f"Using the workspace default datastore {datastore.name} to access datasets.")
return datastore
if datastore_name in datastores:
return datastores[datastore_name]
Expand All @@ -54,12 +55,15 @@ def get_or_create_dataset(workspace: Workspace, datastore_name: str, dataset_nam
azureml_dataset = Dataset.get_by_name(workspace, name=dataset_name)
logging.info("Dataset found.")
except Exception:
logging.info(f"Retrieving datastore '{datastore_name}' from AzureML workspace")
logging.info(
f"Retrieving datastore '{datastore_name}' from AzureML workspace")
datastore = get_datastore(workspace, datastore_name)
logging.info(f"Creating a new dataset from data in folder '{dataset_name}' in the datastore")
logging.info(
f"Creating a new dataset from data in folder '{dataset_name}' in the datastore")
# Ensure that there is a / at the end of the file path, otherwise folders that share a prefix could cause
# trouble (for example, folders foo and foo_bar exist, and I'm trying to create a dataset from "foo")
azureml_dataset = Dataset.File.from_files(path=(datastore, dataset_name + "/"))
azureml_dataset = Dataset.File.from_files(
path=(datastore, dataset_name + "/"))
logging.info("Registering the dataset for future use.")
azureml_dataset.register(workspace, name=dataset_name)
return azureml_dataset
Expand Down Expand Up @@ -107,15 +111,17 @@ def __init__(self,
# documentation tools in the editor work nicer.
name = name.strip()
if not name:
raise ValueError("The name of the dataset must be a non-empty string.")
raise ValueError(
"The name of the dataset must be a non-empty string.")
self.name = name
self.datastore = datastore
self.version = version
self.use_mounting = use_mounting
# If target_folder is "" then convert to None
self.target_folder = Path(target_folder) if target_folder else None
if str(self.target_folder) == ".":
raise ValueError("Can't mount or download a dataset to the current working directory.")
raise ValueError(
"Can't mount or download a dataset to the current working directory.")
self.local_folder = Path(local_folder) if local_folder else None

def to_input_dataset_local(self, workspace: Optional[Workspace]) -> Tuple[Path, Optional[MountContext]]:
Expand Down Expand Up @@ -152,7 +158,8 @@ def to_input_dataset_local(self, workspace: Optional[Workspace]) -> Tuple[Path,
else:
status += f"downloaded to {target_path}"
print(status)
azureml_dataset.download(target_path=str(target_path), overwrite=False)
azureml_dataset.download(
target_path=str(target_path), overwrite=False)
result = target_path, None
return result

Expand All @@ -170,10 +177,12 @@ def to_input_dataset(self,
azureml_dataset = get_or_create_dataset(workspace=workspace,
dataset_name=self.name,
datastore_name=self.datastore)
named_input = azureml_dataset.as_named_input(_input_dataset_key(index=dataset_index))
named_input = azureml_dataset.as_named_input(
_input_dataset_key(index=dataset_index))
# If running on Windows then self.target_folder may be a WindowsPath; make sure it is
# in POSIX format for Azure.
path_on_compute = self.target_folder.as_posix() if self.target_folder is not None else None
path_on_compute = self.target_folder.as_posix(
) if self.target_folder is not None else None
use_mounting = False if self.use_mounting is None else self.use_mounting
if use_mounting:
status += "mounted at "
Expand Down Expand Up @@ -311,7 +320,8 @@ def find_workspace_for_local_datasets(aml_workspace: Optional[Workspace],
workspace = get_workspace(aml_workspace, workspace_config_path)
logging.info(f"Found workspace for datasets: {workspace.name}")
except Exception as ex:
logging.info(f"Could not find workspace for datasets. Exception: {ex}")
logging.info(
f"Could not find workspace for datasets. Exception: {ex}")
return workspace


Expand All @@ -332,7 +342,8 @@ def setup_local_datasets(aml_workspace: Optional[Workspace],
:param dataset_configs: List of DatasetConfig describing the input datasets.
:return: Pair of: list of optional paths to the input datasets, list of mountcontexts, one for each mounted dataset.
"""
workspace = find_workspace_for_local_datasets(aml_workspace, workspace_config_path, dataset_configs)
workspace = find_workspace_for_local_datasets(
aml_workspace, workspace_config_path, dataset_configs)

mounted_input_datasets: List[Optional[Path]] = []
mount_contexts: List[MountContext] = []
Expand Down
3 changes: 2 additions & 1 deletion hi-ml-azure/src/health_azure/examples/elevate_this.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ def main() -> None:
wait_for_completion_show_output=True)

parser = ArgumentParser()
parser.add_argument("-m", "--message", type=str, required=True, help="The message to print out")
parser.add_argument("-m", "--message", type=str,
required=True, help="The message to print out")
args = parser.parse_args()

print(f"The message was: {args.message}")
Expand Down
Loading

0 comments on commit 1d7dec9

Please sign in to comment.