-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[automl] Memory Aware Config Tuning #1257
Merged
Merged
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
import copy | ||
from collections import OrderedDict | ||
|
||
import psutil | ||
import ray | ||
|
||
try:
    import GPUtil
except ImportError:
    # The import that failed here is GPUtil, so the message must name
    # GPUtil -- the previous text incorrectly claimed ray was missing.
    raise ImportError(
        'GPUtil is not installed. '
        'In order to use auto_train please run '
        'pip install ludwig[ray]'
    )
|
||
from ludwig.api import LudwigModel | ||
from ludwig.automl.utils import get_available_resources | ||
from ludwig.data.preprocessing import preprocess_for_training | ||
from ludwig.features.feature_registries import update_config_with_metadata | ||
from ludwig.utils.defaults import merge_with_defaults | ||
from ludwig.constants import COMBINER, HYPEROPT, BATCH_SIZE, TRAINING, TYPE, PREPROCESSING, SPACE | ||
|
||
# maps variable search space that can be modified to minimum permissible value for the range | ||
# Maps each combiner type to an ordered mapping of modifiable hyperopt
# search-space names (dotted "<section>.<param>" paths) -> the minimum
# permissible value the tuner may shrink that parameter down to.
# Order matters: memory_tune_config reduces/exhausts earlier entries first.
RANKED_MODIFIABLE_PARAM_LIST = {
    'tabnet': OrderedDict({
        'training.batch_size': 32,
        'combiner.size': 8,
        'combiner.output_size': 8,
    }),
    'concat': OrderedDict({
        'training.batch_size': 32,
        'combiner.fc_size': 64,
        'combiner.num_fc_layers': 1,
    }),
    'tabtransformer': OrderedDict({
        'training.batch_size': 32,
        # BUG FIX: key previously had a trailing colon ('combiner.num_heads:')
        # and could never match the real 'combiner.num_heads' parameter.
        'combiner.num_heads': 4,
        'combiner.output_size': 8,
        'combiner.num_layers': 4,
        'combiner.num_fc_layers': 1,
    }),
}
|
||
|
||
BYTES_PER_MiB = 1048576 | ||
|
||
|
||
def get_trainingset_metadata(config, dataset):
    """Preprocess ``dataset`` under ``config`` and return only the
    training-set metadata produced by Ludwig's preprocessing pipeline."""
    preprocessed = preprocess_for_training(
        config,
        dataset=dataset,
        preprocessing_params=config[PREPROCESSING],
    )
    # preprocess_for_training returns a 4-tuple whose last element is the
    # training-set metadata; the data splits themselves are not needed here.
    return preprocessed[3]
|
||
|
||
def get_machine_memory():
    """Return the usable memory budget of the current machine, in bytes.

    On a ray cluster the probe runs as a remote task; GPU memory of the
    first visible GPU is preferred when the cluster (or local machine)
    has GPUs, otherwise total system RAM is used.
    """
    if not ray.is_initialized():  # local execution, no ray cluster
        local_gpus = GPUtil.getGPUs()
        if local_gpus:
            return local_gpus[0].memory_total * BYTES_PER_MiB
        return psutil.virtual_memory().total

    @ray.remote(num_gpus=1)
    def _remote_gpu_memory():
        # GPUtil reports memory in MiB; convert to bytes.
        gpus = GPUtil.getGPUs()
        return gpus[0].memory_total * BYTES_PER_MiB

    @ray.remote(num_cpus=1)
    def _remote_cpu_memory():
        return psutil.virtual_memory().total

    # Check whether the cluster has GPUs and probe the matching resource.
    cluster_resources = get_available_resources()
    if cluster_resources['gpu'] > 0:
        return ray.get(_remote_gpu_memory.remote())
    return ray.get(_remote_cpu_memory.remote())
|
||
|
||
def compute_memory_usage(config, training_set_metadata) -> int:
    """Estimate the training memory footprint of ``config``'s model, in bytes.

    Builds the model, sums the element counts of all weight tensors scaled
    by the configured batch size, and converts elements to bytes assuming
    32-bit (4-byte) precision.

    NOTE(review): this is a coarse upper-bound heuristic -- activation and
    optimizer memory are approximated by scaling weights by batch size.
    """
    update_config_with_metadata(config, training_set_metadata)
    lm = LudwigModel.create_model(config)
    lm.get_connected_model()
    model_tensors = lm.collect_weights()
    total_elements = 0
    batch_size = config[TRAINING][BATCH_SIZE]
    for tnsr in model_tensors:
        # tnsr is a (name, tensor) pair; count elements of the tensor.
        total_elements += tnsr[1].numpy().size * batch_size
    # BUG FIX: 32-bit precision is 4 bytes per element. The previous code
    # multiplied by 32, returning bits while callers compare the result
    # against machine memory in bytes -- an 8x overestimate.
    total_bytes = total_elements * 4
    return total_bytes
|
||
|
||
def sub_new_params(config: dict, new_param_vals: dict):
    """Return a deep copy of ``config`` with dotted-path overrides applied.

    Each key in ``new_param_vals`` is a '<section>.<param>' path; the value
    is written into the corresponding nested section of the copy. The input
    ``config`` is left unmodified.
    """
    updated_config = copy.deepcopy(config)
    for dotted_name, value in new_param_vals.items():
        parts = dotted_name.split(".")
        updated_config[parts[0]][parts[1]] = value
    return updated_config
|
||
|
||
def get_new_params(current_param_values, hyperparam_search_space, params_to_modify):
    """Set each modifiable parameter to the largest value its space allows.

    For 'choice' spaces the last category is used (assumed largest -- TODO
    confirm categories are sorted ascending); for range spaces the 'upper'
    bound is used.

    BUG FIX: parameters listed in ``params_to_modify`` but absent from
    ``hyperparam_search_space`` previously raised a KeyError; they are now
    skipped, matching how memory_tune_config handles that case itself.

    Returns the (mutated) ``current_param_values`` dict.
    """
    for param in params_to_modify:
        param_space = hyperparam_search_space.get(param)
        if param_space is None:
            # Not part of the user's hyperopt search space -- nothing to set.
            continue
        if param_space[SPACE] == "choice":
            current_param_values[param] = param_space['categories'][-1]
        else:
            current_param_values[param] = param_space['upper']
    return current_param_values
|
||
|
||
def memory_tune_config(config, dataset):
    """Shrink the hyperopt search space until the model fits in memory.

    Starting from the most memory-hungry point of the search space (every
    modifiable parameter at its maximum), repeatedly estimates memory usage
    and tightens one parameter's range at a time -- in the priority order
    given by RANKED_MODIFIABLE_PARAM_LIST for the configured combiner --
    until the estimate fits within machine memory or every parameter's
    reduction is exhausted.

    Returns:
        (modified_config, fits_in_memory): a deep copy of ``config`` with
        the tightened hyperopt parameter space, and whether the maximal
        configuration of that space is estimated to fit in memory.
    """
    fits_in_memory = False
    raw_config = merge_with_defaults(config)
    training_set_metadata = get_trainingset_metadata(raw_config, dataset)
    modified_hyperparam_search_space = copy.deepcopy(
        raw_config[HYPEROPT]['parameters'])
    params_to_modify = RANKED_MODIFIABLE_PARAM_LIST[raw_config[COMBINER][TYPE]]
    param_list = list(params_to_modify.keys())
    current_param_values = {}
    max_memory = get_machine_memory()

    # BUG FIX: the loop previously ran `while param_list is not None`; an
    # emptied list is never None, so exhausting every parameter raised
    # IndexError on param_list[0] instead of exiting the loop.
    while param_list:
        # Estimate memory at the current (maximal) parameter values.
        current_param_values = get_new_params(
            current_param_values, modified_hyperparam_search_space, params_to_modify)
        temp_config = sub_new_params(raw_config, current_param_values)
        if compute_memory_usage(temp_config, training_set_metadata) < max_memory:
            fits_in_memory = True
            break
        # Otherwise reduce the highest-priority parameter that still has room.
        param = param_list[0]
        min_value = params_to_modify[param]

        if param in modified_hyperparam_search_space:
            param_space = modified_hyperparam_search_space[param][SPACE]
            if param_space == "choice":
                # Drop the largest category while more than two remain and the
                # next-largest category still exceeds the permissible minimum.
                if len(modified_hyperparam_search_space[param]['categories']) > 2 and \
                        modified_hyperparam_search_space[param]['categories'][-2] > min_value:
                    modified_hyperparam_search_space[param][
                        'categories'] = modified_hyperparam_search_space[param]['categories'][:-1]
                else:
                    param_list.pop(0)  # exhausted reduction of this parameter
            else:
                # Numeric range: shrink the upper bound by 10% of the range.
                upper_bound = modified_hyperparam_search_space[param]["upper"]
                lower_bound = modified_hyperparam_search_space[param]["lower"]
                reduction_val = (upper_bound - lower_bound) * 0.1
                new_upper_bound = upper_bound - reduction_val
                if new_upper_bound > lower_bound and new_upper_bound > min_value:
                    modified_hyperparam_search_space[param]["upper"] = new_upper_bound
                else:
                    param_list.pop(0)  # exhausted reduction of this parameter
        else:
            param_list.pop(0)  # param not in hyperopt search space

    modified_config = copy.deepcopy(config)
    modified_config[HYPEROPT]["parameters"] = modified_hyperparam_search_space
    return modified_config, fits_in_memory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
ray[default,tune] | ||
pickle5 | ||
tensorboardX<2.3 | ||
GPUtil | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we add this to the
requirements_ray.txt
instead of asking the user to install this by hand?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should probably move the
import ray
in here, too, given the error message.