-
Notifications
You must be signed in to change notification settings - Fork 10.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MLGO] Remove absl dependency from scripts #78880
[MLGO] Remove absl dependency from scripts #78880
Conversation
@llvm/pr-subscribers-mlgo Author: Aiden Grossman (boomanaiden154) ChangesThis patch removes the absl dependency from the mlgo-utils scripts. We were only using absl.logging, and absl.flags, so this patch just consists of mechanically converting the absl flags parsing to Python's builtin argparse as Python's logging is a drop in replacement for absl.logging. Full diff: https://github.com/llvm/llvm-project/pull/78880.diff 8 Files Affected:
diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py b/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py
index 9aabd87b4688e0..cc21061cbbef5e 100644
--- a/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py
+++ b/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py
@@ -23,26 +23,21 @@
and corpus2 are combined into combinedcorpus.
"""
-from absl import app
-from absl import flags
+import argparse
from mlgo.corpus import combine_training_corpus_lib
-flags.DEFINE_string("root_dir", "", "root dir of module paths to combine.")
-FLAGS = flags.FLAGS
-
-
-def main(argv):
- if len(argv) > 1:
- raise app.UsageError("Too many command-line arguments.")
-
- combine_training_corpus_lib.combine_corpus(FLAGS.root_dir)
-
-
-def entrypoint():
- app.run(main)
+def main(args):
+ combine_training_corpus_lib.combine_corpus(args.root_dir)
if __name__ == "__main__":
- entrypoint()
+ parser = argparse.ArgumentParser(
+ description="A tool for combining multiple training corpora"
+ )
+ parser.add_argument(
+ "--root_dir", type=str, help="The root dir of module paths to combine."
+ )
+ args = parser.parse_args()
+ main(args)
diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py b/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py
index 9463e61dc534fe..4426463e22b0e7 100644
--- a/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py
+++ b/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py
@@ -26,127 +26,59 @@
import json
import multiprocessing
-
-from absl import app
-from absl import flags
-from absl import logging
+import logging
+import argparse
from mlgo.corpus import extract_ir_lib
-flags.DEFINE_string(
- "input",
- None,
- "Input file or directory - either compile_commands.json, a linker parameter"
- "list, or a path to a directory containing object files.",
-)
-flags.DEFINE_enum(
- "input_type",
- "json",
- ["json", "params", "directory"],
- "Input file type - json, params, or directory. params latter refers to lld"
- "params.",
-)
-flags.DEFINE_string("output_dir", None, "Output directory")
-flags.DEFINE_integer(
- "num_workers",
- None,
- "Number of parallel workers for objcopy. `None` for maximum available.",
-)
-flags.DEFINE_string("llvm_objcopy_path", "llvm-objcopy", "Path to llvm-objcopy")
-flags.DEFINE_string(
- "obj_base_dir",
- "",
- "Base directory for object files. Defaults to current working dir.",
-)
-flags.DEFINE_string(
- "cmd_filter",
- None,
- "Include only those modules with a command line matching this regexp. "
- "Setting it to None for not filtering. Note that the regexp is applied "
- "independently for each separate command line option. For example, ^-Oz$ "
- "will match Oz - built binaries. Does not work with thinlto_build=lld.",
-)
-flags.DEFINE_enum(
- "thinlto_build",
- None,
- ["distributed", "local"],
- "Set if the build was performed with either 'distributed' or "
- "'local' ThinLTO. This ensures the thinlto.bc files are also copied. "
- "The build is assumed to have had "
- "-mllvm -lto-embed-bitcode=post-merge-pre-opt passed in the distributed "
- "case, or -Wl,--save-temps=import and -Wl,--thinlto-emit-index-files "
- "passed in the local case.",
-)
-flags.DEFINE_string(
- "cmd_section_name",
- ".llvmcmd",
- "The section name passed to llvm-objcopy. For ELF object files, the "
- "default .llvmcmd is correct. For Mach-O object files, one should use "
- "something like __LLVM,__cmdline",
-)
-flags.DEFINE_string(
- "bitcode_section_name",
- ".llvmbc",
- "The section name passed to llvm-objcopy. For ELF object files, the "
- "default .llvmbc is correct. For Mach-O object files, one should use "
- "__LLVM,__bitcode",
-)
-
-flags.mark_flag_as_required("output_dir")
-
-FLAGS = flags.FLAGS
-
-
-def main(argv):
- if len(argv) > 1:
- raise app.UsageError("Too many command-line arguments.")
+def main(args):
objs = []
- if FLAGS.input is not None and FLAGS.thinlto_build == "local":
+ if args.input is not None and args.thinlto_build == "local":
raise ValueError("--thinlto_build=local cannot be run with --input")
- if FLAGS.input is None:
- if FLAGS.thinlto_build != "local":
+ if args.input is None:
+ if args.thinlto_build != "local":
raise ValueError("--input or --thinlto_build=local must be provided")
- objs = extract_ir_lib.load_for_lld_thinlto(FLAGS.obj_base_dir, FLAGS.output_dir)
- elif FLAGS.input_type == "json":
- with open(FLAGS.input, encoding="utf-8") as f:
+ objs = extract_ir_lib.load_for_lld_thinlto(args.obj_base_dir, args.output_dir)
+ elif args.input_type == "json":
+ with open(args.input, encoding="utf-8") as f:
objs = extract_ir_lib.load_from_compile_commands(
- json.load(f), FLAGS.output_dir
+ json.load(f), args.output_dir
)
- elif FLAGS.input_type == "params":
- if not FLAGS.obj_base_dir:
+ elif args.input_type == "params":
+ if not args.obj_base_dir:
logging.info(
"-obj_base_dir is unspecified, assuming current directory."
"If no objects are found, use this option to specify the root"
"directory for the object file paths in the input file."
)
- with open(FLAGS.input, encoding="utf-8") as f:
+ with open(args.input, encoding="utf-8") as f:
objs = extract_ir_lib.load_from_lld_params(
- [l.strip() for l in f.readlines()], FLAGS.obj_base_dir, FLAGS.output_dir
+ [l.strip() for l in f.readlines()], args.obj_base_dir, args.output_dir
)
- elif FLAGS.input_type == "directory":
+ elif args.input_type == "directory":
logging.warning(
"Using the directory input is only recommended if the build system"
"your project uses does not support any structured output that"
"ml-compiler-opt understands. If your build system provides a"
"structured compilation database, use that instead"
)
- objs = extract_ir_lib.load_from_directory(FLAGS.input, FLAGS.output_dir)
+ objs = extract_ir_lib.load_from_directory(args.input, args.output_dir)
else:
- logging.error("Unknown input type: %s", FLAGS.input_type)
+ logging.error("Unknown input type: %s", args.input_type)
relative_output_paths = extract_ir_lib.run_extraction(
objs,
- FLAGS.num_workers,
- FLAGS.llvm_objcopy_path,
- FLAGS.cmd_filter,
- FLAGS.thinlto_build,
- FLAGS.cmd_section_name,
- FLAGS.bitcode_section_name,
+ args.num_workers,
+ args.llvm_objcopy_path,
+ args.cmd_filter,
+ args.thinlto_build,
+ args.cmd_section_name,
+ args.bitcode_section_name,
)
extract_ir_lib.write_corpus_manifest(
- FLAGS.thinlto_build, relative_output_paths, FLAGS.output_dir
+ args.thinlto_build, relative_output_paths, args.output_dir
)
logging.info(
@@ -156,10 +88,86 @@ def main(argv):
)
-def entrypoint():
- multiprocessing.set_start_method("fork")
- app.run(main)
-
-
if __name__ == "__main__":
- entrypoint()
+ parser = argparse.ArgumentParser(
+ description="A tool for making a corpus from build artifacts"
+ )
+ parser.add_argument(
+ "--input",
+ type=str,
+ help="Input file or directory - either compile_commands.json, a linker "
+ "parameter list, or a path to a directory containing object files.",
+ )
+ parser.add_argument(
+ "--input_type",
+ type=str,
+ help="Input file type - JSON, LLD params, or directory.",
+ choices=["json", "params", "directory"],
+ default="json",
+ nargs="?",
+ )
+ parser.add_argument("--output_dir", type=str, help="Output directory")
+ parser.add_argument(
+ "--num_workers",
+ type=int,
+ help="Number of parallel works for objcopy. `None` for maximum available.",
+ default=None,
+ nargs="?",
+ )
+ parser.add_argument(
+ "--llvm_objcopy_path",
+ type=str,
+ help="Path to llvm-objcopy",
+ default="llvm-objcopy",
+ nargs="?",
+ )
+ parser.add_argument(
+ "--obj_base_dir",
+ type=str,
+ help="Base directory for object files. Defaults to current working dir.",
+ default="",
+ nargs="?",
+ )
+ parser.add_argument(
+ "--cmd_filter",
+ type=str,
+ help="Include only those modules with a command line matching this regular "
+ "expression. Set it to None to not perform any filtering. Note that the "
+ "regular expression is applied independently for each separate command line "
+ "option. For example, ^-Oz$ will match Oz built binaries. This does not work "
+ "with thinlto_build=lld.",
+ default=None,
+ nargs="?",
+ )
+ parser.add_argument(
+ "--thinlto_build",
+ type=str,
+ help="Set if the build was performed with either 'distributed' or 'local' "
+ "ThinLTO. This ensures the thinlto.bc files are also copied. The build is "
+ "assumed to have had -mllvm -lto-embed-bitcode=post-merge-pre-opt passed in "
+ "the distributed case or -Wl,--save-temps=import and "
+ "-Wl,--thinlto-emit-index-files passed in the local case",
+ choices=["distributed", "local"],
+ default=None,
+ nargs="?",
+ )
+ parser.add_argument(
+ "--cmd_section_name",
+ type=str,
+ help="The section name passed to llvm-objcopy. For ELF object files, the "
+ "default .llvmcmd is correct. For Mach-O object files, one should use "
+ "something like __LLVM,__cmdline",
+ default=".llvmcmd",
+ nargs="?",
+ )
+ parser.add_argument(
+ "--bitcode_section_name",
+ type=str,
+ help="The section name passed to llvm-objcopy. For ELF object files, the "
+ "default .llvmbc is correct. For Mach-O object files, one should use "
+ "__LLVM,__bitcode",
+ default=".llvmbc",
+ nargs="?",
+ )
+ args = parser.parse_args()
+ main(args)
diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py b/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py
index edb0ecd853de24..05ceb750de673e 100644
--- a/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py
+++ b/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py
@@ -12,43 +12,38 @@
--default_args="<list of space separated flags>"
"""
-from absl import app
-from absl import flags
-from absl import logging
+import logging
+import argparse
from mlgo.corpus import make_corpus_lib
-flags.DEFINE_string("input_dir", None, "The input directory.")
-flags.DEFINE_string("output_dir", None, "The output directory.")
-flags.DEFINE_string(
- "default_args",
- "",
- "The compiler flags to compile with when using downstream tooling.",
-)
-flags.mark_flag_as_required("input_dir")
-flags.mark_flag_as_required("output_dir")
-
-FLAGS = flags.FLAGS
-
-
-def main(_):
+def main(args):
logging.warning(
"Using this tool does not guarantee that the bitcode is taken at "
"the correct stage for consumption during model training. Make "
"sure to validate assumptions about where the bitcode is coming "
"from before using it in production."
)
- relative_paths = make_corpus_lib.load_bitcode_from_directory(FLAGS.input_dir)
- make_corpus_lib.copy_bitcode(relative_paths, FLAGS.input_dir, FLAGS.output_dir)
+ relative_paths = make_corpus_lib.load_bitcode_from_directory(args.input_dir)
+ make_corpus_lib.copy_bitcode(relative_paths, args.input_dir, args.output_dir)
make_corpus_lib.write_corpus_manifest(
- relative_paths, FLAGS.output_dir, FLAGS.default_args.split()
+ relative_paths, args.output_dir, args.default_args.split()
)
-def entrypoint():
- app.run(main)
-
-
if __name__ == "__main__":
- entrypoint()
+ parser = argparse.ArgumentParser(
+ description="A tool for making a corpus from arbitrary bitcode"
+ )
+ parser.add_argument("--input_dir", type=str, help="The input directory.")
+ parser.add_argument("--output_dir", type=str, help="The output directory.")
+ parser.add_argument(
+ "--default_args",
+ type=str,
+ help="The compiler flags to compile with when using downstream tooling.",
+ default="",
+ nargs="?",
+ )
+ args = parser.parse_args()
+ main(args)
diff --git a/llvm/utils/mlgo-utils/pyproject.toml b/llvm/utils/mlgo-utils/pyproject.toml
index be2af86cd05df3..dac18a785c17b9 100644
--- a/llvm/utils/mlgo-utils/pyproject.toml
+++ b/llvm/utils/mlgo-utils/pyproject.toml
@@ -7,9 +7,6 @@ name = "mlgo"
description = "Tooling for ML in LLVM"
readme = "README.md"
requires-python = ">=3.8,<3.11"
-dependencies = [
- "absl-py>=1.0.0"
-]
dynamic = ["version"]
license = {text = "Apache-2.0 WITH LLVM-exception"}
classifiers = [
diff --git a/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test b/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test
index 933a9c2b9f811e..51dc637347caf0 100644
--- a/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test
+++ b/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test
@@ -1,4 +1,4 @@
-# REQUIRES: python-38, absl, system-linux
+# REQUIRES: python-38, system-linux
## Testing that the combine_trainig_corpus script works as expected when
## invoked.
diff --git a/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test b/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test
index c20581dacdc651..107116618ce97b 100644
--- a/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test
+++ b/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test
@@ -1,4 +1,4 @@
-# REQUIRES: python-38, absl, system-linux
+# REQUIRES: python-38, system-linux
## Test that invoking the extract_ir script work as expected.
diff --git a/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test b/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test
index 3c1b96523718e4..a08780055f31f1 100644
--- a/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test
+++ b/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test
@@ -1,4 +1,4 @@
-# REQUIRES: python-38, absl, system-linux
+# REQUIRES: python-38, system-linux
## Testing that the make_corpus script works as expected when invoked.
diff --git a/llvm/utils/mlgo-utils/tests/lit.local.cfg b/llvm/utils/mlgo-utils/tests/lit.local.cfg
index 90cdf8ba618ed8..a9088750cb58b1 100644
--- a/llvm/utils/mlgo-utils/tests/lit.local.cfg
+++ b/llvm/utils/mlgo-utils/tests/lit.local.cfg
@@ -4,11 +4,3 @@ import sys
# the entire project has been bumped to 3.8.
if sys.version_info > (3,8):
config.available_features.add("python-38")
-
-# TODO(boomanaiden154): Remove this flag once the scripts are converted to
-# not use absl anymore.
-try:
- import absl
- config.available_features.add("absl")
-except:
- pass
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, some nits
This patch removes the absl dependency from the mlgo-utils scripts. We were only using absl.logging, and absl.flags, so this patch just consists of mechanically converting the absl flags parsing to Python's builtin argparse as Python's logging is a drop in replacement for absl.logging.
c96e746
to
336ccb0
Compare
This patch removes the absl dependency from the mlgo-utils scripts. We were only using absl.logging, and absl.flags, so this patch just consists of mechanically converting the absl flags parsing to Python's builtin argparse as Python's logging is a drop in replacement for absl.logging.