diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py b/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py index 9aabd87b4688e..3b2077b4c0e0e 100644 --- a/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py +++ b/llvm/utils/mlgo-utils/mlgo/corpus/combine_training_corpus.py @@ -23,26 +23,25 @@ and corpus2 are combined into combinedcorpus. """ -from absl import app -from absl import flags +import argparse from mlgo.corpus import combine_training_corpus_lib -flags.DEFINE_string("root_dir", "", "root dir of module paths to combine.") -FLAGS = flags.FLAGS +def parse_args_and_run(): + parser = argparse.ArgumentParser( + description="A tool for combining multiple training corpora" + ) + parser.add_argument( + "--root_dir", type=str, help="The root dir of module paths to combine." + ) + args = parser.parse_args() + main(args) -def main(argv): - if len(argv) > 1: - raise app.UsageError("Too many command-line arguments.") - - combine_training_corpus_lib.combine_corpus(FLAGS.root_dir) - - -def entrypoint(): - app.run(main) +def main(args): + combine_training_corpus_lib.combine_corpus(args.root_dir) if __name__ == "__main__": - entrypoint() + parse_args_and_run() diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py b/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py index 9463e61dc534f..94415431ab4a3 100644 --- a/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py +++ b/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py @@ -24,129 +24,146 @@ any output. """ +import argparse import json +import logging import multiprocessing -from absl import app -from absl import flags -from absl import logging - from mlgo.corpus import extract_ir_lib -flags.DEFINE_string( - "input", - None, - "Input file or directory - either compile_commands.json, a linker parameter" - "list, or a path to a directory containing object files.", -) -flags.DEFINE_enum( - "input_type", - "json", - ["json", "params", "directory"], - "Input file type - json, params, or directory. params latter refers to lld" - "params.", -) -flags.DEFINE_string("output_dir", None, "Output directory") -flags.DEFINE_integer( - "num_workers", - None, - "Number of parallel workers for objcopy. `None` for maximum available.", -) -flags.DEFINE_string("llvm_objcopy_path", "llvm-objcopy", "Path to llvm-objcopy") -flags.DEFINE_string( - "obj_base_dir", - "", - "Base directory for object files. Defaults to current working dir.", -) -flags.DEFINE_string( - "cmd_filter", - None, - "Include only those modules with a command line matching this regexp. " - "Setting it to None for not filtering. Note that the regexp is applied " - "independently for each separate command line option. For example, ^-Oz$ " - "will match Oz - built binaries. Does not work with thinlto_build=lld.", -) -flags.DEFINE_enum( - "thinlto_build", - None, - ["distributed", "local"], - "Set if the build was performed with either 'distributed' or " - "'local' ThinLTO. This ensures the thinlto.bc files are also copied. " - "The build is assumed to have had " - "-mllvm -lto-embed-bitcode=post-merge-pre-opt passed in the distributed " - "case, or -Wl,--save-temps=import and -Wl,--thinlto-emit-index-files " - "passed in the local case.", -) -flags.DEFINE_string( - "cmd_section_name", - ".llvmcmd", - "The section name passed to llvm-objcopy. For ELF object files, the " - "default .llvmcmd is correct. For Mach-O object files, one should use " - "something like __LLVM,__cmdline", -) -flags.DEFINE_string( - "bitcode_section_name", - ".llvmbc", - "The section name passed to llvm-objcopy. For ELF object files, the " - "default .llvmbc is correct. For Mach-O object files, one should use " - "__LLVM,__bitcode", -) - -flags.mark_flag_as_required("output_dir") - -FLAGS = flags.FLAGS - - -def main(argv): - if len(argv) > 1: - raise app.UsageError("Too many command-line arguments.") +def parse_args_and_run(): + parser = argparse.ArgumentParser( + description="A tool for making a corpus from build artifacts" + ) + parser.add_argument( + "--input", + type=str, + help="Input file or directory - either compile_commands.json, a linker " + "parameter list, or a path to a directory containing object files.", + ) + parser.add_argument( + "--input_type", + type=str, + help="Input file type - JSON, LLD params, or directory.", + choices=["json", "params", "directory"], + default="json", + nargs="?", + ) + parser.add_argument("--output_dir", type=str, help="Output directory") + parser.add_argument( + "--num_workers", + type=int, + help="Number of parallel works for objcopy. `None` for maximum available.", + default=None, + nargs="?", + ) + parser.add_argument( + "--llvm_objcopy_path", + type=str, + help="Path to llvm-objcopy", + default="llvm-objcopy", + nargs="?", + ) + parser.add_argument( + "--obj_base_dir", + type=str, + help="Base directory for object files. Defaults to current working dir.", + default="", + nargs="?", + ) + parser.add_argument( + "--cmd_filter", + type=str, + help="Include only those modules with a command line matching this regular " + "expression. Set it to None to not perform any filtering. Note that the " + "regular expression is applied independently for each separate command line " + "option. For example, ^-Oz$ will match Oz built binaries. This does not work " + "with thinlto_build=lld.", + default=None, + nargs="?", + ) + parser.add_argument( + "--thinlto_build", + type=str, + help="Set if the build was performed with either 'distributed' or 'local' " + "ThinLTO. This ensures the thinlto.bc files are also copied. The build is " + "assumed to have had -mllvm -lto-embed-bitcode=post-merge-pre-opt passed in " + "the distributed case or -Wl,--save-temps=import and " + "-Wl,--thinlto-emit-index-files passed in the local case", + choices=["distributed", "local"], + default=None, + nargs="?", + ) + parser.add_argument( + "--cmd_section_name", + type=str, + help="The section name passed to llvm-objcopy. For ELF object files, the " + "default .llvmcmd is correct. For Mach-O object files, one should use " + "something like __LLVM,__cmdline", + default=".llvmcmd", + nargs="?", + ) + parser.add_argument( + "--bitcode_section_name", + type=str, + help="The section name passed to llvm-objcopy. For ELF object files, the " + "default .llvmbc is correct. For Mach-O object files, one should use " + "__LLVM,__bitcode", + default=".llvmbc", + nargs="?", + ) + args = parser.parse_args() + main(args) + + +def main(args): objs = [] - if FLAGS.input is not None and FLAGS.thinlto_build == "local": + if args.input is not None and args.thinlto_build == "local": raise ValueError("--thinlto_build=local cannot be run with --input") - if FLAGS.input is None: - if FLAGS.thinlto_build != "local": + if args.input is None: + if args.thinlto_build != "local": raise ValueError("--input or --thinlto_build=local must be provided") - objs = extract_ir_lib.load_for_lld_thinlto(FLAGS.obj_base_dir, FLAGS.output_dir) - elif FLAGS.input_type == "json": - with open(FLAGS.input, encoding="utf-8") as f: + objs = extract_ir_lib.load_for_lld_thinlto(args.obj_base_dir, args.output_dir) + elif args.input_type == "json": + with open(args.input, encoding="utf-8") as f: objs = extract_ir_lib.load_from_compile_commands( - json.load(f), FLAGS.output_dir + json.load(f), args.output_dir ) - elif FLAGS.input_type == "params": - if not FLAGS.obj_base_dir: + elif args.input_type == "params": + if not args.obj_base_dir: logging.info( "-obj_base_dir is unspecified, assuming current directory." "If no objects are found, use this option to specify the root" "directory for the object file paths in the input file." ) - with open(FLAGS.input, encoding="utf-8") as f: + with open(args.input, encoding="utf-8") as f: objs = extract_ir_lib.load_from_lld_params( - [l.strip() for l in f.readlines()], FLAGS.obj_base_dir, FLAGS.output_dir + [l.strip() for l in f.readlines()], args.obj_base_dir, args.output_dir ) - elif FLAGS.input_type == "directory": + elif args.input_type == "directory": logging.warning( "Using the directory input is only recommended if the build system" "your project uses does not support any structured output that" "ml-compiler-opt understands. If your build system provides a" "structured compilation database, use that instead" ) - objs = extract_ir_lib.load_from_directory(FLAGS.input, FLAGS.output_dir) + objs = extract_ir_lib.load_from_directory(args.input, args.output_dir) else: - logging.error("Unknown input type: %s", FLAGS.input_type) + logging.error("Unknown input type: %s", args.input_type) relative_output_paths = extract_ir_lib.run_extraction( objs, - FLAGS.num_workers, - FLAGS.llvm_objcopy_path, - FLAGS.cmd_filter, - FLAGS.thinlto_build, - FLAGS.cmd_section_name, - FLAGS.bitcode_section_name, + args.num_workers, + args.llvm_objcopy_path, + args.cmd_filter, + args.thinlto_build, + args.cmd_section_name, + args.bitcode_section_name, ) extract_ir_lib.write_corpus_manifest( - FLAGS.thinlto_build, relative_output_paths, FLAGS.output_dir + args.thinlto_build, relative_output_paths, args.output_dir ) logging.info( @@ -156,10 +173,5 @@ def main(argv): ) -def entrypoint(): - multiprocessing.set_start_method("fork") - app.run(main) - - if __name__ == "__main__": - entrypoint() + parse_args_and_run() diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py b/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py index edb0ecd853de2..221486e16c6e0 100644 --- a/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py +++ b/llvm/utils/mlgo-utils/mlgo/corpus/make_corpus.py @@ -12,43 +12,42 @@ --default_args="" """ -from absl import app -from absl import flags -from absl import logging +import argparse +import logging from mlgo.corpus import make_corpus_lib -flags.DEFINE_string("input_dir", None, "The input directory.") -flags.DEFINE_string("output_dir", None, "The output directory.") -flags.DEFINE_string( - "default_args", - "", - "The compiler flags to compile with when using downstream tooling.", -) -flags.mark_flag_as_required("input_dir") -flags.mark_flag_as_required("output_dir") - -FLAGS = flags.FLAGS +def parse_args_and_run(): + parser = argparse.ArgumentParser( + description="A tool for making a corpus from arbitrary bitcode" + ) + parser.add_argument("--input_dir", type=str, help="The input directory.") + parser.add_argument("--output_dir", type=str, help="The output directory.") + parser.add_argument( + "--default_args", + type=str, + help="The compiler flags to compile with when using downstream tooling.", + default="", + nargs="?", + ) + args = parser.parse_args() + main(args) -def main(_): +def main(args): logging.warning( "Using this tool does not guarantee that the bitcode is taken at " "the correct stage for consumption during model training. Make " "sure to validate assumptions about where the bitcode is coming " "from before using it in production." ) - relative_paths = make_corpus_lib.load_bitcode_from_directory(FLAGS.input_dir) - make_corpus_lib.copy_bitcode(relative_paths, FLAGS.input_dir, FLAGS.output_dir) + relative_paths = make_corpus_lib.load_bitcode_from_directory(args.input_dir) + make_corpus_lib.copy_bitcode(relative_paths, args.input_dir, args.output_dir) make_corpus_lib.write_corpus_manifest( - relative_paths, FLAGS.output_dir, FLAGS.default_args.split() + relative_paths, args.output_dir, args.default_args.split() ) -def entrypoint(): - app.run(main) - - if __name__ == "__main__": - entrypoint() + parse_args_and_run() diff --git a/llvm/utils/mlgo-utils/pyproject.toml b/llvm/utils/mlgo-utils/pyproject.toml index be2af86cd05df..dac18a785c17b 100644 --- a/llvm/utils/mlgo-utils/pyproject.toml +++ b/llvm/utils/mlgo-utils/pyproject.toml @@ -7,9 +7,6 @@ name = "mlgo" description = "Tooling for ML in LLVM" readme = "README.md" requires-python = ">=3.8,<3.11" -dependencies = [ - "absl-py>=1.0.0" -] dynamic = ["version"] license = {text = "Apache-2.0 WITH LLVM-exception"} classifiers = [ diff --git a/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test b/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test index 933a9c2b9f811..51dc637347caf 100644 --- a/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test +++ b/llvm/utils/mlgo-utils/tests/corpus/combine_training_corpus_script.test @@ -1,4 +1,4 @@ -# REQUIRES: python-38, absl, system-linux +# REQUIRES: python-38, system-linux ## Testing that the combine_trainig_corpus script works as expected when ## invoked. diff --git a/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test b/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test index c20581dacdc65..107116618ce97 100644 --- a/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test +++ b/llvm/utils/mlgo-utils/tests/corpus/extract_ir_script.test @@ -1,4 +1,4 @@ -# REQUIRES: python-38, absl, system-linux +# REQUIRES: python-38, system-linux ## Test that invoking the extract_ir script work as expected. diff --git a/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test b/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test index 3c1b96523718e..a08780055f31f 100644 --- a/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test +++ b/llvm/utils/mlgo-utils/tests/corpus/make_corpus_script.test @@ -1,4 +1,4 @@ -# REQUIRES: python-38, absl, system-linux +# REQUIRES: python-38, system-linux ## Testing that the make_corpus script works as expected when invoked. diff --git a/llvm/utils/mlgo-utils/tests/lit.local.cfg b/llvm/utils/mlgo-utils/tests/lit.local.cfg index 90cdf8ba618ed..a9088750cb58b 100644 --- a/llvm/utils/mlgo-utils/tests/lit.local.cfg +++ b/llvm/utils/mlgo-utils/tests/lit.local.cfg @@ -4,11 +4,3 @@ import sys # the entire project has been bumped to 3.8. if sys.version_info > (3,8): config.available_features.add("python-38") - -# TODO(boomanaiden154): Remove this flag once the scripts are converted to -# not use absl anymore. -try: - import absl - config.available_features.add("absl") -except: - pass