From 6574ef81f1c64b4cfa26a6eea07231093f6a49b9 Mon Sep 17 00:00:00 2001 From: Will Constable Date: Thu, 24 Sep 2020 14:48:00 -0400 Subject: [PATCH 01/13] Add generate_score_config.py and compute_score.py - Add baseline score config (score.yml) and check in output torchbench_0.0.yaml --- score/README.md | 24 +++ score/compute_score.py | 41 +++++ score/generate_score_config.py | 121 +++++++++++++++ score/score.yml | 38 +++++ score/torchbench_0.0.yaml | 266 +++++++++++++++++++++++++++++++++ 5 files changed, 490 insertions(+) create mode 100644 score/README.md create mode 100644 score/compute_score.py create mode 100644 score/generate_score_config.py create mode 100644 score/score.yml create mode 100644 score/torchbench_0.0.yaml diff --git a/score/README.md b/score/README.md new file mode 100644 index 0000000000..cff74249b6 --- /dev/null +++ b/score/README.md @@ -0,0 +1,24 @@ +# Torchbench Score + +Torchbench provides a normalized benchmark score similar to 'spec' or other computing benchmarks. + +This is a prototype. Current status and limitations are described below. + +## Score versioning +The score is versioned, meaning only a certain set of benchmarks are captured in a particular +version of the score (even if additional benchmarks are added to the suite). The relative weight +of each benchmark in the overall score is frozen, along with a normalization factor measured on +a particular 'gold' machine with a particular PyTorch release. The intent is to measure the effect +of new pytorch versions on the same workloads using the same reference machine using a consistent +benchmark configuration. + +## Computing the score +To compute the current score, provide a score config and benchmark data produced by pytest with `--benchmark-json` or related arguments. 
+`python compute_score.py --configuration --benchmark_data ` + +## New score versions +Periodically, as more workloads have been added to the torchbenchmark suite, or as changes to +relative weights or categories have been proposed, a new score configuration should be generated +rather than modifying an existing score definition. + +See `python generate_score_config.py -h` \ No newline at end of file diff --git a/score/compute_score.py b/score/compute_score.py new file mode 100644 index 0000000000..e0d8b48520 --- /dev/null +++ b/score/compute_score.py @@ -0,0 +1,41 @@ + +""" +Compute the benchmark score given a frozen score configuration and current benchmark data. +""" +import argparse +import json +import yaml + +def compute_score(config, data): + target = config['target'] + score = 1.0 + weight_sum = 0.0 + for name in config['benchmarks']: + cfg = config['benchmarks'][name] + weight, norm = cfg['weight'], cfg['norm'] + weight_sum += weight + measured_mean = [b['stats']['mean'] for b in data['benchmarks'] if b['name'] == name][0] + benchmark_score = (norm / measured_mean) ** weight + print(f"{name}: {benchmark_score}") + score *= benchmark_score + + score = score ** (1.0 / len(config['benchmarks'])) + assert abs(weight_sum - 1.0) < 1e-6, f"Bad configuration, weights don't sum to 1, but {weight_sum}" + return score * target + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--configuration", required=True, + help="frozen benchmark configuration generated by generate_score_config.py") + parser.add_argument("--benchmark_data", required=True, + help="pytest-benchmark json file with current benchmark data") + args = parser.parse_args() + + with open(args.configuration) as cfg_file: + config = yaml.full_load(cfg_file) + + with open(args.benchmark_data) as data_file: + data = json.load(data_file) + + score = compute_score(config, data) + print(f"Benchmark Score: {score} (rounded) {int(round(score))}") \ No 
newline at end of file diff --git a/score/generate_score_config.py b/score/generate_score_config.py new file mode 100644 index 0000000000..567de9a7d9 --- /dev/null +++ b/score/generate_score_config.py @@ -0,0 +1,121 @@ +""" +Generate a fully specified benchmark configuration file, given a lightweight +specification and a complete source of benchmark data. + +Specification File +------------------ +Score heirarchy input intended to be as easy to construct as possible, +relying on automatic inference of unspecified weights, benchmark configs, +and normalization factors given a particular instance of benchmark data. + +Structure: + Root _ + - category | required: + - domain | 3 layers of organizational structure + - task _| + + - benchmark name - keyword match for root name in benchmark, + omit children unless used + _ + - train/eval | optional: + - device | provide specific weights or + - compiler/runtime _| exclude particular configs by omission + +Rules for describing the weight heirarchy +- everything is a dict, since at any level you could specify a weight +- if a weight is not specified, it is computed automatically with respect to +its direct siblings. +- if specific benchmark configurations are omitted under a benchmark name, +all configurations present in the normalization data json are weighted equally + +Normalization Data +------------------ +Used to 'fill in the gaps' in the human written specification. + +Benchmark configurations (train/eval, device, compiler/runtime) present in +this data are frozen into the configuration and weighted equally if +specific weights aren't provided in the specification. + +Normalization values are the benchmark measurements taken from this data, used +to produce a value of 1.0 for each benchmark before applying weights and combining. 
+ +#### +TODO +#### + - handle multiple normalization files, one for models, one for synthetic, etc + - make explicit configuration choice for throughput vs runtime metrics + - assert same machine used for all normalization files and freeze that in +""" +import argparse +import json +import yaml + +def generate_bench_cfg(spec, norm, target): + cfg = { + 'target': target, + 'benchmarks': {}, + } + benchmark_names = [b['name'] for b in norm['benchmarks']] + benchmark_norms = {b['name']: b['stats']['mean'] for b in norm['benchmarks']} + + assert len(spec['heirarchy']) > 0, "Must specify at least one category" + category_weight = 1.0 / len(spec['heirarchy']) + for category in spec['heirarchy']: + + category_spec = spec['heirarchy'][category] + assert isinstance(category_spec, dict), f"Category {category} in spec must be non-empty" + assert 'weight' not in category_spec, "TODO implement manual category weights" + domain_weight = 1.0 / len(category_spec) + + for domain in category_spec: + + tasks = category_spec[domain] + assert isinstance(tasks, dict), f"Domain {category}:{domain} in spec must be non-empty" + assert 'weight' not in tasks, "TODO implement manual domain weights" + task_weight = 1.0 / len(tasks) + + for task in tasks: + + benchmarks = tasks[task] + assert isinstance(benchmarks, dict), f"Task {category}:{domain}:{task} in spec must be non-empty" + assert 'weight' not in benchmarks, "TODO implement manual task weights" + benchmark_weight = 1.0 / len(benchmarks) + + for benchmark in benchmarks: + + assert benchmarks[benchmark] is None, "TODO handle benchmark as dict of config specs" + # assert 'weight' not in benchmarks[benchmark], "TODO implement manual benchmark weights" + found_benchmarks = [name for name in benchmark_names if benchmark in name] + assert len(found_benchmarks) > 0, f"No normalization data found for {benchmark}" + config_weight = 1.0 / len(found_benchmarks) + for b in found_benchmarks: + weight = domain_weight * task_weight * 
benchmark_weight * config_weight + cfg['benchmarks'][b] = { + 'weight': weight, + 'norm': benchmark_norms[b], + } + return cfg + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--specification", required=True, + help="yaml file describing weight heirarchy") + parser.add_argument("--normalization_data", required=True, + help="pytest-benchmark json file used for generating normalization " + "values and filling in unspecified benchmark configurations") + parser.add_argument("--output_file", required=True, + help="generated complete benchmark configuration") + parser.add_argument("--target_score", default=1000, + help="target score value given these normalizations and specifications") + args = parser.parse_args() + + with open(args.specification) as spec_file: + spec = yaml.full_load(spec_file) + + with open(args.normalization_data) as norm_file: + norm = json.load(norm_file) + + with open(args.output_file, 'w') as out_file: + bench_cfg = generate_bench_cfg(spec, norm, args.target_score) + yaml.dump(bench_cfg, out_file) + \ No newline at end of file diff --git a/score/score.yml b/score/score.yml new file mode 100644 index 0000000000..fb2994958b --- /dev/null +++ b/score/score.yml @@ -0,0 +1,38 @@ +--- +heirarchy: + model: + computer vision: + segmentation: + maskrcnn_benchmark: + classification: + pytorch_mobilenet_v3: + detection: + yolov3: + generation: + pytorch_CycleGAN_and_pix...: + pytorch_stargan: + other computer vision: + Background_Matting: + Super_SloMo: + natural language processing: + translation: + attention_is_all_you_nee...: + language_modeling: + BERT_pytorch: + other nlp: + fastNLP: + speech: + synthesis: + tacotron2: + recommendation: + recommendation: + dlrm: + reinforcement learning: + other rl: + LearningToPaint: + other: + other tasks: + moco: + demucs: + pytorch_struct: +... 
\ No newline at end of file diff --git a/score/torchbench_0.0.yaml b/score/torchbench_0.0.yaml new file mode 100644 index 0000000000..4d8ab8a3df --- /dev/null +++ b/score/torchbench_0.0.yaml @@ -0,0 +1,266 @@ +benchmarks: + test_eval[BERT_pytorch-cpu-eager]: + norm: 0.056674222277769734 + weight: 0.006944444444444444 + test_eval[BERT_pytorch-cpu-jit]: + norm: 0.04845556728573034 + weight: 0.006944444444444444 + test_eval[BERT_pytorch-cuda-eager]: + norm: 0.026858860157876495 + weight: 0.006944444444444444 + test_eval[BERT_pytorch-cuda-jit]: + norm: 0.017104738694909322 + weight: 0.006944444444444444 + test_eval[LearningToPaint-cpu-eager]: + norm: 0.015391482121220757 + weight: 0.020833333333333332 + test_eval[LearningToPaint-cpu-jit]: + norm: 0.01744512900000531 + weight: 0.020833333333333332 + test_eval[LearningToPaint-cuda-eager]: + norm: 0.004052860649187367 + weight: 0.020833333333333332 + test_eval[LearningToPaint-cuda-jit]: + norm: 0.004101277514284299 + weight: 0.020833333333333332 + test_eval[Super_SloMo-cuda-eager]: + norm: 0.5727992373999768 + weight: 0.004166666666666667 + test_eval[Super_SloMo-cuda-jit]: + norm: 0.5758905388000131 + weight: 0.004166666666666667 + test_eval[attention_is_all_you_nee...-cpu-eager]: + norm: 0.8630710880000152 + weight: 0.006944444444444444 + test_eval[attention_is_all_you_nee...-cpu-jit]: + norm: 0.8489351521999652 + weight: 0.006944444444444444 + test_eval[attention_is_all_you_nee...-cuda-eager]: + norm: 0.18483205416672868 + weight: 0.006944444444444444 + test_eval[attention_is_all_you_nee...-cuda-jit]: + norm: 0.18512023083333892 + weight: 0.006944444444444444 + test_eval[demucs-cpu-eager]: + norm: 0.15394044100006404 + weight: 0.006944444444444444 + test_eval[demucs-cpu-jit]: + norm: 0.154964403714335 + weight: 0.006944444444444444 + test_eval[demucs-cuda-eager]: + norm: 0.08700494508337897 + weight: 0.006944444444444444 + test_eval[demucs-cuda-jit]: + norm: 0.08798973516667274 + weight: 0.006944444444444444 + 
test_eval[dlrm-cpu-eager]: + norm: 0.0007821738814477574 + weight: 0.041666666666666664 + test_eval[dlrm-cuda-eager]: + norm: 0.0011976210188676443 + weight: 0.041666666666666664 + test_eval[fastNLP-cpu-eager]: + norm: 0.0003339627349426991 + weight: 0.006944444444444444 + test_eval[fastNLP-cpu-jit]: + norm: 0.0002815807873683167 + weight: 0.006944444444444444 + test_eval[fastNLP-cuda-eager]: + norm: 0.00026201308868427635 + weight: 0.006944444444444444 + test_eval[fastNLP-cuda-jit]: + norm: 0.00021672122712418238 + weight: 0.006944444444444444 + test_eval[maskrcnn_benchmark-cuda-eager]: + norm: 0.15884241357148962 + weight: 0.016666666666666666 + test_eval[moco-cuda-eager]: + norm: 0.6445621068000037 + weight: 0.013888888888888888 + test_eval[moco-cuda-jit]: + norm: 0.6464073749999443 + weight: 0.013888888888888888 + test_eval[pytorch_CycleGAN_and_pix...-cuda-eager]: + norm: 0.037252862678558686 + weight: 0.005555555555555555 + test_eval[pytorch_CycleGAN_and_pix...-cuda-jit]: + norm: 0.03734425745829147 + weight: 0.005555555555555555 + test_eval[pytorch_mobilenet_v3-cpu-eager]: + norm: 0.02729150021618684 + weight: 0.004166666666666667 + test_eval[pytorch_mobilenet_v3-cpu-jit]: + norm: 0.027794305918903136 + weight: 0.004166666666666667 + test_eval[pytorch_mobilenet_v3-cuda-eager]: + norm: 0.02729599686483345 + weight: 0.004166666666666667 + test_eval[pytorch_mobilenet_v3-cuda-jit]: + norm: 0.02273801193330453 + weight: 0.004166666666666667 + test_eval[pytorch_stargan-cpu-eager]: + norm: 1.030070522799997 + weight: 0.0020833333333333333 + test_eval[pytorch_stargan-cpu-jit]: + norm: 1.0323324262000824 + weight: 0.0020833333333333333 + test_eval[pytorch_stargan-cuda-eager]: + norm: 0.10837374479997379 + weight: 0.0020833333333333333 + test_eval[pytorch_stargan-cuda-jit]: + norm: 0.10870929039992916 + weight: 0.0020833333333333333 + test_eval[pytorch_struct-cpu-eager]: + norm: 0.002562631041459985 + weight: 0.006944444444444444 + test_eval[pytorch_struct-cpu-jit]: + 
norm: 0.002147531797012927 + weight: 0.006944444444444444 + test_eval[pytorch_struct-cuda-eager]: + norm: 0.0016883496258385442 + weight: 0.006944444444444444 + test_eval[pytorch_struct-cuda-jit]: + norm: 0.0012041589738027185 + weight: 0.006944444444444444 + test_eval[tacotron2-cuda-eager]: + norm: 1.0758157056000528 + weight: 0.08333333333333333 + test_eval[yolov3-cpu-eager]: + norm: 0.8865785995999431 + weight: 0.01111111111111111 + test_eval[yolov3-cuda-eager]: + norm: 0.5735116551998545 + weight: 0.01111111111111111 + test_train[BERT_pytorch-cpu-eager]: + norm: 0.27472745519994535 + weight: 0.006944444444444444 + test_train[BERT_pytorch-cpu-jit]: + norm: 0.3208459026000128 + weight: 0.006944444444444444 + test_train[BERT_pytorch-cuda-eager]: + norm: 0.13274032924994117 + weight: 0.006944444444444444 + test_train[BERT_pytorch-cuda-jit]: + norm: 0.12152077777778282 + weight: 0.006944444444444444 + test_train[Background_Matting-cuda-eager]: + norm: 4.454985670599944 + weight: 0.008333333333333333 + test_train[Background_Matting-cuda-jit]: + norm: 4.3299996296000245 + weight: 0.008333333333333333 + test_train[LearningToPaint-cpu-eager]: + norm: 0.06769545181248304 + weight: 0.020833333333333332 + test_train[LearningToPaint-cpu-jit]: + norm: 0.09986237930002062 + weight: 0.020833333333333332 + test_train[LearningToPaint-cuda-eager]: + norm: 0.021776436187498877 + weight: 0.020833333333333332 + test_train[LearningToPaint-cuda-jit]: + norm: 0.02460534921213807 + weight: 0.020833333333333332 + test_train[Super_SloMo-cuda-eager]: + norm: 1.4887921610000376 + weight: 0.004166666666666667 + test_train[Super_SloMo-cuda-jit]: + norm: 1.4842954989999726 + weight: 0.004166666666666667 + test_train[attention_is_all_you_nee...-cpu-eager]: + norm: 2.8790815572000157 + weight: 0.006944444444444444 + test_train[attention_is_all_you_nee...-cpu-jit]: + norm: 2.8627824179999153 + weight: 0.006944444444444444 + test_train[attention_is_all_you_nee...-cuda-eager]: + norm: 
0.5916350972000146 + weight: 0.006944444444444444 + test_train[attention_is_all_you_nee...-cuda-jit]: + norm: 0.5918281878000471 + weight: 0.006944444444444444 + test_train[demucs-cpu-eager]: + norm: 0.7204151263999847 + weight: 0.006944444444444444 + test_train[demucs-cpu-jit]: + norm: 1.567230096000003 + weight: 0.006944444444444444 + test_train[demucs-cuda-eager]: + norm: 0.21315780479999374 + weight: 0.006944444444444444 + test_train[demucs-cuda-jit]: + norm: 0.21378219499997614 + weight: 0.006944444444444444 + test_train[dlrm-cpu-eager]: + norm: 0.0024918104727723656 + weight: 0.041666666666666664 + test_train[dlrm-cuda-eager]: + norm: 0.0038975701444924336 + weight: 0.041666666666666664 + test_train[fastNLP-cpu-eager]: + norm: 0.004783386184209382 + weight: 0.006944444444444444 + test_train[fastNLP-cpu-jit]: + norm: 0.004753424066670743 + weight: 0.006944444444444444 + test_train[fastNLP-cuda-eager]: + norm: 0.0025605078503221097 + weight: 0.006944444444444444 + test_train[fastNLP-cuda-jit]: + norm: 0.0025617683341551596 + weight: 0.006944444444444444 + test_train[maskrcnn_benchmark-cuda-eager]: + norm: 0.39820833739995576 + weight: 0.016666666666666666 + test_train[moco-cuda-eager]: + norm: 1.3086710984000092 + weight: 0.013888888888888888 + test_train[moco-cuda-jit]: + norm: 1.3050591925999924 + weight: 0.013888888888888888 + test_train[pytorch_CycleGAN_and_pix...-cuda-eager]: + norm: 53.70666004940008 + weight: 0.005555555555555555 + test_train[pytorch_mobilenet_v3-cpu-eager]: + norm: 0.25344906619993707 + weight: 0.004166666666666667 + test_train[pytorch_mobilenet_v3-cpu-jit]: + norm: 0.23886577479997867 + weight: 0.004166666666666667 + test_train[pytorch_mobilenet_v3-cuda-eager]: + norm: 0.2574923820000549 + weight: 0.004166666666666667 + test_train[pytorch_mobilenet_v3-cuda-jit]: + norm: 0.24167844399989918 + weight: 0.004166666666666667 + test_train[pytorch_stargan-cpu-eager]: + norm: 4.203840443199988 + weight: 0.0020833333333333333 + 
test_train[pytorch_stargan-cpu-jit]: + norm: 4.67151848600015 + weight: 0.0020833333333333333 + test_train[pytorch_stargan-cuda-eager]: + norm: 0.9323661928000547 + weight: 0.0020833333333333333 + test_train[pytorch_stargan-cuda-jit]: + norm: 0.9228581516000304 + weight: 0.0020833333333333333 + test_train[pytorch_struct-cpu-eager]: + norm: 1.2756745926000803 + weight: 0.006944444444444444 + test_train[pytorch_struct-cpu-jit]: + norm: 1.3099148541999057 + weight: 0.006944444444444444 + test_train[pytorch_struct-cuda-eager]: + norm: 0.4209450229998765 + weight: 0.006944444444444444 + test_train[pytorch_struct-cuda-jit]: + norm: 0.4204848049998873 + weight: 0.006944444444444444 + test_train[tacotron2-cuda-eager]: + norm: 4.155933508199996 + weight: 0.08333333333333333 + test_train[yolov3-cuda-eager]: + norm: 6.38542746819985 + weight: 0.01111111111111111 +target: 1000 From 935f0141d92b9b7fdb0fb9a2016378522a63fb72 Mon Sep 17 00:00:00 2001 From: Will Constable Date: Thu, 24 Sep 2020 20:09:38 -0400 Subject: [PATCH 02/13] Have CI upload benchmark score to scribe --- score/compute_score.py | 5 ++-- scripts/run_bench_and_upload.sh | 4 ++- scripts/upload_scribe.py | 43 ++++++++++++++++++++++++++++----- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/score/compute_score.py b/score/compute_score.py index e0d8b48520..9dfa847a28 100644 --- a/score/compute_score.py +++ b/score/compute_score.py @@ -16,7 +16,7 @@ def compute_score(config, data): weight_sum += weight measured_mean = [b['stats']['mean'] for b in data['benchmarks'] if b['name'] == name][0] benchmark_score = (norm / measured_mean) ** weight - print(f"{name}: {benchmark_score}") + # print(f"{name}: {benchmark_score}") score *= benchmark_score score = score ** (1.0 / len(config['benchmarks'])) @@ -38,4 +38,5 @@ def compute_score(config, data): data = json.load(data_file) score = compute_score(config, data) - print(f"Benchmark Score: {score} (rounded) {int(round(score))}") \ No newline at end of file + 
print(score) + # print(f"Benchmark Score: {score} (rounded) {int(round(score))}") \ No newline at end of file diff --git a/scripts/run_bench_and_upload.sh b/scripts/run_bench_and_upload.sh index 9611d72b8e..043be21b2f 100755 --- a/scripts/run_bench_and_upload.sh +++ b/scripts/run_bench_and_upload.sh @@ -24,12 +24,14 @@ BENCHMARK_ABS_FILENAME=${BENCHMARK_DATA}/${BENCHMARK_FILENAME} pytest test_bench.py --setup-show --benchmark-sort=Name --benchmark-json=${BENCHMARK_ABS_FILENAME} -k "$PYTEST_FILTER" +# Compute benchmark score +TORCHBENCH_SCORE=$(python score/compute_score.py --configuration score/torchbench_0.0.yaml --benchmark_data ${BENCHMARK_DATA}/hub.json) # Token is only present for certain jobs, only upload if present if [ -z "$SCRIBE_GRAPHQL_ACCESS_TOKEN" ] then echo "Skipping benchmark upload, token is missing." else - python scripts/upload_scribe.py --pytest_bench_json ${BENCHMARK_ABS_FILENAME} + python scripts/upload_scribe.py --pytest_bench_json ${BENCHMARK_ABS_FILENAME} --torchbench_score ${TORCHBENCH_SCORE} fi diff --git a/scripts/upload_scribe.py b/scripts/upload_scribe.py index 4192642271..c41d46dab5 100644 --- a/scripts/upload_scribe.py +++ b/scripts/upload_scribe.py @@ -85,7 +85,7 @@ def __init__(self): 'circle_build_num', 'circle_project_reponame', ], 'float': [ - 'stddev', 'min', 'median', 'max', 'mean', + 'stddev', 'min', 'median', 'max', 'mean', 'torchbench_score', ] } @@ -125,13 +125,44 @@ def post_pytest_benchmarks(self, pytest_json): messages.append(m) self.upload(messages) + def post_torchbench_score(self, pytest_json, score): + machine_info = pytest_json['machine_info'] + commit_info = pytest_json['commit_info'] + upload_time = int(time.time()) + m = self.format_message({ + "time": upload_time, + "benchmark_time": pytest_json['datetime'], + "git_repo": commit_info['project'], + "git_commit_id": commit_info['id'], + "git_branch": commit_info['branch'], + "git_commit_time": commit_info['time'], + "git_dirty": commit_info['dirty'], + 
"pytorch_version": machine_info.get('pytorch_version', None), + "torchtext_version": machine_info.get('torchtext_version', None), + "torchvision_version": machine_info.get('torchvision_version', None), + "python_version": machine_info['python_implementation_version'], + "machine_kernel": machine_info['release'], + "machine_processor": machine_info['processor'], + "machine_hostname": machine_info['node'], + "circle_build_num": machine_info.get('circle_build_num', None), + "circle_project_reponame": machine_info.get('circle_project_name', None), + "torchbench_score": score, + }) + self.upload(m) + if __name__ == "__main__": parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("--pytest_bench_json", type=argparse.FileType('r'), + parser.add_argument("--pytest_bench_json", required=True, + type=argparse.FileType('r'), help='Upload json data formatted by pytest-benchmark module') + parser.add_argument("--torchbench_score", type=float, + help="optional torchbench score to include") args = parser.parse_args() - if args.pytest_bench_json: - benchmark_uploader = PytorchBenchmarkUploader() - json_data = json.load(args.pytest_bench_json) - benchmark_uploader.post_pytest_benchmarks(json_data) + + benchmark_uploader = PytorchBenchmarkUploader() + json_data = json.load(args.pytest_bench_json) + benchmark_uploader.post_pytest_benchmarks(json_data) + + if args.torchbench_score is not None: + benchmark_uploader.post_torchbench_score(json_data, args.torchbench_score) From 29b34925c993cb9026fcbfafa14bed6a2aec86e2 Mon Sep 17 00:00:00 2001 From: Will Constable Date: Thu, 24 Sep 2020 23:18:54 -0400 Subject: [PATCH 03/13] Remove maskrcnn from score temporarily --- score/score.yml | 6 ++-- score/torchbench_0.0.yaml | 62 ++++++++++++++++++--------------------- 2 files changed, 31 insertions(+), 37 deletions(-) diff --git a/score/score.yml b/score/score.yml index fb2994958b..80f0bbb4f1 100644 --- a/score/score.yml +++ b/score/score.yml @@ -2,8 +2,8 @@ heirarchy: 
model: computer vision: - segmentation: - maskrcnn_benchmark: + # segmentation: + # maskrcnn_benchmark: classification: pytorch_mobilenet_v3: detection: @@ -35,4 +35,4 @@ heirarchy: moco: demucs: pytorch_struct: -... \ No newline at end of file +... diff --git a/score/torchbench_0.0.yaml b/score/torchbench_0.0.yaml index 4d8ab8a3df..75ff829254 100644 --- a/score/torchbench_0.0.yaml +++ b/score/torchbench_0.0.yaml @@ -25,10 +25,10 @@ benchmarks: weight: 0.020833333333333332 test_eval[Super_SloMo-cuda-eager]: norm: 0.5727992373999768 - weight: 0.004166666666666667 + weight: 0.005208333333333333 test_eval[Super_SloMo-cuda-jit]: norm: 0.5758905388000131 - weight: 0.004166666666666667 + weight: 0.005208333333333333 test_eval[attention_is_all_you_nee...-cpu-eager]: norm: 0.8630710880000152 weight: 0.006944444444444444 @@ -71,9 +71,6 @@ benchmarks: test_eval[fastNLP-cuda-jit]: norm: 0.00021672122712418238 weight: 0.006944444444444444 - test_eval[maskrcnn_benchmark-cuda-eager]: - norm: 0.15884241357148962 - weight: 0.016666666666666666 test_eval[moco-cuda-eager]: norm: 0.6445621068000037 weight: 0.013888888888888888 @@ -82,34 +79,34 @@ benchmarks: weight: 0.013888888888888888 test_eval[pytorch_CycleGAN_and_pix...-cuda-eager]: norm: 0.037252862678558686 - weight: 0.005555555555555555 + weight: 0.006944444444444444 test_eval[pytorch_CycleGAN_and_pix...-cuda-jit]: norm: 0.03734425745829147 - weight: 0.005555555555555555 + weight: 0.006944444444444444 test_eval[pytorch_mobilenet_v3-cpu-eager]: norm: 0.02729150021618684 - weight: 0.004166666666666667 + weight: 0.005208333333333333 test_eval[pytorch_mobilenet_v3-cpu-jit]: norm: 0.027794305918903136 - weight: 0.004166666666666667 + weight: 0.005208333333333333 test_eval[pytorch_mobilenet_v3-cuda-eager]: norm: 0.02729599686483345 - weight: 0.004166666666666667 + weight: 0.005208333333333333 test_eval[pytorch_mobilenet_v3-cuda-jit]: norm: 0.02273801193330453 - weight: 0.004166666666666667 + weight: 0.005208333333333333 
test_eval[pytorch_stargan-cpu-eager]: norm: 1.030070522799997 - weight: 0.0020833333333333333 + weight: 0.0026041666666666665 test_eval[pytorch_stargan-cpu-jit]: norm: 1.0323324262000824 - weight: 0.0020833333333333333 + weight: 0.0026041666666666665 test_eval[pytorch_stargan-cuda-eager]: norm: 0.10837374479997379 - weight: 0.0020833333333333333 + weight: 0.0026041666666666665 test_eval[pytorch_stargan-cuda-jit]: norm: 0.10870929039992916 - weight: 0.0020833333333333333 + weight: 0.0026041666666666665 test_eval[pytorch_struct-cpu-eager]: norm: 0.002562631041459985 weight: 0.006944444444444444 @@ -127,10 +124,10 @@ benchmarks: weight: 0.08333333333333333 test_eval[yolov3-cpu-eager]: norm: 0.8865785995999431 - weight: 0.01111111111111111 + weight: 0.013888888888888888 test_eval[yolov3-cuda-eager]: norm: 0.5735116551998545 - weight: 0.01111111111111111 + weight: 0.013888888888888888 test_train[BERT_pytorch-cpu-eager]: norm: 0.27472745519994535 weight: 0.006944444444444444 @@ -145,10 +142,10 @@ benchmarks: weight: 0.006944444444444444 test_train[Background_Matting-cuda-eager]: norm: 4.454985670599944 - weight: 0.008333333333333333 + weight: 0.010416666666666666 test_train[Background_Matting-cuda-jit]: norm: 4.3299996296000245 - weight: 0.008333333333333333 + weight: 0.010416666666666666 test_train[LearningToPaint-cpu-eager]: norm: 0.06769545181248304 weight: 0.020833333333333332 @@ -163,10 +160,10 @@ benchmarks: weight: 0.020833333333333332 test_train[Super_SloMo-cuda-eager]: norm: 1.4887921610000376 - weight: 0.004166666666666667 + weight: 0.005208333333333333 test_train[Super_SloMo-cuda-jit]: norm: 1.4842954989999726 - weight: 0.004166666666666667 + weight: 0.005208333333333333 test_train[attention_is_all_you_nee...-cpu-eager]: norm: 2.8790815572000157 weight: 0.006944444444444444 @@ -209,9 +206,6 @@ benchmarks: test_train[fastNLP-cuda-jit]: norm: 0.0025617683341551596 weight: 0.006944444444444444 - test_train[maskrcnn_benchmark-cuda-eager]: - norm: 
0.39820833739995576 - weight: 0.016666666666666666 test_train[moco-cuda-eager]: norm: 1.3086710984000092 weight: 0.013888888888888888 @@ -220,31 +214,31 @@ benchmarks: weight: 0.013888888888888888 test_train[pytorch_CycleGAN_and_pix...-cuda-eager]: norm: 53.70666004940008 - weight: 0.005555555555555555 + weight: 0.006944444444444444 test_train[pytorch_mobilenet_v3-cpu-eager]: norm: 0.25344906619993707 - weight: 0.004166666666666667 + weight: 0.005208333333333333 test_train[pytorch_mobilenet_v3-cpu-jit]: norm: 0.23886577479997867 - weight: 0.004166666666666667 + weight: 0.005208333333333333 test_train[pytorch_mobilenet_v3-cuda-eager]: norm: 0.2574923820000549 - weight: 0.004166666666666667 + weight: 0.005208333333333333 test_train[pytorch_mobilenet_v3-cuda-jit]: norm: 0.24167844399989918 - weight: 0.004166666666666667 + weight: 0.005208333333333333 test_train[pytorch_stargan-cpu-eager]: norm: 4.203840443199988 - weight: 0.0020833333333333333 + weight: 0.0026041666666666665 test_train[pytorch_stargan-cpu-jit]: norm: 4.67151848600015 - weight: 0.0020833333333333333 + weight: 0.0026041666666666665 test_train[pytorch_stargan-cuda-eager]: norm: 0.9323661928000547 - weight: 0.0020833333333333333 + weight: 0.0026041666666666665 test_train[pytorch_stargan-cuda-jit]: norm: 0.9228581516000304 - weight: 0.0020833333333333333 + weight: 0.0026041666666666665 test_train[pytorch_struct-cpu-eager]: norm: 1.2756745926000803 weight: 0.006944444444444444 @@ -262,5 +256,5 @@ benchmarks: weight: 0.08333333333333333 test_train[yolov3-cuda-eager]: norm: 6.38542746819985 - weight: 0.01111111111111111 + weight: 0.013888888888888888 target: 1000 From ecec19ea5ce0bd8a37c563b5bc0533f0e6548b22 Mon Sep 17 00:00:00 2001 From: Will Constable Date: Fri, 25 Sep 2020 17:52:57 -0400 Subject: [PATCH 04/13] Add --benchmark_data_dir option to compute_score.py --- score/compute_score.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/score/compute_score.py 
b/score/compute_score.py index 9dfa847a28..9e76bd30c0 100644 --- a/score/compute_score.py +++ b/score/compute_score.py @@ -4,8 +4,11 @@ """ import argparse import json +import os import yaml +from tabulate import tabulate + def compute_score(config, data): target = config['target'] score = 1.0 @@ -27,16 +30,31 @@ def compute_score(config, data): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--configuration", required=True, help="frozen benchmark configuration generated by generate_score_config.py") - parser.add_argument("--benchmark_data", required=True, + parser.add_argument("--benchmark_data_file", help="pytest-benchmark json file with current benchmark data") + parser.add_argument("--benchmark_data_dir", + help="directory containing multiple .json files for each of which to compute a score") args = parser.parse_args() with open(args.configuration) as cfg_file: config = yaml.full_load(cfg_file) - with open(args.benchmark_data) as data_file: - data = json.load(data_file) + if args.benchmark_data_file is not None: + with open(args.benchmark_data_file) as data_file: + data = json.load(data_file) + + score = compute_score(config, data) + print(score) + elif args.benchmark_data_dir is not None: + scores = [('File', 'Score')] + for f in os.listdir(args.benchmark_data_dir): + path = os.path.join(args.benchmark_data_dir, f) + if os.path.isfile(path) and os.path.splitext(path)[1] == '.json': + with open(path) as data_file: + data = json.load(data_file) + score = compute_score(config, data) + scores.append((f, score)) + + print(tabulate(scores, headers='firstrow')) - score = compute_score(config, data) - print(score) # print(f"Benchmark Score: {score} (rounded) {int(round(score))}") \ No newline at end of file From be9394804bbbcc624c81c61dd285a0e5b5c1a7d5 Mon Sep 17 00:00:00 2001 From: Will Constable Date: Fri, 25 Sep 2020 17:53:55 -0400 Subject: [PATCH 05/13] Update run_bench_upload to use new compute_score flag --- 
scripts/run_bench_and_upload.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_bench_and_upload.sh b/scripts/run_bench_and_upload.sh index 043be21b2f..784590f40e 100755 --- a/scripts/run_bench_and_upload.sh +++ b/scripts/run_bench_and_upload.sh @@ -25,7 +25,7 @@ pytest test_bench.py --setup-show --benchmark-sort=Name --benchmark-json=${BENCH # Compute benchmark score -TORCHBENCH_SCORE=$(python score/compute_score.py --configuration score/torchbench_0.0.yaml --benchmark_data ${BENCHMARK_DATA}/hub.json) +TORCHBENCH_SCORE=$(python score/compute_score.py --configuration score/torchbench_0.0.yaml --benchmark_data_file ${BENCHMARK_DATA}/hub.json) # Token is only present for certain jobs, only upload if present if [ -z "$SCRIBE_GRAPHQL_ACCESS_TOKEN" ] then From a1500977009318a36e44e1ea833b9f79d9ddb948 Mon Sep 17 00:00:00 2001 From: Will Constable Date: Fri, 25 Sep 2020 17:54:40 -0400 Subject: [PATCH 06/13] Fix benchmark filename used in score computation --- scripts/run_bench_and_upload.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_bench_and_upload.sh b/scripts/run_bench_and_upload.sh index 784590f40e..7f224554aa 100755 --- a/scripts/run_bench_and_upload.sh +++ b/scripts/run_bench_and_upload.sh @@ -25,7 +25,7 @@ pytest test_bench.py --setup-show --benchmark-sort=Name --benchmark-json=${BENCH # Compute benchmark score -TORCHBENCH_SCORE=$(python score/compute_score.py --configuration score/torchbench_0.0.yaml --benchmark_data_file ${BENCHMARK_DATA}/hub.json) +TORCHBENCH_SCORE=$(python score/compute_score.py --configuration score/torchbench_0.0.yaml --benchmark_data_file ${BENCHMARK_ABS_FILENAME}) # Token is only present for certain jobs, only upload if present if [ -z "$SCRIBE_GRAPHQL_ACCESS_TOKEN" ] then From 0709e171d8dce7ce18db5f33f22f77f7ede4a57c Mon Sep 17 00:00:00 2001 From: Will Constable Date: Fri, 25 Sep 2020 18:51:50 -0400 Subject: [PATCH 07/13] Move dependencies from CI script to requirements.txt 
and add tabulate --- requirements.txt | 4 +++- scripts/install_nightlies.sh | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7bc348361c..7689900959 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ pytest -pytest-benchmark \ No newline at end of file +pytest-benchmark +requests +tabulate diff --git a/scripts/install_nightlies.sh b/scripts/install_nightlies.sh index a8f6d22942..cdeff59218 100755 --- a/scripts/install_nightlies.sh +++ b/scripts/install_nightlies.sh @@ -4,5 +4,3 @@ set -e . ~/miniconda3/etc/profile.d/conda.sh conda activate base conda install -y pytorch torchtext torchvision -c pytorch-nightly -pip install -q pytest pytest-benchmark requests - From aa8eb99c31d994565c841582ea434ad174cb8340 Mon Sep 17 00:00:00 2001 From: Will Constable Date: Mon, 28 Sep 2020 21:05:55 -0400 Subject: [PATCH 08/13] Change score computation to exp sum weighted log This computation has a more intuitive appeal, as a 2x across the board improvement would yield a 2x score improvement. Also add 'hack_data' option to enable quick experiments starting with real data and 'editing' only a keyword match set of measurements by some factor. 
--- score/compute_score.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/score/compute_score.py b/score/compute_score.py index 9e76bd30c0..e7c0c25818 100644 --- a/score/compute_score.py +++ b/score/compute_score.py @@ -4,27 +4,31 @@ """ import argparse import json +import math import os import yaml from tabulate import tabulate -def compute_score(config, data): +def compute_score(config, data, fake_data=None): target = config['target'] - score = 1.0 + score = 0.0 weight_sum = 0.0 for name in config['benchmarks']: cfg = config['benchmarks'][name] weight, norm = cfg['weight'], cfg['norm'] weight_sum += weight measured_mean = [b['stats']['mean'] for b in data['benchmarks'] if b['name'] == name][0] - benchmark_score = (norm / measured_mean) ** weight + if fake_data is not None and name in fake_data: + # used for sanity checks on the sensitivity of the score metric + measured_mean = fake_data[name] + benchmark_score = weight * math.log(norm / measured_mean) # print(f"{name}: {benchmark_score}") - score *= benchmark_score + score += benchmark_score - score = score ** (1.0 / len(config['benchmarks'])) + score = target * math.exp(score) assert abs(weight_sum - 1.0) < 1e-6, f"Bad configuration, weights don't sum to 1, but {weight_sum}" - return score * target + return score if __name__ == "__main__": parser = argparse.ArgumentParser(description=__doc__) @@ -34,8 +38,8 @@ def compute_score(config, data): help="pytest-benchmark json file with current benchmark data") parser.add_argument("--benchmark_data_dir", help="directory containing multiple .json files for each of which to compute a score") + parser.add_argument('--hack_data', nargs=2, action='append', help="keyword to match benchmark names, and multiplicative factor to adjust their measurement") args = parser.parse_args() - with open(args.configuration) as cfg_file: config = yaml.full_load(cfg_file) @@ -45,6 +49,16 @@ def compute_score(config, data): score = 
compute_score(config, data) print(score) + if args.hack_data: + fake_data = {} + for keyword, factor in args.hack_data: + for b in data['benchmarks']: + if keyword.lower() in b['name'].lower(): + fake_data[b['name']] = b['stats']['mean'] * float(factor) + + hacked_score = compute_score(config, data, fake_data) + print(f"Using hacks {args.hack_data}, hacked_score {hacked_score}") + elif args.benchmark_data_dir is not None: scores = [('File', 'Score')] for f in os.listdir(args.benchmark_data_dir): From 3acd61bc329580da4bbc4a69c7bed634bfe2f387 Mon Sep 17 00:00:00 2001 From: Will Constable Date: Mon, 28 Sep 2020 21:09:46 -0400 Subject: [PATCH 09/13] Add assert for missing data during score computation - doesn't change the outcome but prints helpful error --- score/compute_score.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/score/compute_score.py b/score/compute_score.py index e7c0c25818..ea0c33f15e 100644 --- a/score/compute_score.py +++ b/score/compute_score.py @@ -18,7 +18,9 @@ def compute_score(config, data, fake_data=None): cfg = config['benchmarks'][name] weight, norm = cfg['weight'], cfg['norm'] weight_sum += weight - measured_mean = [b['stats']['mean'] for b in data['benchmarks'] if b['name'] == name][0] + measured_mean = [b['stats']['mean'] for b in data['benchmarks'] if b['name'] == name] + assert len(measured_mean) == 1, f"Missing data for {name}, unable to compute score" + measured_mean = measured_mean[0] if fake_data is not None and name in fake_data: # used for sanity checks on the sensitivity of the score metric measured_mean = fake_data[name] From 67c4730cf9b591b3d1f5e3c3fcf542a2ee89ee92 Mon Sep 17 00:00:00 2001 From: Will Constable Date: Mon, 28 Sep 2020 23:43:38 -0400 Subject: [PATCH 10/13] Temporarily remove cyclegan/stargan from score as they have been disabled on PR jobs preventing score calculation on PRs --- score/score.yml | 6 ++-- score/torchbench_0.0.yaml | 67 ++++++++++----------------------------- 2 files changed, 20 
insertions(+), 53 deletions(-) diff --git a/score/score.yml b/score/score.yml index 80f0bbb4f1..982695f3ea 100644 --- a/score/score.yml +++ b/score/score.yml @@ -8,9 +8,9 @@ heirarchy: pytorch_mobilenet_v3: detection: yolov3: - generation: - pytorch_CycleGAN_and_pix...: - pytorch_stargan: + # generation: + # pytorch_CycleGAN_and_pix...: + # pytorch_stargan: other computer vision: Background_Matting: Super_SloMo: diff --git a/score/torchbench_0.0.yaml b/score/torchbench_0.0.yaml index 75ff829254..d972734e60 100644 --- a/score/torchbench_0.0.yaml +++ b/score/torchbench_0.0.yaml @@ -25,10 +25,10 @@ benchmarks: weight: 0.020833333333333332 test_eval[Super_SloMo-cuda-eager]: norm: 0.5727992373999768 - weight: 0.005208333333333333 + weight: 0.006944444444444444 test_eval[Super_SloMo-cuda-jit]: norm: 0.5758905388000131 - weight: 0.005208333333333333 + weight: 0.006944444444444444 test_eval[attention_is_all_you_nee...-cpu-eager]: norm: 0.8630710880000152 weight: 0.006944444444444444 @@ -77,36 +77,18 @@ benchmarks: test_eval[moco-cuda-jit]: norm: 0.6464073749999443 weight: 0.013888888888888888 - test_eval[pytorch_CycleGAN_and_pix...-cuda-eager]: - norm: 0.037252862678558686 - weight: 0.006944444444444444 - test_eval[pytorch_CycleGAN_and_pix...-cuda-jit]: - norm: 0.03734425745829147 - weight: 0.006944444444444444 test_eval[pytorch_mobilenet_v3-cpu-eager]: norm: 0.02729150021618684 - weight: 0.005208333333333333 + weight: 0.006944444444444444 test_eval[pytorch_mobilenet_v3-cpu-jit]: norm: 0.027794305918903136 - weight: 0.005208333333333333 + weight: 0.006944444444444444 test_eval[pytorch_mobilenet_v3-cuda-eager]: norm: 0.02729599686483345 - weight: 0.005208333333333333 + weight: 0.006944444444444444 test_eval[pytorch_mobilenet_v3-cuda-jit]: norm: 0.02273801193330453 - weight: 0.005208333333333333 - test_eval[pytorch_stargan-cpu-eager]: - norm: 1.030070522799997 - weight: 0.0026041666666666665 - test_eval[pytorch_stargan-cpu-jit]: - norm: 1.0323324262000824 - weight: 
0.0026041666666666665 - test_eval[pytorch_stargan-cuda-eager]: - norm: 0.10837374479997379 - weight: 0.0026041666666666665 - test_eval[pytorch_stargan-cuda-jit]: - norm: 0.10870929039992916 - weight: 0.0026041666666666665 + weight: 0.006944444444444444 test_eval[pytorch_struct-cpu-eager]: norm: 0.002562631041459985 weight: 0.006944444444444444 @@ -124,10 +106,10 @@ benchmarks: weight: 0.08333333333333333 test_eval[yolov3-cpu-eager]: norm: 0.8865785995999431 - weight: 0.013888888888888888 + weight: 0.018518518518518517 test_eval[yolov3-cuda-eager]: norm: 0.5735116551998545 - weight: 0.013888888888888888 + weight: 0.018518518518518517 test_train[BERT_pytorch-cpu-eager]: norm: 0.27472745519994535 weight: 0.006944444444444444 @@ -142,10 +124,10 @@ benchmarks: weight: 0.006944444444444444 test_train[Background_Matting-cuda-eager]: norm: 4.454985670599944 - weight: 0.010416666666666666 + weight: 0.013888888888888888 test_train[Background_Matting-cuda-jit]: norm: 4.3299996296000245 - weight: 0.010416666666666666 + weight: 0.013888888888888888 test_train[LearningToPaint-cpu-eager]: norm: 0.06769545181248304 weight: 0.020833333333333332 @@ -160,10 +142,10 @@ benchmarks: weight: 0.020833333333333332 test_train[Super_SloMo-cuda-eager]: norm: 1.4887921610000376 - weight: 0.005208333333333333 + weight: 0.006944444444444444 test_train[Super_SloMo-cuda-jit]: norm: 1.4842954989999726 - weight: 0.005208333333333333 + weight: 0.006944444444444444 test_train[attention_is_all_you_nee...-cpu-eager]: norm: 2.8790815572000157 weight: 0.006944444444444444 @@ -212,33 +194,18 @@ benchmarks: test_train[moco-cuda-jit]: norm: 1.3050591925999924 weight: 0.013888888888888888 - test_train[pytorch_CycleGAN_and_pix...-cuda-eager]: - norm: 53.70666004940008 - weight: 0.006944444444444444 test_train[pytorch_mobilenet_v3-cpu-eager]: norm: 0.25344906619993707 - weight: 0.005208333333333333 + weight: 0.006944444444444444 test_train[pytorch_mobilenet_v3-cpu-jit]: norm: 0.23886577479997867 - weight: 
0.005208333333333333 + weight: 0.006944444444444444 test_train[pytorch_mobilenet_v3-cuda-eager]: norm: 0.2574923820000549 - weight: 0.005208333333333333 + weight: 0.006944444444444444 test_train[pytorch_mobilenet_v3-cuda-jit]: norm: 0.24167844399989918 - weight: 0.005208333333333333 - test_train[pytorch_stargan-cpu-eager]: - norm: 4.203840443199988 - weight: 0.0026041666666666665 - test_train[pytorch_stargan-cpu-jit]: - norm: 4.67151848600015 - weight: 0.0026041666666666665 - test_train[pytorch_stargan-cuda-eager]: - norm: 0.9323661928000547 - weight: 0.0026041666666666665 - test_train[pytorch_stargan-cuda-jit]: - norm: 0.9228581516000304 - weight: 0.0026041666666666665 + weight: 0.006944444444444444 test_train[pytorch_struct-cpu-eager]: norm: 1.2756745926000803 weight: 0.006944444444444444 @@ -256,5 +223,5 @@ benchmarks: weight: 0.08333333333333333 test_train[yolov3-cuda-eager]: norm: 6.38542746819985 - weight: 0.013888888888888888 + weight: 0.018518518518518517 target: 1000 From fbf346865b8388ce1199cb76371efc772d68718a Mon Sep 17 00:00:00 2001 From: wconstab Date: Tue, 29 Sep 2020 12:51:59 -0700 Subject: [PATCH 11/13] Update README.md --- score/README.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/score/README.md b/score/README.md index cff74249b6..c8defe157c 100644 --- a/score/README.md +++ b/score/README.md @@ -14,11 +14,23 @@ benchmark configuration. ## Computing the score To compute the current score, provide a score config and benchmark data produced by pytest with `--benchmark-json` or related arguments. -`python compute_score.py --configuration --benchmark_data ` +`python compute_score.py --configuration --benchmark_data_file ` +Or, use `--benchmark_data_dir` instead, pointing to a directory containing multiple json files to compute a table of scores. 
## New score versions Periodically, as more workloads have been added to the torchbenchmark suite, or as changes to relative weights or categories have been proposed, a new score configuration should be generated rather than modifying an existing score definition. -See `python generate_score_config.py -h` \ No newline at end of file +See `python generate_score_config.py -h` + +## Issues and Next Steps +For accurate score comparisons, measurements should be computed on the same machine +(or at least same machine spec) as the data used to produce normalization constants +in the score configuration. + +- compute_score.py should assert the machine type matches by default +- currently, a circleCI 'medium' gpu worker was used for the normalization data +- soon, a particular CPU/GPU config should be deliberately selected along with a + list of models/categories to be frozen for first long-living rev of the score + From 41da4ee0bf7618e857d45bb680be12a03bcf95be Mon Sep 17 00:00:00 2001 From: wconstab Date: Tue, 29 Sep 2020 14:19:58 -0700 Subject: [PATCH 12/13] Update generate_score_config.py --- score/generate_score_config.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/score/generate_score_config.py b/score/generate_score_config.py index 567de9a7d9..bc65120fc2 100644 --- a/score/generate_score_config.py +++ b/score/generate_score_config.py @@ -32,12 +32,10 @@ ------------------ Used to 'fill in the gaps' in the human written specification. -Benchmark configurations (train/eval, device, compiler/runtime) present in -this data are frozen into the configuration and weighted equally if weights -specific weights aren't provided in the specification. - -Normalization values are the benchmark measurements taken from this data, used -to produce a value of 1.0 for each benchmark before applying weights and combining. 
+- particular configurations (train/eval, device, compiler/runtime) present in +this data are used to compute benchmark weights +- measurements from this data are used as normalization factors in score computation + such that new data is scored relative to this data. #### TODO @@ -118,4 +116,4 @@ def generate_bench_cfg(spec, norm, target): with open(args.output_file, 'w') as out_file: bench_cfg = generate_bench_cfg(spec, norm, args.target_score) yaml.dump(bench_cfg, out_file) - \ No newline at end of file + From 78687a77286dd9a046864aa618f41ba2b9671765 Mon Sep 17 00:00:00 2001 From: Will Constable Date: Thu, 1 Oct 2020 14:12:50 -0400 Subject: [PATCH 13/13] Fix spelling of hierarchy --- score/generate_score_config.py | 14 +++++++------- score/score.yml | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/score/generate_score_config.py b/score/generate_score_config.py index bc65120fc2..539e816940 100644 --- a/score/generate_score_config.py +++ b/score/generate_score_config.py @@ -4,7 +4,7 @@ Specification File ------------------ -Score heirarchy input intended to be as easy to construct as possible, +Score hierarchy input intended to be as easy to construct as possible, relying on automatic inference of unspecified weights, benchmark configs, and normalization factors given a particular instance of benchmark data. @@ -21,7 +21,7 @@ - device | provide specific weights or - compiler/runtime _| exclude particular configs by omission -Rules for describing the weight heirarchy +Rules for describing the weight hierarchy - everything is a dict, since at any level you could specify a weight - if a weight is not specified, it is computed automatically with respect its direct siblings. 
@@ -56,11 +56,11 @@ def generate_bench_cfg(spec, norm, target): benchmark_names = [b['name'] for b in norm['benchmarks']] benchmark_norms = {b['name']: b['stats']['mean'] for b in norm['benchmarks']} - assert len(spec['heirarchy']) > 0, "Must specify at least one category" - category_weight = 1.0 / len(spec['heirarchy']) - for category in spec['heirarchy']: + assert len(spec['hierarchy']) > 0, "Must specify at least one category" + category_weight = 1.0 / len(spec['hierarchy']) + for category in spec['hierarchy']: - category_spec = spec['heirarchy'][category] + category_spec = spec['hierarchy'][category] assert isinstance(category_spec, dict), f"Category {category} in spec must be non-empty" assert 'weight' not in category_spec, "TODO implement manual category weights" domain_weight = 1.0 / len(category_spec) @@ -97,7 +97,7 @@ def generate_bench_cfg(spec, norm, target): if __name__ == "__main__": parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--specification", required=True, - help="yaml file describing weight heirarchy") + help="yaml file describing weight hierarchy") parser.add_argument("--normalization_data", required=True, help="pytest-benchmark json file used for generating normalization " "values and filling in unspecified benchmark configurations") diff --git a/score/score.yml b/score/score.yml index 982695f3ea..b82d07c78c 100644 --- a/score/score.yml +++ b/score/score.yml @@ -1,5 +1,5 @@ --- -heirarchy: +hierarchy: model: computer vision: # segmentation: