Add benchmarks to CI #481

Closed · wants to merge 9 commits
Changes from all commits
96 changes: 96 additions & 0 deletions .circleci/config.yml
@@ -238,6 +238,39 @@ commands:
- store_artifacts:
path: runs/charlstm/test-reports

benchmark_layers_integration_test:
description: "Runs benchmark end to end"
parameters:
device:
default: "cpu"
type: string
layers:
default: "mha dpmha gsm_dpmha embedding gsm_embedding instancenorm gsm_instancenorm groupnorm gsm_groupnorm layernorm gsm_layernorm lstm dplstm gsm_dplstm rnn dprnn gsm_dprnn linear gsm_linear gru dpgru gsm_dpgru"
type: string
runtime_ratio_threshold:
default: "7.0"
type: string
memory_ratio_threshold:
default: "2.0"
type: string
steps:
- run:
name: benchmarks
command: |
mkdir -p benchmarks/results/raw
echo "Using $(python -V) ($(which python))"
echo "Using $(pip -V) ($(which pip))"
python benchmarks/run_benchmarks.py --batch_size 16 --layers <<parameters.layers>> --config_file ./benchmarks/config.json --root ./benchmarks/results/raw/ --cont
IFS=$' ';layers=(<<parameters.layers>>); rm -rf /tmp/report_layers; mkdir -p /tmp/report_layers; IFS=$'\n'; files=`( echo "${layers[*]}" ) | sed 's/.*/.\/benchmarks\/results\/raw\/&*/'`
cp -v ${files[@]} /tmp/report_layers
report_id=`IFS=$'-'; echo "${layers[*]}"`
python benchmarks/generate_report.py --path-to-results /tmp/report_layers --save-path benchmarks/results/report-${report_id}.csv --format csv
python benchmarks/generate_report.py --path-to-results /tmp/report_layers --save-path benchmarks/results/report-${report_id}.pkl --format pkl
python -c "import pandas as pd; r = pd.read_pickle('./benchmarks/results/report-"$report_id".pkl').fillna(0); th="<<parameters.runtime_ratio_threshold>>"; exit(0) if (r.loc[:, ('runtime', 'dp/control')] < th).all() and (r.loc[:, ('runtime', 'gsm/control')] < th).all() else exit(1)"
python -c "import pandas as pd; r = pd.read_pickle('./benchmarks/results/report-"$report_id".pkl').fillna(0); th="<<parameters.memory_ratio_threshold>>"; exit(0) if (r.loc[:, ('memory', 'dp/control')] < th).all() and (r.loc[:, ('memory', 'gsm/control')] < th).all() else exit(1)"
when: always
- store_artifacts:
path: benchmarks/results/
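
For readability, here is a standalone sketch of what the two inline `python -c` checks above do, assuming the report pickle has the two-level `(metric, variant)` columns produced by `generate_report.py`; the report file name is illustrative and the thresholds shown are the command defaults.

```python
# Minimal sketch of the CI threshold check performed by the inline python -c commands.
# The report path is illustrative; the thresholds match the command defaults.
import sys

import pandas as pd

report = pd.read_pickle("./benchmarks/results/report-example.pkl").fillna(0)

runtime_threshold = 7.0  # command default for runtime_ratio_threshold
memory_threshold = 2.0   # command default for memory_ratio_threshold

runtime_ok = (
    (report.loc[:, ("runtime", "dp/control")] < runtime_threshold).all()
    and (report.loc[:, ("runtime", "gsm/control")] < runtime_threshold).all()
)
memory_ok = (
    (report.loc[:, ("memory", "dp/control")] < memory_threshold).all()
    and (report.loc[:, ("memory", "gsm/control")] < memory_threshold).all()
)

# A non-zero exit code fails the CI step.
sys.exit(0 if runtime_ok and memory_ok else 1)
```
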
# -------------------------------------------------------------------------------------
# Jobs
# -------------------------------------------------------------------------------------
@@ -292,6 +325,7 @@ jobs:
- image: cimg/python:3.7.5
steps:
- checkout
- py_3_7_setup
- pip_dev_install
- mnist_integration_test:
device: "cpu"
@@ -316,6 +350,66 @@
- dcgan_integration_test:
device: "cuda"

micro_benchmarks_py37_torch_release_cuda:
machine:
resource_class: gpu.nvidia.small.multi
image: ubuntu-2004-cuda-11.4:202110-01
steps:
- checkout
- py_3_7_setup
- pip_dev_install
- run_nvidia_smi
- benchmark_layers_integration_test:
device: "cuda"
layers: "groupnorm gsm_groupnorm instancenorm gsm_instancenorm layernorm gsm_layernorm mha dpmha"
runtime_ratio_threshold: "2.6"
memory_ratio_threshold: "1.6"
- benchmark_layers_integration_test:
device: "cuda"
layers: "linear gsm_linear"
runtime_ratio_threshold: "3.6"
memory_ratio_threshold: "13.0"
- benchmark_layers_integration_test:
device: "cuda"
layers: "mha gsm_dpmha"
runtime_ratio_threshold: "3.5"
memory_ratio_threshold: "2.0"
- benchmark_layers_integration_test:
device: "cuda"
layers: "gru dpgru"
runtime_ratio_threshold: "18.5"
memory_ratio_threshold: "1.2"
- benchmark_layers_integration_test:
device: "cuda"
layers: "gru gsm_dpgru"
runtime_ratio_threshold: "40"
memory_ratio_threshold: "1.6"
- benchmark_layers_integration_test:
device: "cuda"
layers: "lstm dplstm"
runtime_ratio_threshold: "16.5"
memory_ratio_threshold: "1.2"
- benchmark_layers_integration_test:
device: "cuda"
layers: "lstm gsm_dplstm"
runtime_ratio_threshold: "38.0"
memory_ratio_threshold: "1.8"
- benchmark_layers_integration_test:
device: "cuda"
layers: "rnn dprnn"
runtime_ratio_threshold: "10.0"
memory_ratio_threshold: "1.2"
- benchmark_layers_integration_test:
device: "cuda"
layers: "rnn gsm_dprnn"
runtime_ratio_threshold: "33.0"
memory_ratio_threshold: "1.2"
- benchmark_layers_integration_test:
device: "cuda"
layers: "embedding gsm_embedding"
runtime_ratio_threshold: "8.0"
memory_ratio_threshold: "15.0"

unittest_multi_gpu:
machine:
resource_class: gpu.nvidia.medium.multi
@@ -392,6 +486,8 @@ workflows:
filters: *exclude_ghpages
- lint_py37_torch_release:
filters: *exclude_ghpages
- micro_benchmarks_py37_torch_release_cuda:
filters: *exclude_ghpages

website_deployment:
when:
16 changes: 15 additions & 1 deletion benchmarks/README.md
@@ -33,7 +33,7 @@ Do this num_runs times:
loss.backward()

Stop timer

Return elapsed time / num_repeats and memory statistics
```
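
A minimal sketch of this loop using `torch.utils.benchmark`, which is what `benchmark_layer.py` relies on; the `Linear` layer, input shape, and repeat count below are illustrative assumptions, not the suite's defaults.

```python
# Sketch of the timing loop described above. Layer, batch size and repeat
# count are illustrative.
import torch
import torch.utils.benchmark as benchmark

device = "cuda" if torch.cuda.is_available() else "cpu"
layer = torch.nn.Linear(512, 512).to(device)
x = torch.randn(16, 512, device=device)


def benchmark_fun():
    # One forward + backward pass, as in the pseudocode above.
    layer.zero_grad()
    out = layer(x)
    out.sum().backward()


if device == "cuda":
    torch.cuda.reset_peak_memory_stats(device)

# benchmark.Timer performs its own warmup runs before timing.
timer = benchmark.Timer(
    stmt="benchmark_fun()",
    globals={"benchmark_fun": benchmark_fun},
    num_threads=1,
)
mean_runtime = timer.timeit(100).mean  # seconds per call, averaged over repeats

max_memory = torch.cuda.max_memory_allocated(device) if device == "cuda" else 0
print(f"runtime: {mean_runtime:.6f}s, peak memory: {max_memory} bytes")
```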

@@ -107,6 +107,20 @@ optional arguments:
-v, --verbose
```

`generate_report.py` takes as input the path where `run_benchmarks.py` has written its results and generates a report.
```
usage: generate_report.py [-h] [--path-to-results PATH_TO_RESULTS]
[--save-path SAVE_PATH] [--format {csv,pkl}]

optional arguments:
-h, --help show this help message and exit
--path-to-results PATH_TO_RESULTS
the path that `run_benchmarks.py` has saved results
to.
--save-path SAVE_PATH
path to save the output.
--format {csv,pkl} output format
```
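
The same report can also be produced programmatically through the `generate_report` helper in `benchmarks/utils.py` that this script wraps; the paths below are illustrative.

```python
# Equivalent of the CLI invocation above, run from the benchmarks/ directory.
from utils import generate_report  # benchmarks/utils.py

generate_report(
    path_to_results="./results/raw",    # where run_benchmarks.py saved raw pickles
    save_path="./results/report.csv",   # where to write the aggregated report
    format="csv",                       # "csv" or "pkl"
)
```
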
## Tests

```python -m pytest tests/```
15 changes: 6 additions & 9 deletions benchmarks/benchmark_layer.py
@@ -62,15 +62,12 @@ def run_layer_benchmark(
)

# benchmark.Timer performs its own warmups
try:
timer = benchmark.Timer(
stmt="benchmark_fun()",
globals={"benchmark_fun": benchmark_fun},
num_threads=1,
)
runtime = timer.timeit(num_repeats).mean
except RuntimeError:
runtime = float("nan")
timer = benchmark.Timer(
stmt="benchmark_fun()",
globals={"benchmark_fun": benchmark_fun},
num_threads=1,
)
runtime = timer.timeit(num_repeats).mean

# get max memory allocated and reset memory statistics
memory_stats["max_memory"] = reset_peak_memory_stats(device).prev_max_mem
44 changes: 44 additions & 0 deletions benchmarks/generate_report.py
@@ -0,0 +1,44 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse

from utils import generate_report


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--path-to-results",
default="./results/raw",
type=str,
help="the path that `run_benchmarks.py` has saved results to.",
)
parser.add_argument(
"--save-path",
default="./results/report.csv",
type=str,
help="path to save the output.",
)

parser.add_argument(
"--format",
default="csv",
type=str,
help="output format",
choices=["csv", "pkl"],
)
args = parser.parse_args()

generate_report(args.path_to_results, args.save_path, args.format)
79 changes: 79 additions & 0 deletions benchmarks/utils.py
@@ -12,10 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import pickle
from collections import namedtuple
from typing import Any, Dict, List, Optional

import numpy as np
import pandas as pd
import torch
from layers import LayerType

@@ -163,3 +166,79 @@ def save_results(
handle,
protocol=pickle.HIGHEST_PROTOCOL,
)


def generate_report(path_to_results: str, save_path: str, format: str) -> None:
"""Generate a report from the benchamrks outcome.
The output is a file whic contains the runtime and memory of each layer.
If multiple layer variants were run (pytorch nn, DP, or GSM).
Then we will compare the performance of both DP and GSM to pytorch.nn.

Args:
path_to_results: the path that `run_benchmarks.py` has saved results to.
save_path: path to save the output.
format: output format : csv or pkl.
"""
path_to_results = (
path_to_results if path_to_results[-1] != "/" else path_to_results[:-1]
)
files = glob.glob(f"{path_to_results}/*")

if len(files) == 0:
raise Exception(f"There were no result files in the path {path_to_results}")

raw_results = []
for result_file in files:
with open(result_file, "rb") as handle:
raw_results.append(pickle.load(handle))

results_dict = []
for raw in raw_results:
runtime = np.mean([i["runtime"] for i in raw["results"]])
memory = np.mean([i["memory_stats"]["max_memory"] for i in raw["results"]])
result = {
"layer": raw["layer"],
"batch_size": raw["batch_size"],
"num_runs": raw["num_runs"],
"num_repeats": raw["num_repeats"],
"forward_only": raw["forward_only"],
"runtime": runtime,
"memory": memory,
}
results_dict.append(result)

results = pd.DataFrame(results_dict)
results["variant"] = "control"
results["variant"][results["layer"].str.startswith("gsm")] = "gsm"
results["variant"][results["layer"].str.startswith("dp")] = "dp"
results["base_layer"] = results["layer"].str.replace("(gsm_)|(dp)", "")

pivot = results.pivot_table(
index=["batch_size", "num_runs", "num_repeats", "forward_only", "base_layer"],
columns=["variant"],
values=["runtime", "memory"],
)

def add_ratio(df, metric, variant):
if variant not in df.columns.get_level_values("variant"):
for ametric in df.columns.get_level_values(0):
df[(ametric, variant)] = np.nan

df[(metric, f"{variant}/control")] = (
df.loc[:, (metric, variant)] / df.loc[:, (metric, "control")]
)

if "control" in results["variant"].tolist():
add_ratio(pivot, "runtime", "dp")
add_ratio(pivot, "memory", "dp")
add_ratio(pivot, "runtime", "gsm")
add_ratio(pivot, "memory", "gsm")
pivot.columns = pivot.columns.set_names("value", level=1)

output = pivot.sort_index(axis=1).sort_values(
["batch_size", "num_runs", "num_repeats", "forward_only"]
)
if format == "csv":
output.to_csv(save_path)
else:
output.to_pickle(save_path)
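
To make the resulting column layout concrete, here is a toy illustration of the pivot and ratio step with made-up numbers; it is not part of the benchmark suite.

```python
# Toy illustration of the pivot/ratio computation in generate_report.
import pandas as pd

results = pd.DataFrame(
    {
        "batch_size": [16, 16, 16],
        "num_runs": [10, 10, 10],
        "num_repeats": [20, 20, 20],
        "forward_only": [False, False, False],
        "base_layer": ["linear", "linear", "linear"],
        "variant": ["control", "dp", "gsm"],
        "runtime": [1.0, 2.5, 3.0],
        "memory": [100.0, 150.0, 180.0],
    }
)

pivot = results.pivot_table(
    index=["batch_size", "num_runs", "num_repeats", "forward_only", "base_layer"],
    columns=["variant"],
    values=["runtime", "memory"],
)

# Ratios relative to the plain pytorch.nn ("control") variant, as add_ratio computes.
pivot[("runtime", "dp/control")] = pivot[("runtime", "dp")] / pivot[("runtime", "control")]
pivot[("runtime", "gsm/control")] = pivot[("runtime", "gsm")] / pivot[("runtime", "control")]
print(pivot[("runtime", "dp/control")])  # 2.5 for the toy numbers above
```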