benchmark_layer.py (forked from pytorch/opacus)
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import json
from typing import Callable, Dict, Tuple

import torch
import torch.utils.benchmark as benchmark

from layers import LayerFactory, LayerType
from utils import get_layer_set, reset_peak_memory_stats


def run_layer_benchmark(
    num_repeats: int,
    forward_only: bool = False,
    create_layer: Callable = LayerFactory.create,
    **kwargs,
) -> Tuple[float, Dict[str, int]]:
    """Benchmarks a single layer for runtime and CUDA memory (if applicable).

    Args:
        num_repeats: how many times to repeat the forward(/backward) pass
        forward_only: whether to skip the backward pass
        create_layer: function for creating the layer; takes **kwargs

    Returns:
        A tuple of the mean runtime as a float and memory statistics as a dict.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available():
        assert reset_peak_memory_stats(device).cur_mem == 0

    # setup layer
    layer_fun = create_layer(**kwargs)
    if forward_only:
        layer_fun.module.eval()
        benchmark_fun = layer_fun.forward_only
    else:
        layer_fun.module.train()
        benchmark_fun = layer_fun.forward_backward

    # move layer to device and get memory statistics
    memory_stats = layer_fun.to(device=device)
    assert sum(v for _, v in memory_stats.items()) == torch.cuda.memory_allocated(
        device
    )

    # benchmark.Timer performs its own warmups
    timer = benchmark.Timer(
        stmt="benchmark_fun()",
        globals={"benchmark_fun": benchmark_fun},
        num_threads=1,
    )
    runtime = timer.timeit(num_repeats).mean

    # get max memory allocated and reset memory statistics
    memory_stats["max_memory"] = reset_peak_memory_stats(device).prev_max_mem
    return runtime, memory_stats
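
# A minimal sketch of calling run_layer_benchmark directly from Python instead
# of through the CLI below. The keyword arguments mirror what main() forwards
# through **kwargs to LayerFactory.create; "linear" is a hypothetical layer
# name here -- the valid names and per-layer settings come from
# layers.LayerType and config.json.
#
#     runtime, memory_stats = run_layer_benchmark(
#         num_repeats=10,
#         forward_only=True,
#         layer_name="linear",  # hypothetical; see LayerType for real choices
#         batch_size=32,
#         random_seed=0,
#     )

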
def main(args) -> None:
    with open(args.config_file) as config_file:
        config = json.load(config_file)

    runtime, memory_stats = run_layer_benchmark(
        num_repeats=args.num_repeats,
        forward_only=args.forward_only,
        layer_name=args.layer,
        batch_size=args.batch_size,
        random_seed=args.random_seed,
        **config[get_layer_set(args.layer)],
    )
    print(f"Runtime (seconds): {runtime}")
    print(f"Memory statistics (bytes): {memory_stats}")
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "layer",
        type=str,
        choices=[v for k, v in LayerType.__dict__.items() if not k.startswith("__")],
    )
    parser.add_argument("--batch_size", default=64, type=int)
    parser.add_argument(
        "--num_repeats",
        default=20,
        type=int,
        help="number of forward/backward passes to run",
    )
    parser.add_argument(
        "--forward_only", action="store_true", help="only run forward passes"
    )
    parser.add_argument("--random_seed", default=0, type=int)
    parser.add_argument(
        "-c",
        "--config_file",
        default="config.json",
        type=str,
        help="path to config file with settings for each layer",
    )
    args = parser.parse_args()
    main(args)
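
# Example invocations (a sketch; "linear" is a hypothetical layer name -- the
# accepted choices are the values defined on LayerType in layers.py, and the
# flags match the parser above):
#
#     python benchmark_layer.py linear
#     python benchmark_layer.py linear --batch_size 128 --num_repeats 50
#     python benchmark_layer.py linear --forward_only -c config.json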