Run pytorch mobile benchmark in PEP (#28437)
Summary:
Pull Request resolved: #28437

Add a target to build speed_benchmark_torch for PEP.
Add a new argument `--report_pep` that prints total-runtime information for PEP; per-op stats can be added under it later.
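
For reference, a hypothetical invocation might look like the following; the binary path, model file, input shape, and sample value are illustrative, the flags are the ones defined in speed_benchmark_torch.cc, and with `--report_pep=true` each iteration emits one PyTorchObserver line:

    ./speed_benchmark_torch --model=mobilenet.pt \
        --input_dims="1,3,224,224" --input_type=float \
        --warmup=2 --iter=10 --report_pep=true
    ...
    PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "8532"}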

Test Plan: https://our.intern.facebook.com/intern/aibench/details/664440309179004

Reviewed By: hl475

Differential Revision: D18062059

fbshipit-source-id: ca80e980ce8e48604782a15ac44dd8d403832817
supriyar authored and facebook-github-bot committed Oct 22, 2019
1 parent 5f15632 commit 2cc0f1b
Showing 1 changed file with 33 additions and 3 deletions.

binaries/speed_benchmark_torch.cc
@@ -24,6 +24,9 @@
 #include "torch/csrc/jit/import.h"
 #include "torch/script.h"
 
+#include <chrono>
+using namespace std::chrono;
+
 C10_DEFINE_string(model, "", "The given torch script model to benchmark.");
 C10_DEFINE_string(
     input_dims,
@@ -40,6 +43,23 @@ C10_DEFINE_bool(
     "Whether to print output with all one input tensor.");
 C10_DEFINE_int(warmup, 0, "The number of iterations to warm up.");
 C10_DEFINE_int(iter, 10, "The number of iterations to run.");
+C10_DEFINE_bool(
+    report_pep,
+    false,
+    "Whether to print performance stats for AI-PEP.");
+
+std::vector<std::string>
+split(char separator, const std::string& string, bool ignore_empty = true) {
+  std::vector<std::string> pieces;
+  std::stringstream ss(string);
+  std::string item;
+  while (getline(ss, item, separator)) {
+    if (!ignore_empty || !item.empty()) {
+      pieces.push_back(std::move(item));
+    }
+  }
+  return pieces;
+}
 
 int main(int argc, char** argv) {
   c10::SetUsageMessage(
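
As a side note on the split helper added above, here is a minimal standalone sketch of how it drives input parsing; the helper body is copied from this diff, while main and the sample strings are illustrative:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Copied from the diff: splits `string` on `separator`, dropping empty
// pieces unless ignore_empty is false.
std::vector<std::string>
split(char separator, const std::string& string, bool ignore_empty = true) {
  std::vector<std::string> pieces;
  std::stringstream ss(string);
  std::string item;
  while (getline(ss, item, separator)) {
    if (!ignore_empty || !item.empty()) {
      pieces.push_back(std::move(item));
    }
  }
  return pieces;
}

int main() {
  // ';' separates one input spec from the next; ',' separates the dims
  // within a spec, mirroring the FLAGS_input_dims handling below.
  for (const auto& spec : split(';', "1,3,224,224;1,3,300,300")) {
    for (const auto& dim : split(',', spec)) {
      std::cout << dim << " ";
    }
    std::cout << "\n";  // prints "1 3 224 224 " then "1 3 300 300 "
  }
  return 0;
}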
@@ -59,16 +79,16 @@ int main(int argc, char** argv) {
   CAFFE_ENFORCE_GE(FLAGS_input_dims.size(), 0, "Input dims must be specified.");
   CAFFE_ENFORCE_GE(FLAGS_input_type.size(), 0, "Input type must be specified.");
 
-  std::vector<std::string> input_dims_list = caffe2::split(';', FLAGS_input_dims);
-  std::vector<std::string> input_type_list = caffe2::split(';', FLAGS_input_type);
+  std::vector<std::string> input_dims_list = split(';', FLAGS_input_dims);
+  std::vector<std::string> input_type_list = split(';', FLAGS_input_type);
   CAFFE_ENFORCE_EQ(
       input_dims_list.size(),
       input_type_list.size(),
       "Input dims and type should have the same number of items.");
 
   std::vector<c10::IValue> inputs;
   for (size_t i = 0; i < input_dims_list.size(); ++i) {
-    auto input_dims_str = caffe2::split(',', input_dims_list[i]);
+    auto input_dims_str = split(',', input_dims_list[i]);
     std::vector<int64_t> input_dims;
     for (const auto& s : input_dims_str) {
       input_dims.push_back(c10::stoi(s));
@@ -112,11 +132,21 @@
       FLAGS_iter,
       ".");
   caffe2::Timer timer;
+  std::vector<float> times;
   auto millis = timer.MilliSeconds();
   for (int i = 0; i < FLAGS_iter; ++i) {
+    auto start = high_resolution_clock::now();
     module.forward(inputs);
+    auto stop = high_resolution_clock::now();
+    auto duration = duration_cast<microseconds>(stop - start);
+    times.push_back(duration.count());
   }
   millis = timer.MilliSeconds();
+  if (FLAGS_report_pep) {
+    for (auto t : times) {
+      std::cout << "PyTorchObserver {\"type\": \"NET\", \"unit\": \"us\", \"metric\": \"latency\", \"value\": \"" << t << "\"}" << std::endl;
+    }
+  }
   std::cout << "Main run finished. Milliseconds per iter: "
             << millis / FLAGS_iter
             << ". Iters per second: " << 1000.0 * FLAGS_iter / millis
