Skip to content

Commit

Permalink
Some profiler fixes and cleanup.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 168069346
  • Loading branch information
tensorflower-gardener committed Sep 9, 2017
1 parent 0753b0c commit b76565b
Show file tree
Hide file tree
Showing 13 changed files with 115 additions and 50 deletions.
2 changes: 1 addition & 1 deletion tensorflow/core/profiler/README.md
Expand Up @@ -216,7 +216,7 @@ seq2seq_attention_model.py:363:build_graph:self._add_train_o..., cpu: 1.28sec, a
```shell
# The following example generates a timeline.
tfprof> graph -step 0 -max_depth 100000 -output timeline:outfile=<filename>
tfprof> graph -step -1 -max_depth 100000 -output timeline:outfile=<filename>

generating trace file.

Expand Down
7 changes: 6 additions & 1 deletion tensorflow/core/profiler/g3doc/command_line.md
Expand Up @@ -14,7 +14,12 @@

### Command Line Inputs

tfprof command line tool uses the following inputs:
tfprof command line tool uses the following input:

<b>--profile_path:</b> A ProfileProto binary proto file.
See QuickStart on generating the file.

<b>THE OLD WAY BELOW IS DEPRECATED:</b>

<b>--graph_path:</b> GraphDef proto file (required). Used to build in-memory
data structure of the model. For example, graph.pbtxt written by tf.Supervisor
Expand Down
1 change: 0 additions & 1 deletion tensorflow/core/profiler/internal/print_model_analysis.cc
Expand Up @@ -84,7 +84,6 @@ string RunProfile(const string& command, const string& options,
} // namespace

bool NewProfiler(const string* graph, const string* op_log) {
CHECK(!tf_stat) << "Currently only 1 living tfprof profiler is allowed";
CHECK(graph) << "graph mustn't be null";
std::unique_ptr<GraphDef> graph_ptr(new GraphDef());
if (!graph_ptr->ParseFromString(*graph)) {
Expand Down
87 changes: 58 additions & 29 deletions tensorflow/core/profiler/internal/tfprof_node.h
Expand Up @@ -175,22 +175,22 @@ class ExecStep {
std::map<int32, std::pair<int64, uint64>> output_memory_;
};

#define GRAPH_NODE_BYTES(type) \
do { \
if (execs_.empty()) { \
return 0; \
} \
if (step >= 0) { \
auto exec = execs_.find(step); \
CHECK(exec != execs_.end()) << "unknown step " << step; \
return exec->second.type##_bytes(); \
} \
\
int64 bytes = 0; \
for (const auto& exec : execs_) { \
bytes += exec.second.type##_bytes(); \
} \
return bytes / execs_.size(); \
#define GRAPH_NODE_BYTES(type) \
do { \
if (execs_.empty()) { \
return 0; \
} \
if (step >= 0) { \
auto exec = execs_.find(step); \
if (exec == execs_.end()) return 0; \
return exec->second.type##_bytes(); \
} \
\
int64 bytes = 0; \
for (const auto& exec : execs_) { \
bytes += exec.second.type##_bytes(); \
} \
return bytes / execs_.size(); \
} while (0)

class TFGraphNode {
Expand Down Expand Up @@ -372,7 +372,9 @@ class TFGraphNode {
}
if (step >= 0) {
auto exec = execs_.find(step);
CHECK(exec != execs_.end());
if (exec == execs_.end()) {
return 0;
}
return exec->second.run_count();
}
int64 total_run_count = 0;
Expand All @@ -390,7 +392,9 @@ class TFGraphNode {
}
if (step >= 0) {
auto exec = execs_.find(step);
CHECK(exec != execs_.end());
if (exec == execs_.end()) {
return 0;
}
return exec->second.exec_micros();
}

Expand All @@ -410,7 +414,9 @@ class TFGraphNode {
}
if (step >= 0) {
auto exec = execs_.find(step);
CHECK(exec != execs_.end());
if (exec == execs_.end()) {
return 0;
}
return exec->second.accelerator_exec_micros();
}

Expand All @@ -430,7 +436,9 @@ class TFGraphNode {
}
if (step >= 0) {
auto exec = execs_.find(step);
CHECK(exec != execs_.end());
if (exec == execs_.end()) {
return 0;
}
return exec->second.cpu_exec_micros();
}

Expand All @@ -448,54 +456,72 @@ class TFGraphNode {

int64 all_start_micros(int64 step) const {
auto exec = execs_.find(step);
CHECK(exec != execs_.end()) << "unknown step " << step;
if (exec == execs_.end()) {
return 0;
}
return exec->second.all_start_micros();
}

int64 latest_end_micros(int64 step) const {
auto exec = execs_.find(step);
CHECK(exec != execs_.end()) << "unknown step " << step;
if (exec == execs_.end()) {
return 0;
}
return exec->second.latest_end_micros();
}

const std::map<string, std::vector<std::pair<int64, int64>>>& op_execs(
int64 step) const {
auto exec = execs_.find(step);
CHECK(exec != execs_.end()) << "unknown step " << step;
if (exec == execs_.end()) {
return empty_op_execs_;
}
return exec->second.op_execs();
}

const std::map<int64, ExecStep>& all_op_execs() const { return execs_; }

int64 accelerator_temp_bytes(int64 step) const {
auto exec = execs_.find(step);
CHECK(exec != execs_.end()) << "unknown step " << step;
if (exec == execs_.end()) {
return 0;
}
return exec->second.accelerator_temp_bytes();
}
int64 host_temp_bytes(int64 step) const {
auto exec = execs_.find(step);
CHECK(exec != execs_.end()) << "unknown step " << step;
if (exec == execs_.end()) {
return 0;
}
return exec->second.host_temp_bytes();
}
int64 accelerator_persistent_bytes(int64 step) const {
auto exec = execs_.find(step);
CHECK(exec != execs_.end()) << "unknown step " << step;
if (exec == execs_.end()) {
return 0;
}
return exec->second.accelerator_persistent_bytes();
}
int64 host_persistent_bytes(int64 step) const {
auto exec = execs_.find(step);
CHECK(exec != execs_.end()) << "unknown step " << step;
if (exec == execs_.end()) {
return 0;
}
return exec->second.host_persistent_bytes();
}
const std::map<int32, std::pair<int64, uint64>>& output_memory(
int64 step) const {
auto exec = execs_.find(step);
CHECK(exec != execs_.end()) << "unknown step " << step;
if (exec == execs_.end()) {
return empty_output_memory_;
}
return exec->second.output_memory();
}
int64 allocator_bytes_in_use(int64 step) const {
auto exec = execs_.find(step);
CHECK(exec != execs_.end()) << "unknown step " << step;
if (exec == execs_.end()) {
return 0;
}
return exec->second.allocator_bytes_in_use();
}

Expand Down Expand Up @@ -566,6 +592,9 @@ class TFGraphNode {
std::set<string> op_types_;

std::map<int64, ExecStep> execs_;

std::map<int32, std::pair<int64, uint64>> empty_output_memory_;
std::map<string, std::vector<std::pair<int64, int64>>> empty_op_execs_;
};

class TFMultiGraphNode {
Expand Down
36 changes: 30 additions & 6 deletions tensorflow/core/profiler/internal/tfprof_stats.cc
Expand Up @@ -88,6 +88,9 @@ TFStats::TFStats(const string& filename,
node_pb.second.name(), std::move(node)));
}
has_code_traces_ = profile.has_trace();
for (int64 s : profile.steps()) {
steps_.insert(s);
}
}

void TFStats::BuildView(const string& cmd) {
Expand Down Expand Up @@ -136,6 +139,14 @@ const GraphNodeProto& TFStats::ShowGraphNode(const string& cmd,
if (cmd == kCmds[0]) {
return scope_view_->Show(opts);
} else if (cmd == kCmds[1]) {
if (opts.step < 0 && opts.output_type == kOutput[0]) {
for (int64 step : steps_) {
Options nopts = opts;
nopts.step = step;
graph_view_->Show(nopts);
}
return empty_graph_node_;
}
return graph_view_->Show(opts);
} else {
fprintf(stderr, "Unknown command: %s\n", cmd.c_str());
Expand All @@ -148,7 +159,11 @@ const MultiGraphNodeProto& TFStats::ShowMultiGraphNode(
if (!Validate(opts)) {
return empty_multi_graph_node_;
}
if (cmd == kCmds[2] && has_code_traces()) {
if (cmd == kCmds[2]) {
if (!has_code_traces()) {
fprintf(stderr, "No code trace information\n");
return empty_multi_graph_node_;
}
return code_view_->Show(opts);
} else if (cmd == kCmds[3]) {
return op_view_->Show(opts);
Expand Down Expand Up @@ -212,7 +227,9 @@ void TFStats::AddOpLogProto(std::unique_ptr<OpLogProto> op_log) {
}
if (entry.has_code_def()) {
has_code_traces_ = true;
node->second->AddCode(entry.code_def());
if (node->second->code().traces_size() == 0) {
node->second->AddCode(entry.code_def());
}
}
}
}
Expand Down Expand Up @@ -258,9 +275,11 @@ void TFStats::WriteProfile(const string& filename) {
}
(*profile.mutable_nodes())[it->second->id()].MergeFrom(
it->second->ToProto(nodes_map_));
if (it->second->code().traces_size() > 0) {
profile.set_has_trace(true);
}
}

profile.set_has_trace(has_code_traces_);
for (int64 s : steps_) {
profile.add_steps(s);
}
Status s =
WriteStringToFile(Env::Default(), filename, profile.SerializeAsString());
Expand All @@ -271,7 +290,12 @@ void TFStats::WriteProfile(const string& filename) {

bool TFStats::Validate(const Options& opts) const {
if (opts.step >= 0 && steps_.find(opts.step) == steps_.end()) {
fprintf(stderr, "Options -step=%lld not found\n", opts.step);
fprintf(stderr,
"Options -step=%lld not found.\nAvailable steps: ", opts.step);
for (int64 s : steps_) {
fprintf(stderr, "%lld ", s);
}
fprintf(stderr, "\n");
return false;
}
return true;
Expand Down
8 changes: 5 additions & 3 deletions tensorflow/core/profiler/internal/tfprof_timeline.cc
Expand Up @@ -19,6 +19,7 @@ limitations under the License.

#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/profiler/internal/tfprof_utils.h"

namespace tensorflow {
Expand Down Expand Up @@ -303,19 +304,20 @@ void Timeline::GenerateCodeTimeline(const CodeNode* node) {
}

void Timeline::OutputTimeline() {
string outfile = strings::Printf("%s_%lld", outfile_.c_str(), step());
Status s =
WriteStringToFile(Env::Default(), outfile_, chrome_formatter_.Format());
WriteStringToFile(Env::Default(), outfile, chrome_formatter_.Format());
if (!s.ok()) {
fprintf(stderr, "Failed to write timeline file: %s\nError: %s\n",
outfile_.c_str(), s.ToString().c_str());
outfile.c_str(), s.ToString().c_str());
return;
}
fprintf(stdout, "\n******************************************************\n");
fprintf(stdout,
"Timeline file is written to %s.\n"
"Open a Chrome browser, enter URL chrome://tracing and "
"load the timeline file.",
outfile_.c_str());
outfile.c_str());
fprintf(stdout, "\n******************************************************\n");
fflush(stdout);
}
Expand Down
4 changes: 2 additions & 2 deletions tensorflow/core/profiler/internal/tfprof_timeline_test.cc
Expand Up @@ -70,7 +70,7 @@ TEST_F(TFProfTimelineTest, GraphView) {
tf_stats_->ShowGraphNode("graph", opts);

string dump_str;
TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file, &dump_str));
TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file + "_0", &dump_str));
EXPECT_EQ(1754536562981488144ull, Hash64(dump_str));
}

Expand All @@ -84,7 +84,7 @@ TEST_F(TFProfTimelineTest, ScopeView) {
tf_stats_->ShowGraphNode("scope", opts);

string dump_str;
TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file, &dump_str));
TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file + "_0", &dump_str));
EXPECT_EQ(17545174915963890413ull, Hash64(dump_str));
}

Expand Down
2 changes: 2 additions & 0 deletions tensorflow/core/profiler/tfprof_log.proto
Expand Up @@ -42,6 +42,8 @@ message ProfileProto {
map<int64, ProfileNode> nodes = 1;
// Whether or not has code traces.
bool has_trace = 2;
// Traced steps.
repeated int64 steps = 3;
}

message ProfileNode {
Expand Down
2 changes: 2 additions & 0 deletions tensorflow/python/profiler/internal/run_metadata_test.py
Expand Up @@ -70,6 +70,7 @@ def _run_model():
opts = builder.time_and_memory()
opts['min_micros'] = 0
opts['min_bytes'] = 0
opts['order_by'] = 'name'
opts['output'] = 'none'
_ = sess.run(y,
options=config_pb2.RunOptions(
Expand All @@ -95,6 +96,7 @@ def _run_loop_model():
run_metadata=run_meta)

opts = builder.time_and_memory()
opts['order_by'] = 'name'
opts['output'] = 'none'

tfprof_node = model_analyzer.profile(
Expand Down
3 changes: 1 addition & 2 deletions tensorflow/python/profiler/model_analyzer.py
Expand Up @@ -180,8 +180,7 @@ def add_step(self, step, run_meta):
"""
# pylint: disable=protected-access
op_log = tfprof_logger._merge_default_with_oplog(
self._graph, run_meta=run_meta, add_trace=False,
add_trainable_var=False)
self._graph, run_meta=run_meta)
# pylint: enable=protected-access
# TODO(xpan): P1: Better to find the current graph.
print_mdl.AddStep(
Expand Down
2 changes: 1 addition & 1 deletion tensorflow/python/profiler/model_analyzer_test.py
Expand Up @@ -305,7 +305,7 @@ def testTimeline(self):
_ = model_analyzer.profile(
sess.graph, run_meta, cmd='graph', options=opts)

with gfile.Open(outfile, 'r') as f:
with gfile.Open(outfile + '_0', 'r') as f:
# Test that a json file is created.
# TODO(xpan): tfprof Timeline isn't quite correct on Windows.
# Investigate why.
Expand Down
2 changes: 1 addition & 1 deletion tensorflow/python/profiler/option_builder.py
Expand Up @@ -177,7 +177,7 @@ def time_and_memory(min_micros=1, min_bytes=1, min_accelerator_micros=0,
'min_params': 0,
'min_float_ops': 0,
'min_occurrence': 0,
'order_by': 'name',
'order_by': 'micros',
'account_type_regexes': ['.*'],
'start_name_regexes': ['.*'],
'trim_name_regexes': [],
Expand Down

0 comments on commit b76565b

Please sign in to comment.