Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sarkars/Upgrade TF #98

Merged
merged 17 commits into from
Jun 6, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 2 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ endif()
# libngraph_bridge.so
# ...
# /tensorflow
# libtensorflow_framework.so
# libtensorflow_framework.so.1
# python/
# _pywrap....so
# Therefore we are setting two entries in the RPATH:
Expand All @@ -119,12 +119,7 @@ endif()
# Find TensorFlow
find_package(TensorFlow REQUIRED)

add_library(tensorflow_framework_lib SHARED IMPORTED)
set_target_properties(
tensorflow_framework_lib
PROPERTIES IMPORTED_LOCATION
${TensorFlow_DIR}/libtensorflow_framework.so
)


if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(NGRAPH_TF_CXX11_ABI "${TensorFlow_CXX_ABI}")
Expand Down
3 changes: 1 addition & 2 deletions bazel/tf_configure/BUILD.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ cc_library(

cc_library(
name = "libtensorflow_framework",
srcs = [":libtensorflow_framework.so"],
#data = ["lib/libtensorflow_framework.so"],
srcs = [":libtensorflow_framework.so.1"],
visibility = ["//visibility:public"],
)

Expand Down
6 changes: 3 additions & 3 deletions bazel/tf_configure/tf_configure.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -182,14 +182,14 @@ def _tf_pip_impl(repository_ctx):
)

tf_shared_library_dir = repository_ctx.os.environ[_TF_SHARED_LIBRARY_DIR]
tf_shared_library_path = "%s/libtensorflow_framework.so" % tf_shared_library_dir
tf_shared_library_path = "%s/libtensorflow_framework.so.1" % tf_shared_library_dir
tf_shared_library_rule = _symlink_genrule_for_dir(
repository_ctx,
None,
"",
"libtensorflow_framework.so",
"libtensorflow_framework.so.1",
[tf_shared_library_path],
["libtensorflow_framework.so"],
["libtensorflow_framework.so.1"],
)

_tpl(repository_ctx, "BUILD", {
Expand Down
2 changes: 1 addition & 1 deletion build_ngtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def main():

# Component versions
ngraph_version = "v0.20.0-rc.0"
tf_version = "v1.13.1"
tf_version = "v1.14.0-rc0"

# Command line parser options
parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter)
Expand Down
10 changes: 9 additions & 1 deletion cmake/FindTensorFlow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,17 @@ endif()
message(STATUS "TensorFlow_VERSION: " ${TensorFlow_VERSION})

# Make sure that the TF library exists
if ( APPLE )
set(TF_LIB_NAME libtensorflow_framework.dylib)
else()
set(TF_LIB_NAME libtensorflow_framework.so.1)
endif()

message(STATUS "TF_LIB: " ${TF_LIB_NAME})

find_library(
TensorFlow_FRAMEWORK_LIBRARY
NAME tensorflow_framework
NAME ${TF_LIB_NAME}
PATHS ${TensorFlow_DIR}
NO_DEFAULT_PATH
)
Expand Down
2 changes: 1 addition & 1 deletion configure_bazel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ rm -f .bazelrc
if python -c "import tensorflow" &> /dev/null; then
echo 'using installed tensorflow'
else
pip install tensorflow==1.13.1
pip install tensorflow==v1.14.0-rc0
pip install tensorflow_estimator
fi

Expand Down
2 changes: 1 addition & 1 deletion logging/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ set(SRC
add_library(${LIB_NAME} ${SRC})
target_link_libraries(
${LIB_NAME}
tensorflow_framework_lib
${TensorFlow_FRAMEWORK_LIBRARY}
)

target_include_directories(${LIB_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
Expand Down
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ message(STATUS "NGRAPH_TF_USE_GRAPPLER_OPTIMIZER: ${NGRAPH_TF_USE_GRAPPLER_OPTIM
add_library(${LIB_NAME} SHARED ${SRC})

target_link_libraries( ${LIB_NAME} ngraph_logger)
target_link_libraries( ${LIB_NAME} tensorflow_framework_lib )
target_link_libraries( ${LIB_NAME} ${TensorFlow_FRAMEWORK_LIBRARY})
target_link_libraries( ${LIB_NAME} ngraph_lib )
target_link_libraries( ${LIB_NAME} absl_algorithm )
target_link_libraries( ${LIB_NAME} absl_container )
Expand Down
2 changes: 1 addition & 1 deletion src/enable_variable_ops/ngraph_var.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class NGraphVar : public ResourceBase {
Tensor* tensor() { return &tf_tensor_; }
shared_ptr<ngraph::runtime::Tensor> ng_tensor() { return ng_tensor_; };

string DebugString() override {
string DebugString() const override {
return strings::StrCat(DataTypeString(tf_tensor_.dtype()), "/",
tf_tensor_.shape().DebugString());
}
Expand Down
46 changes: 41 additions & 5 deletions src/ngraph_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1812,8 +1812,15 @@ static Status TranslateFusedBatchNormOp(
NGRAPH_VLOG(3) << "is_training: " << tf_is_training;

shared_ptr<ng::Node> ng_input, ng_scale, ng_offset, ng_mean, ng_variance;
TF_RETURN_IF_ERROR(GetInputNodes(ng_op_map, op, &ng_input, &ng_scale,
&ng_offset, &ng_mean, &ng_variance));
bool is_v3 = op->type_string() == "FusedBatchNormV3";
if (tf_is_training) {
TF_RETURN_IF_ERROR(GetInputNode(ng_op_map, op, 0, &ng_input));
TF_RETURN_IF_ERROR(GetInputNode(ng_op_map, op, 1, &ng_scale));
TF_RETURN_IF_ERROR(GetInputNode(ng_op_map, op, 2, &ng_offset));
} else {
TF_RETURN_IF_ERROR(GetInputNodes(ng_op_map, op, &ng_input, &ng_scale,
&ng_offset, &ng_mean, &ng_variance));
}

std::string tf_data_format;
TF_RETURN_IF_ERROR(GetNodeAttr(op->attrs(), "data_format", &tf_data_format));
Expand Down Expand Up @@ -1876,12 +1883,32 @@ static Status TranslateFusedBatchNormOp(
//(inverted variance in the cuDNN case), to be reused in the gradient
// computation.
SaveNgOp(ng_op_map, op->name(), ng_variance);
if (is_v3) {
// FusedBatchNormV3 has 6 outputs (reserve_space_3)
shared_ptr<ng::Node> ng_reserved_3 =
ConstructNgNode<ngraph::op::Constant>(
op->name(), ng_mean->get_element_type(), ng::Shape{},
std::vector<std::string>{""});
SaveNgOp(ng_op_map, op->name(), ng_reserved_3);
}
} else {
ng_batch_norm = ConstructNgNode<ng::op::BatchNormInference>(
op->name(), tf_epsilon, ng_scale, ng_offset, ng_input, ng_mean,
ng_variance);
BatchToTensorflow(is_nhwc, ng_batch_norm);
SaveNgOp(ng_op_map, op->name(), ng_batch_norm);
if (is_v3) {
SaveNgOp(ng_op_map, op->name(), ng_mean);
SaveNgOp(ng_op_map, op->name(), ng_variance);
SaveNgOp(ng_op_map, op->name(), ng_mean);
SaveNgOp(ng_op_map, op->name(), ng_variance);
// FusedBatchNormV3 has 6 outputs (reserve_space_3)
shared_ptr<ng::Node> ng_reserved_3 =
ConstructNgNode<ngraph::op::Constant>(
op->name(), ng_mean->get_element_type(), ng::Shape{},
std::vector<std::string>{""});
SaveNgOp(ng_op_map, op->name(), ng_reserved_3);
}
}

return Status::OK();
Expand All @@ -1890,7 +1917,8 @@ static Status TranslateFusedBatchNormOp(
static Status TranslateFusedBatchNormGradOp(
const Node* op, const std::vector<const Tensor*>& static_input_map,
Builder::OpMap& ng_op_map) {
TF_RETURN_IF_ERROR(ValidateInputCount(op, 5));
bool is_v3 = op->type_string() == "FusedBatchNormGradV3";
TF_RETURN_IF_ERROR(ValidateInputCount(op, is_v3 ? 6 : 5));

bool tf_is_training;
// We only support is_training=true case. We marked rejection for the case
Expand All @@ -1908,8 +1936,14 @@ static Status TranslateFusedBatchNormGradOp(
shared_ptr<ng::Node> ng_scale;
shared_ptr<ng::Node> ng_mean;
shared_ptr<ng::Node> ng_variance;
TF_RETURN_IF_ERROR(GetInputNodes(ng_op_map, op, &ng_delta, &ng_input,
&ng_scale, &ng_mean, &ng_variance));
if (is_v3) {
TF_RETURN_IF_ERROR(GetInputNodes(ng_op_map, op, &ng_delta, &ng_input,
&ng_scale, &ng_mean, &ng_variance,
nullptr));
} else {
TF_RETURN_IF_ERROR(GetInputNodes(ng_op_map, op, &ng_delta, &ng_input,
&ng_scale, &ng_mean, &ng_variance));
}

std::string tf_data_format;
TF_RETURN_IF_ERROR(GetNodeAttr(op->attrs(), "data_format", &tf_data_format));
Expand Down Expand Up @@ -4546,7 +4580,9 @@ const static std::map<
{"FloorMod", TranslateFloorModOp},
{"FusedBatchNorm", TranslateFusedBatchNormOp},
{"FusedBatchNormV2", TranslateFusedBatchNormOp},
{"FusedBatchNormV3", TranslateFusedBatchNormOp},
{"FusedBatchNormGrad", TranslateFusedBatchNormGradOp},
{"FusedBatchNormGradV3", TranslateFusedBatchNormGradOp},
{"GatherV2", TranslateGatherV2Op},
{"_FusedConv2D", TranslateFusedConv2DOp},
{"Greater", TranslateBinaryOp<ngraph::op::Greater>},
Expand Down
2 changes: 1 addition & 1 deletion src/ngraph_encapsulate_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class NGraphEncapsulateOp : public OpKernel {
errors::Internal("Did not find graphdef for encapsulate ", flib_key,
" in NGraphClusterManager or function library"));
// TODO: how to convert from functiondef to graphdef. Anything easier?
FunctionBody* fnbody;
std::unique_ptr<FunctionBody> fnbody;
const auto get_func_sig = [&flib](const string& op, const OpDef** sig) {
return flib.LookUpOpDef(op, sig);
};
Expand Down
2 changes: 1 addition & 1 deletion src/ngraph_freshness_tracker.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class NGraphFreshnessTracker : public ResourceBase {
NGraphFreshnessTracker(const NGraphFreshnessTracker&) = delete;
NGraphFreshnessTracker& operator=(const NGraphFreshnessTracker&) = delete;

std::string DebugString() override { return "FreshnessTracker"; }
std::string DebugString() const override { return "FreshnessTracker"; }

// If freshness_map_ has the base_pointer, then inserts the user function into
// its set of user functions
Expand Down
11 changes: 10 additions & 1 deletion src/ngraph_mark_for_clustering.cc
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,18 @@ Status MarkForClustering(Graph* graph,
SimpleConfirmationFunction();
confirmation_function_map["FusedBatchNormV2"] =
SimpleConfirmationFunction();
confirmation_function_map["FusedBatchNormV3"] =
SimpleConfirmationFunction();
confirmation_function_map["FusedBatchNormGrad"] = [](Node* n,
bool* result) {
TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "is_training", result));
return Status::OK();
};
confirmation_function_map["FusedBatchNormGradV3"] = [](Node* n,
bool* result) {
TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "is_training", result));
return Status::OK();
};
confirmation_function_map["_FusedConv2D"] = SimpleConfirmationFunction();
confirmation_function_map["Greater"] = SimpleConfirmationFunction();
confirmation_function_map["GreaterEqual"] = SimpleConfirmationFunction();
Expand Down Expand Up @@ -435,10 +442,12 @@ Status MarkForClustering(Graph* graph,
type_constraint_map["FloorDiv"]["T"] = NGraphNumericDTypes();
type_constraint_map["FloorMod"]["T"] = NGraphNumericDTypes();
type_constraint_map["FusedBatchNorm"]["T"] = NGraphNumericDTypes();
// TODO (mingshan): FusedBatchNormV2 supports DT_HALF,DT_BFLOAT16,
// TODO (mingshan): FusedBatchNormV2, V3 supports DT_HALF,DT_BFLOAT16,
// DT_FLOAT
type_constraint_map["FusedBatchNormV2"]["T"] = {DT_FLOAT};
type_constraint_map["FusedBatchNormV3"]["T"] = {DT_FLOAT};
type_constraint_map["FusedBatchNormGrad"]["T"] = NGraphNumericDTypes();
type_constraint_map["FusedBatchNormGradV3"]["T"] = NGraphNumericDTypes();
type_constraint_map["GatherV2"]["Tparams"] = NGraphDTypes();
type_constraint_map["GatherV2"]["Tindices"] = NGraphIndexDTypes();
type_constraint_map["GatherV2"]["Taxis"] = NGraphIndexDTypes();
Expand Down
2 changes: 1 addition & 1 deletion src/ngraph_tracked_variable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class NGraphVar : public ResourceBase {
mutex* mu() { return &mu_; }
Tensor* tensor() { return &tensor_; }

string DebugString() override {
string DebugString() const override {
return strings::StrCat(DataTypeString(tensor_.dtype()), "/",
tensor_.shape().DebugString());
}
Expand Down
4 changes: 2 additions & 2 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ add_library(tensorflow_cc_lib SHARED IMPORTED)
set_target_properties(
tensorflow_cc_lib
PROPERTIES IMPORTED_LOCATION
${TensorFlow_SRC_DIR}/bazel-bin/tensorflow/libtensorflow_cc.so
${TensorFlow_SRC_DIR}/bazel-bin/tensorflow/libtensorflow_cc.so.1
)

set(SRC
Expand Down Expand Up @@ -94,7 +94,7 @@ target_link_libraries(
ngraph_lib
libgtest
pthread
tensorflow_framework_lib
${TensorFlow_FRAMEWORK_LIBRARY}
tensorflow_cc_lib
)

Expand Down
6 changes: 3 additions & 3 deletions test/ci/buildkite/ngtf-cpu_centos-grappler.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
python3 build_ngtf.py --use_grappler --artifacts /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID

label: ":hammer_and_wrench: Build"
timeout_in_minutes: 30
timeout_in_minutes: 40
agents:
- "queue=cpu-centos"
parallelism: 1
Expand All @@ -27,7 +27,7 @@
- command: |
source /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/venv/bin/activate
pip install -U \
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.14.0rc0-cp36-cp36m-linux_x86_64.whl
PYTHONPATH=`pwd` python3 test/ci/buildkite/test_runner.py \
--artifacts /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID --test_bazel

Expand All @@ -52,7 +52,7 @@
- command: |
source /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/venv/bin/activate
pip install psutil && pip install -U \
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.14.0rc0-cp36-cp36m-linux_x86_64.whl
pip install -U /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/ngraph_tensorflow_bridge-*.whl

label: ":gear: Install"
Expand Down
6 changes: 3 additions & 3 deletions test/ci/buildkite/ngtf-cpu_centos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
python3 build_ngtf.py --artifacts /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID

label: ":hammer_and_wrench: Build"
timeout_in_minutes: 30
timeout_in_minutes: 40
agents:
- "queue=cpu-centos"
parallelism: 1
Expand All @@ -40,7 +40,7 @@
- command: |
source /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/venv/bin/activate
pip install -U \
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.14.0rc0-cp36-cp36m-linux_x86_64.whl
PYTHONPATH=`pwd` python3 test/ci/buildkite/test_runner.py \
--artifacts /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID --test_bazel

Expand All @@ -65,7 +65,7 @@
- command: |
source /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/venv/bin/activate
pip install psutil && pip install -U \
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.14.0rc0-cp36-cp36m-linux_x86_64.whl
pip install -U /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/ngraph_tensorflow_bridge-*.whl

label: ":gear: Install"
Expand Down
12 changes: 6 additions & 6 deletions test/ci/buildkite/ngtf-cpu_ubuntu-bin-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
label: ":gear: Setup"
timeout_in_minutes: 30
agents:
- "queue=cpu"
- "queue=cpu-centos"
parallelism: 1

- wait
Expand All @@ -15,9 +15,9 @@
python3 build_ngtf.py --use_prebuilt_tensorflow --artifacts /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID

label: ":hammer_and_wrench: Build"
timeout_in_minutes: 30
timeout_in_minutes: 40
agents:
- "queue=cpu"
- "queue=cpu-centos"
parallelism: 1

- wait
Expand All @@ -30,7 +30,7 @@
label: ":bazel: Bazel Build"
timeout_in_minutes: 30
agents:
- "queue=cpu"
- "queue=cpu-centos"

- wait

Expand All @@ -43,12 +43,12 @@
label: ":bar_chart: ResNet50"
timeout_in_minutes: 30
agents:
- "queue=cpu"
- "queue=cpu-centos"

- wait
- command: |
rm -rf /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID
label: ":wastebasket: Cleanup"
agents:
- "queue=cpu"
- "queue=cpu-centos"