Skip to content

Commit

Permalink
sarkars/Upgrade TF (#98)
Browse files Browse the repository at this point in the history
  • Loading branch information
sayantan-nervana authored and avijit-nervana committed Jun 6, 2019
1 parent 5876b61 commit fdc5c27
Show file tree
Hide file tree
Showing 31 changed files with 378 additions and 66 deletions.
9 changes: 2 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ endif()
# libngraph_bridge.so
# ...
# /tensorflow
# libtensorflow_framework.so
# libtensorflow_framework.so.1
# python/
# _pywrap....so
# Therefore we are setting two entries in the RPATH:
Expand All @@ -119,12 +119,7 @@ endif()
# Find TensorFlow
find_package(TensorFlow REQUIRED)

add_library(tensorflow_framework_lib SHARED IMPORTED)
set_target_properties(
tensorflow_framework_lib
PROPERTIES IMPORTED_LOCATION
${TensorFlow_DIR}/libtensorflow_framework.so
)


if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(NGRAPH_TF_CXX11_ABI "${TensorFlow_CXX_ABI}")
Expand Down
3 changes: 1 addition & 2 deletions bazel/tf_configure/BUILD.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ cc_library(

cc_library(
name = "libtensorflow_framework",
srcs = [":libtensorflow_framework.so"],
#data = ["lib/libtensorflow_framework.so"],
srcs = [":libtensorflow_framework.so.1"],
visibility = ["//visibility:public"],
)

Expand Down
6 changes: 3 additions & 3 deletions bazel/tf_configure/tf_configure.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -182,14 +182,14 @@ def _tf_pip_impl(repository_ctx):
)

tf_shared_library_dir = repository_ctx.os.environ[_TF_SHARED_LIBRARY_DIR]
tf_shared_library_path = "%s/libtensorflow_framework.so" % tf_shared_library_dir
tf_shared_library_path = "%s/libtensorflow_framework.so.1" % tf_shared_library_dir
tf_shared_library_rule = _symlink_genrule_for_dir(
repository_ctx,
None,
"",
"libtensorflow_framework.so",
"libtensorflow_framework.so.1",
[tf_shared_library_path],
["libtensorflow_framework.so"],
["libtensorflow_framework.so.1"],
)

_tpl(repository_ctx, "BUILD", {
Expand Down
2 changes: 1 addition & 1 deletion build_ngtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def main():

# Component versions
ngraph_version = "v0.20.0-rc.0"
tf_version = "v1.13.1"
tf_version = "v1.14.0-rc0"

# Command line parser options
parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter)
Expand Down
10 changes: 9 additions & 1 deletion cmake/FindTensorFlow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,17 @@ endif()
message(STATUS "TensorFlow_VERSION: " ${TensorFlow_VERSION})

# Make sure that the TF library exists
if ( APPLE )
set(TF_LIB_NAME libtensorflow_framework.dylib)
else()
set(TF_LIB_NAME libtensorflow_framework.so.1)
endif()

message(STATUS "TF_LIB: " ${TF_LIB_NAME})

# Locate the TensorFlow framework shared library. TF_LIB_NAME holds the exact
# platform-specific file name selected above, and NO_DEFAULT_PATH confines the
# search to TensorFlow_DIR so a system-wide copy is never picked up.
# NAMES is the documented keyword; the earlier `NAME` spelling is not a
# documented find_library keyword, so it is parsed as one more candidate
# library name rather than as a keyword.
find_library(
    TensorFlow_FRAMEWORK_LIBRARY
    NAMES ${TF_LIB_NAME}
    PATHS ${TensorFlow_DIR}
    NO_DEFAULT_PATH
)
Expand Down
2 changes: 1 addition & 1 deletion configure_bazel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ rm -f .bazelrc
if python -c "import tensorflow" &> /dev/null; then
echo 'using installed tensorflow'
else
pip install tensorflow==1.13.1
pip install tensorflow==v1.14.0-rc0
pip install tensorflow_estimator
fi

Expand Down
2 changes: 1 addition & 1 deletion logging/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ set(SRC
add_library(${LIB_NAME} ${SRC})
target_link_libraries(
${LIB_NAME}
tensorflow_framework_lib
${TensorFlow_FRAMEWORK_LIBRARY}
)

target_include_directories(${LIB_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
Expand Down
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ message(STATUS "NGRAPH_TF_USE_GRAPPLER_OPTIMIZER: ${NGRAPH_TF_USE_GRAPPLER_OPTIM
add_library(${LIB_NAME} SHARED ${SRC})

target_link_libraries( ${LIB_NAME} ngraph_logger)
target_link_libraries( ${LIB_NAME} tensorflow_framework_lib )
target_link_libraries( ${LIB_NAME} ${TensorFlow_FRAMEWORK_LIBRARY})
target_link_libraries( ${LIB_NAME} ngraph_lib )
target_link_libraries( ${LIB_NAME} absl_algorithm )
target_link_libraries( ${LIB_NAME} absl_container )
Expand Down
2 changes: 1 addition & 1 deletion src/enable_variable_ops/ngraph_var.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class NGraphVar : public ResourceBase {
Tensor* tensor() { return &tf_tensor_; }
shared_ptr<ngraph::runtime::Tensor> ng_tensor() { return ng_tensor_; };

string DebugString() override {
string DebugString() const override {
return strings::StrCat(DataTypeString(tf_tensor_.dtype()), "/",
tf_tensor_.shape().DebugString());
}
Expand Down
46 changes: 41 additions & 5 deletions src/ngraph_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1812,8 +1812,15 @@ static Status TranslateFusedBatchNormOp(
NGRAPH_VLOG(3) << "is_training: " << tf_is_training;

shared_ptr<ng::Node> ng_input, ng_scale, ng_offset, ng_mean, ng_variance;
TF_RETURN_IF_ERROR(GetInputNodes(ng_op_map, op, &ng_input, &ng_scale,
&ng_offset, &ng_mean, &ng_variance));
bool is_v3 = op->type_string() == "FusedBatchNormV3";
if (tf_is_training) {
TF_RETURN_IF_ERROR(GetInputNode(ng_op_map, op, 0, &ng_input));
TF_RETURN_IF_ERROR(GetInputNode(ng_op_map, op, 1, &ng_scale));
TF_RETURN_IF_ERROR(GetInputNode(ng_op_map, op, 2, &ng_offset));
} else {
TF_RETURN_IF_ERROR(GetInputNodes(ng_op_map, op, &ng_input, &ng_scale,
&ng_offset, &ng_mean, &ng_variance));
}

std::string tf_data_format;
TF_RETURN_IF_ERROR(GetNodeAttr(op->attrs(), "data_format", &tf_data_format));
Expand Down Expand Up @@ -1876,12 +1883,32 @@ static Status TranslateFusedBatchNormOp(
//(inverted variance in the cuDNN case), to be reused in the gradient
// computation.
SaveNgOp(ng_op_map, op->name(), ng_variance);
if (is_v3) {
// FusedBatchNormV3 has 6 outputs (reserve_space_3)
shared_ptr<ng::Node> ng_reserved_3 =
ConstructNgNode<ngraph::op::Constant>(
op->name(), ng_mean->get_element_type(), ng::Shape{},
std::vector<std::string>{""});
SaveNgOp(ng_op_map, op->name(), ng_reserved_3);
}
} else {
ng_batch_norm = ConstructNgNode<ng::op::BatchNormInference>(
op->name(), tf_epsilon, ng_scale, ng_offset, ng_input, ng_mean,
ng_variance);
BatchToTensorflow(is_nhwc, ng_batch_norm);
SaveNgOp(ng_op_map, op->name(), ng_batch_norm);
if (is_v3) {
SaveNgOp(ng_op_map, op->name(), ng_mean);
SaveNgOp(ng_op_map, op->name(), ng_variance);
SaveNgOp(ng_op_map, op->name(), ng_mean);
SaveNgOp(ng_op_map, op->name(), ng_variance);
// FusedBatchNormV3 has 6 outputs (reserve_space_3)
shared_ptr<ng::Node> ng_reserved_3 =
ConstructNgNode<ngraph::op::Constant>(
op->name(), ng_mean->get_element_type(), ng::Shape{},
std::vector<std::string>{""});
SaveNgOp(ng_op_map, op->name(), ng_reserved_3);
}
}

return Status::OK();
Expand All @@ -1890,7 +1917,8 @@ static Status TranslateFusedBatchNormOp(
static Status TranslateFusedBatchNormGradOp(
const Node* op, const std::vector<const Tensor*>& static_input_map,
Builder::OpMap& ng_op_map) {
TF_RETURN_IF_ERROR(ValidateInputCount(op, 5));
bool is_v3 = op->type_string() == "FusedBatchNormGradV3";
TF_RETURN_IF_ERROR(ValidateInputCount(op, is_v3 ? 6 : 5));

bool tf_is_training;
// We only support is_training=true case. We marked rejection for the case
Expand All @@ -1908,8 +1936,14 @@ static Status TranslateFusedBatchNormGradOp(
shared_ptr<ng::Node> ng_scale;
shared_ptr<ng::Node> ng_mean;
shared_ptr<ng::Node> ng_variance;
TF_RETURN_IF_ERROR(GetInputNodes(ng_op_map, op, &ng_delta, &ng_input,
&ng_scale, &ng_mean, &ng_variance));
if (is_v3) {
TF_RETURN_IF_ERROR(GetInputNodes(ng_op_map, op, &ng_delta, &ng_input,
&ng_scale, &ng_mean, &ng_variance,
nullptr));
} else {
TF_RETURN_IF_ERROR(GetInputNodes(ng_op_map, op, &ng_delta, &ng_input,
&ng_scale, &ng_mean, &ng_variance));
}

std::string tf_data_format;
TF_RETURN_IF_ERROR(GetNodeAttr(op->attrs(), "data_format", &tf_data_format));
Expand Down Expand Up @@ -4546,7 +4580,9 @@ const static std::map<
{"FloorMod", TranslateFloorModOp},
{"FusedBatchNorm", TranslateFusedBatchNormOp},
{"FusedBatchNormV2", TranslateFusedBatchNormOp},
{"FusedBatchNormV3", TranslateFusedBatchNormOp},
{"FusedBatchNormGrad", TranslateFusedBatchNormGradOp},
{"FusedBatchNormGradV3", TranslateFusedBatchNormGradOp},
{"GatherV2", TranslateGatherV2Op},
{"_FusedConv2D", TranslateFusedConv2DOp},
{"Greater", TranslateBinaryOp<ngraph::op::Greater>},
Expand Down
2 changes: 1 addition & 1 deletion src/ngraph_encapsulate_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class NGraphEncapsulateOp : public OpKernel {
errors::Internal("Did not find graphdef for encapsulate ", flib_key,
" in NGraphClusterManager or function library"));
// TODO: how to convert from functiondef to graphdef. Anything easier?
FunctionBody* fnbody;
std::unique_ptr<FunctionBody> fnbody;
const auto get_func_sig = [&flib](const string& op, const OpDef** sig) {
return flib.LookUpOpDef(op, sig);
};
Expand Down
2 changes: 1 addition & 1 deletion src/ngraph_freshness_tracker.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class NGraphFreshnessTracker : public ResourceBase {
NGraphFreshnessTracker(const NGraphFreshnessTracker&) = delete;
NGraphFreshnessTracker& operator=(const NGraphFreshnessTracker&) = delete;

std::string DebugString() override { return "FreshnessTracker"; }
std::string DebugString() const override { return "FreshnessTracker"; }

// If freshness_map_ has the base_pointer, then inserts the user function into
// its set of user functions
Expand Down
11 changes: 10 additions & 1 deletion src/ngraph_mark_for_clustering.cc
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,18 @@ Status MarkForClustering(Graph* graph,
SimpleConfirmationFunction();
confirmation_function_map["FusedBatchNormV2"] =
SimpleConfirmationFunction();
confirmation_function_map["FusedBatchNormV3"] =
SimpleConfirmationFunction();
confirmation_function_map["FusedBatchNormGrad"] = [](Node* n,
bool* result) {
TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "is_training", result));
return Status::OK();
};
confirmation_function_map["FusedBatchNormGradV3"] = [](Node* n,
bool* result) {
TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "is_training", result));
return Status::OK();
};
confirmation_function_map["_FusedConv2D"] = SimpleConfirmationFunction();
confirmation_function_map["Greater"] = SimpleConfirmationFunction();
confirmation_function_map["GreaterEqual"] = SimpleConfirmationFunction();
Expand Down Expand Up @@ -435,10 +442,12 @@ Status MarkForClustering(Graph* graph,
type_constraint_map["FloorDiv"]["T"] = NGraphNumericDTypes();
type_constraint_map["FloorMod"]["T"] = NGraphNumericDTypes();
type_constraint_map["FusedBatchNorm"]["T"] = NGraphNumericDTypes();
// TODO (mingshan): FusedBatchNormV2 supports DT_HALF,DT_BFLOAT16,
// TODO (mingshan): FusedBatchNormV2 and V3 support DT_HALF, DT_BFLOAT16,
// DT_FLOAT
type_constraint_map["FusedBatchNormV2"]["T"] = {DT_FLOAT};
type_constraint_map["FusedBatchNormV3"]["T"] = {DT_FLOAT};
type_constraint_map["FusedBatchNormGrad"]["T"] = NGraphNumericDTypes();
type_constraint_map["FusedBatchNormGradV3"]["T"] = NGraphNumericDTypes();
type_constraint_map["GatherV2"]["Tparams"] = NGraphDTypes();
type_constraint_map["GatherV2"]["Tindices"] = NGraphIndexDTypes();
type_constraint_map["GatherV2"]["Taxis"] = NGraphIndexDTypes();
Expand Down
2 changes: 1 addition & 1 deletion src/ngraph_tracked_variable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class NGraphVar : public ResourceBase {
mutex* mu() { return &mu_; }
Tensor* tensor() { return &tensor_; }

string DebugString() override {
string DebugString() const override {
return strings::StrCat(DataTypeString(tensor_.dtype()), "/",
tensor_.shape().DebugString());
}
Expand Down
4 changes: 2 additions & 2 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ add_library(tensorflow_cc_lib SHARED IMPORTED)
set_target_properties(
tensorflow_cc_lib
PROPERTIES IMPORTED_LOCATION
${TensorFlow_SRC_DIR}/bazel-bin/tensorflow/libtensorflow_cc.so
${TensorFlow_SRC_DIR}/bazel-bin/tensorflow/libtensorflow_cc.so.1
)

set(SRC
Expand Down Expand Up @@ -94,7 +94,7 @@ target_link_libraries(
ngraph_lib
libgtest
pthread
tensorflow_framework_lib
${TensorFlow_FRAMEWORK_LIBRARY}
tensorflow_cc_lib
)

Expand Down
6 changes: 3 additions & 3 deletions test/ci/buildkite/ngtf-cpu_centos-grappler.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
python3 build_ngtf.py --use_grappler --artifacts /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID
label: ":hammer_and_wrench: Build"
timeout_in_minutes: 30
timeout_in_minutes: 40
agents:
- "queue=cpu-centos"
parallelism: 1
Expand All @@ -27,7 +27,7 @@
- command: |
source /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/venv/bin/activate
pip install -U \
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.14.0rc0-cp36-cp36m-linux_x86_64.whl
PYTHONPATH=`pwd` python3 test/ci/buildkite/test_runner.py \
--artifacts /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID --test_bazel
Expand All @@ -52,7 +52,7 @@
- command: |
source /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/venv/bin/activate
pip install psutil && pip install -U \
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.14.0rc0-cp36-cp36m-linux_x86_64.whl
pip install -U /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/ngraph_tensorflow_bridge-*.whl
label: ":gear: Install"
Expand Down
6 changes: 3 additions & 3 deletions test/ci/buildkite/ngtf-cpu_centos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
python3 build_ngtf.py --artifacts /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID
label: ":hammer_and_wrench: Build"
timeout_in_minutes: 30
timeout_in_minutes: 40
agents:
- "queue=cpu-centos"
parallelism: 1
Expand All @@ -40,7 +40,7 @@
- command: |
source /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/venv/bin/activate
pip install -U \
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.14.0rc0-cp36-cp36m-linux_x86_64.whl
PYTHONPATH=`pwd` python3 test/ci/buildkite/test_runner.py \
--artifacts /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID --test_bazel
Expand All @@ -65,7 +65,7 @@
- command: |
source /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/venv/bin/activate
pip install psutil && pip install -U \
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl
/localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/tensorflow/tensorflow-1.14.0rc0-cp36-cp36m-linux_x86_64.whl
pip install -U /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID/ngraph_tensorflow_bridge-*.whl
label: ":gear: Install"
Expand Down
12 changes: 6 additions & 6 deletions test/ci/buildkite/ngtf-cpu_ubuntu-bin-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
label: ":gear: Setup"
timeout_in_minutes: 30
agents:
- "queue=cpu"
- "queue=cpu-centos"
parallelism: 1
- wait
Expand All @@ -15,9 +15,9 @@
python3 build_ngtf.py --use_prebuilt_tensorflow --artifacts /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID
label: ":hammer_and_wrench: Build"
timeout_in_minutes: 30
timeout_in_minutes: 40
agents:
- "queue=cpu"
- "queue=cpu-centos"
parallelism: 1
- wait
Expand All @@ -30,7 +30,7 @@
label: ":bazel: Bazel Build"
timeout_in_minutes: 30
agents:
- "queue=cpu"
- "queue=cpu-centos"
- wait

Expand All @@ -43,12 +43,12 @@
label: ":bar_chart: ResNet50"
timeout_in_minutes: 30
agents:
- "queue=cpu"
- "queue=cpu-centos"
- wait
- command: |
rm -rf /localdisk/buildkite/artifacts/$BUILDKITE_BUILD_ID
label: ":wastebasket: Cleanup"
agents:
- "queue=cpu"
- "queue=cpu-centos"

0 comments on commit fdc5c27

Please sign in to comment.