diff --git a/README.md b/README.md index c97ed26c..594fb5a9 100644 --- a/README.md +++ b/README.md @@ -46,38 +46,6 @@ To compile and use TensorFlow Data Validation, you need to set up some prerequis If bazel is not installed on your system, install it now by following [these directions](https://bazel.build/versions/master/docs/install.html). -#### Packages - -To install TensorFlow Data Validation dependencies, execute the following: - - -```shell -sudo apt-get update && sudo apt-get install -y \ - automake \ - build-essential \ - curl \ - libcurl3-dev \ - git \ - libtool \ - libfreetype6-dev \ - libpng12-dev \ - libzmq3-dev \ - pkg-config \ - python-dev \ - python-numpy \ - python-pip \ - software-properties-common \ - swig \ - zip \ - zlib1g-dev -``` - -The list of packages needed to build TensorFlow changes over time, so if you -encounter any issues, refer TensorFlow's [build -instructions](https://www.tensorflow.org/install/install_sources). Pay -particular attention to `apt-get install` and `pip install` commands which you -may need to run. - ### 2. Clone the TensorFlow Data Validation repository ```shell @@ -94,17 +62,17 @@ pass `-b ` to the `git clone` command. TensorFlow Data Validation uses Bazel to build. Use Bazel commands to build individual targets or the entire source tree. -To build the entire tree, execute: +To build the Python wrappers for the C++ modules, execute: ```shell -bazel build -c opt tensorflow_data_validation/anomalies/... +bazel build -c opt tensorflow_data_validation/anomalies:pywrap_tensorflow_data_validation ``` ### 4. 
Copy over generated Python wrappers ```shell -cp bazel-bin/tensorflow_data_validation/anomalies/_pywrap_validation.so tensorflow_data_validation/anomalies/ -cp bazel-bin/tensorflow_data_validation/anomalies/pywrap_validation.py tensorflow_data_validation/anomalies/ +cp bazel-bin/tensorflow_data_validation/anomalies/_pywrap_tensorflow_data_validation.so tensorflow_data_validation/anomalies/ +cp bazel-bin/tensorflow_data_validation/anomalies/pywrap_tensorflow_data_validation.py tensorflow_data_validation/anomalies/ ``` ### 5. Build the pip package diff --git a/tensorflow_data_validation/anomalies/BUILD b/tensorflow_data_validation/anomalies/BUILD index 456db82a..77133978 100644 --- a/tensorflow_data_validation/anomalies/BUILD +++ b/tensorflow_data_validation/anomalies/BUILD @@ -313,8 +313,11 @@ cc_test( ], ) +# Note that the name of the target should follow the specific naming +# pattern specified in tensorflow/tf_exported_symbols.lds in order +# for the init function in the generated .so file to be exported. 
tf_py_wrap_cc( - name = "pywrap_validation", + name = "pywrap_tensorflow_data_validation", srcs = ["validation_api.i"], deps = [ ":feature_statistics_validator", diff --git a/tensorflow_data_validation/api/validation_api.py b/tensorflow_data_validation/api/validation_api.py index f6e3cf47..29a578a1 100644 --- a/tensorflow_data_validation/api/validation_api.py +++ b/tensorflow_data_validation/api/validation_api.py @@ -21,7 +21,7 @@ from __future__ import print_function -from tensorflow_data_validation.anomalies import pywrap_validation +from tensorflow_data_validation.anomalies import pywrap_tensorflow_data_validation from tensorflow_data_validation.types_compat import Optional from tensorflow_metadata.proto.v0 import anomalies_pb2 from tensorflow_metadata.proto.v0 import schema_pb2 @@ -56,7 +56,7 @@ def infer_schema(statistics, raise ValueError('Only statistics proto with one dataset is currently ' 'supported for inferring schema.') - schema_proto_string = pywrap_validation.InferSchema( + schema_proto_string = pywrap_tensorflow_data_validation.InferSchema( statistics.datasets[0].SerializeToString(), max_string_domain_size) # Parse the serialized Schema proto. @@ -95,8 +95,10 @@ def validate_statistics(statistics, raise ValueError('Only statistics proto with one dataset is currently ' 'supported for validation.') - anomalies_proto_string = pywrap_validation.ValidateFeatureStatistics( - statistics.datasets[0].SerializeToString(), schema.SerializeToString()) + anomalies_proto_string = ( + pywrap_tensorflow_data_validation.ValidateFeatureStatistics( + statistics.datasets[0].SerializeToString(), + schema.SerializeToString())) # Parse the serialized Anomalies proto. result = anomalies_pb2.Anomalies()