From 9b5e23391dd5fce214ca68fad1ad345e52ce3cac Mon Sep 17 00:00:00 2001
From: Yong Tang
Date: Sat, 12 Dec 2020 08:12:18 -0800
Subject: [PATCH] Add initial HDFS tests (#1225)

* Add initial HDFS tests

Signed-off-by: Yong Tang

* Install Java and libhdfs.so for tests

* Fix kokorun

Signed-off-by: Yong Tang
---
Reviewer notes (this section sits after the three-dash separator, so it
is not part of the commit message):

* What the patch does: installs OpenJDK 8 and Hadoop 2.7.0 in the Linux
  wheel-test script, starts a Hadoop container from the GitHub Actions
  and kokoro test set-up, and adds one eager-mode test that writes and
  reads back a small file through an hdfse:// URL on port 9000.
* NOTE(review): this text was re-flowed from a copy whose line breaks and
  indentation had been collapsed. The indentation of the lines in
  build.wheel.sh and build.yml, the position of the blank context line in
  io_cpu.sh, and blank lines inside the commit message are reconstructed;
  verify against the original commit before applying.
* build.wheel.sh: the last added line is a bare `export`, which in bash
  prints every exported variable. It looks like a leftover debugging
  line; consider dropping it in a follow-up so the CI log does not dump
  the environment.
* hdfs_test.sh: readiness is a fixed `sleep 30`, not a poll of the
  service; the following `docker logs` call is diagnostic only.
* test_hdfs_eager.py: `os`, `time` and `tempfile` are imported but never
  used. `tensorflow_io` is not referenced by name either - presumably it
  is imported for the side effect of registering the hdfse:// scheme;
  confirm before removing.

 .github/workflows/build.wheel.sh | 15 ++++++++++++
 .github/workflows/build.yml      |  1 +
 .kokorun/io_cpu.sh               |  1 +
 tests/test_hdfs/hdfs_test.sh     | 27 ++++++++++++++++++++
 tests/test_hdfs_eager.py         | 42 ++++++++++++++++++++++++++++++++
 5 files changed, 86 insertions(+)
 create mode 100755 tests/test_hdfs/hdfs_test.sh
 create mode 100644 tests/test_hdfs_eager.py

diff --git a/.github/workflows/build.wheel.sh b/.github/workflows/build.wheel.sh
index 56268aee9..1130a7cc9 100755
--- a/.github/workflows/build.wheel.sh
+++ b/.github/workflows/build.wheel.sh
@@ -29,5 +29,20 @@ if [[ $(uname) == "Linux" ]]; then
   apt-get -y -qq install $PYTHON_VERSION ffmpeg dnsutils libmp3lame0
   curl -sSOL https://bootstrap.pypa.io/get-pip.py
   $PYTHON_VERSION get-pip.py -q
+
+  # Install Java
+  apt-get -y -qq install openjdk-8-jdk
+  update-alternatives --config java
+  export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
+
+  # Install Hadoop
+  curl -OL https://archive.apache.org/dist/hadoop/common/hadoop-2.7.0/hadoop-2.7.0.tar.gz
+  tar -xzf hadoop-2.7.0.tar.gz -C /usr/local
+  export HADOOP_HOME=/usr/local/hadoop-2.7.0
+
+  # Update environmental variable
+  export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${JAVA_HOME}/jre/lib/amd64/server:${HADOOP_HOME}/lib/native
+  export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob)
+  export
 fi
 run_test $PYTHON_VERSION
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b2d0cb434..fbe84c670 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -298,6 +298,7 @@ jobs:
           bash -x -e tests/test_sql/sql_test.sh
           bash -x -e tests/test_gcloud/test_gcs.sh gcs-emulator
           bash -x -e tests/test_pulsar/pulsar_test.sh
+          bash -x -e tests/test_hdfs/hdfs_test.sh
       - name: Test Linux
         run: |
           set -x -e
diff --git a/.kokorun/io_cpu.sh b/.kokorun/io_cpu.sh
index 66dde8204..9c62f914c 100755
--- a/.kokorun/io_cpu.sh
+++ b/.kokorun/io_cpu.sh
@@ -81,6 +81,7 @@ bash -x -e tests/test_azure/start_azure.sh
 bash -x -e tests/test_sql/sql_test.sh sql
 bash -x -e tests/test_elasticsearch/elasticsearch_test.sh start
 bash -x -e tests/test_mongodb/mongodb_test.sh start
+bash -x -e tests/test_hdfs/hdfs_test.sh
 
 docker run -i --rm -v $PWD:/v -w /v --net=host \
   buildpack-deps:20.04 bash -x -e .github/workflows/build.wheel.sh python${PYTHON_VERSION}
diff --git a/tests/test_hdfs/hdfs_test.sh b/tests/test_hdfs/hdfs_test.sh
new file mode 100755
index 000000000..64eae6921
--- /dev/null
+++ b/tests/test_hdfs/hdfs_test.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+set -e
+set -o pipefail
+
+HADOOP_VERSION=2.7.0
+docker pull sequenceiq/hadoop-docker:$HADOOP_VERSION
+docker run -d --rm --net=host --name=tensorflow-io-hdfs sequenceiq/hadoop-docker:$HADOOP_VERSION
+echo "Waiting for 30 secs until hadoop is up and running"
+sleep 30
+docker logs tensorflow-io-hdfs
+echo "Hadoop up"
+exit 0
diff --git a/tests/test_hdfs_eager.py b/tests/test_hdfs_eager.py
new file mode 100644
index 000000000..468d04b06
--- /dev/null
+++ b/tests/test_hdfs_eager.py
@@ -0,0 +1,42 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+# ==============================================================================
+"""Tests for HDFS file system"""
+
+import os
+import sys
+import socket
+import time
+import tempfile
+import tensorflow as tf
+import tensorflow_io as tfio
+import pytest
+
+
+@pytest.mark.skipif(
+    sys.platform in ("win32", "darwin"),
+    reason="TODO HDFS not setup properly on macOS/Windows yet",
+)
+def test_read_file():
+    """Test case for reading HDFS"""
+
+    address = socket.gethostbyname(socket.gethostname())
+    print("ADDRESS: {}".format(address))
+
+    body = b"1234567"
+    tf.io.write_file("hdfse://{}:9000/file.txt".format(address), body)
+
+    content = tf.io.read_file("hdfse://{}:9000/file.txt".format(address))
+    print("CONTENT: {}".format(content))
+    assert content == body