enable component build unit test
gaoning777 committed Nov 13, 2018
1 parent f077201 commit c038e44
Showing 3 changed files with 40 additions and 77 deletions.
55 changes: 23 additions & 32 deletions sdk/python/kfp/compiler/_component_builder.py
@@ -64,9 +64,8 @@ class DockerfileHelper(object):
   arc_dockerfile_name: dockerfile filename that is stored in the tarball
   gcs_path: the gcs path to store the tarball that contains both the dockerfile and the python file """
 
-  def __init__(self, arc_dockerfile_name, gcs_path):
+  def __init__(self, arc_dockerfile_name):
     self._arc_dockerfile_name = arc_dockerfile_name
-    self._gcs_path = gcs_path
 
   def _generate_dockerfile_with_py(self, target_file, base_image, python_filepath):
     """ _generate_docker_file generates a simple dockerfile with the python path """
@@ -86,22 +85,17 @@ def _wrap_files_in_tarball(self, tarball_path, files={}):
       for key, value in files.items():
         tarball.add(value, arcname=key)
 
-  def prepare_docker_tarball_with_py(self, arc_python_filename, python_filepath, base_image):
+  def prepare_docker_tarball_with_py(self, arc_python_filename, python_filepath, base_image, local_tarball_path):
     """ prepare_docker_tarball is the API to generate dockerfile and prepare the tarball with python scripts """
     with tempfile.TemporaryDirectory() as local_build_dir:
       local_dockerfile_path = os.path.join(local_build_dir, self._arc_dockerfile_name)
       self._generate_dockerfile_with_py(local_dockerfile_path, base_image, arc_python_filename)
-      local_tarball_path = os.path.join(local_build_dir, 'docker.tmp.tar.gz')
       self._wrap_files_in_tarball(local_tarball_path, {self._arc_dockerfile_name:local_dockerfile_path,
                                                        arc_python_filename:python_filepath})
-      GCSHelper.upload_gcs_file(local_tarball_path, self._gcs_path)
 
-  def prepare_docker_tarball(self, dockerfile_path):
+  def prepare_docker_tarball(self, dockerfile_path, local_tarball_path):
     """ prepare_docker_tarball is the API to prepare a tarball with the dockerfile """
-    with tempfile.TemporaryDirectory() as local_build_dir:
-      local_tarball_path = os.path.join(local_build_dir, 'docker.tmp.tar.gz')
-      self._wrap_files_in_tarball(local_tarball_path, {self._arc_dockerfile_name:dockerfile_path})
-      GCSHelper.upload_gcs_file(local_tarball_path, self._gcs_path)
+    self._wrap_files_in_tarball(local_tarball_path, {self._arc_dockerfile_name:dockerfile_path})
 
 class CodeGenerator(object):
   """ CodeGenerator helps to generate python codes with identation """
@@ -144,18 +138,10 @@ def __init__(self, gcs_base, target_image):
   def _check_gcs_path(self, gcs_path):
     """ _check_gcs_path check both the path validity and write permissions """
     logging.info('Checking path: {}...'.format(gcs_path))
-    if gcs_path.startswith('gs://'):
-      with tempfile.TemporaryDirectory() as local_build_dir:
-        rand_filename = str(uuid.uuid4()) + '.txt'
-        local_rand_file = os.path.join(local_build_dir, rand_filename)
-        Path(local_rand_file).touch()
-        random_gcs_bucket = os.path.join(gcs_path, rand_filename)
-        GCSHelper.upload_gcs_file(local_rand_file, random_gcs_bucket)
-        GCSHelper.remove_gcs_blob(random_gcs_bucket)
-      return True
-    else:
+    if not gcs_path.startswith('gs://'):
       logging.error('Error: {} should be a GCS path.'.format(gcs_path))
       return False
+    return True
 
   def _generate_kaniko_spec(self, namespace, arc_dockerfile_name, gcs_path, target_image):
     """_generate_kaniko_yaml generates kaniko job yaml based on a template yaml """
@@ -259,10 +245,12 @@ def build_image_from_func(self, component_func, namespace, base_image, timeout):
 
       # Prepare build files
       logging.info('Generate build files.')
-      docker_helper = DockerfileHelper(arc_dockerfile_name=self._arc_dockerfile_name, gcs_path=self._gcs_path)
+      docker_helper = DockerfileHelper(arc_dockerfile_name=self._arc_dockerfile_name)
+      local_tarball_file = os.path.join(local_build_dir, 'docker.tmp.tar.gz')
       docker_helper.prepare_docker_tarball_with_py(python_filepath=local_python_filepath,
                                                    arc_python_filename=self._arc_python_filepath,
-                                                   base_image=base_image)
+                                                   base_image=base_image, local_tarball_path=local_tarball_file)
+      GCSHelper.upload_gcs_file(local_tarball_file, self._gcs_path)
 
       kaniko_spec = self._generate_kaniko_spec(namespace=namespace,
                                                arc_dockerfile_name=self._arc_dockerfile_name,
@@ -279,19 +267,22 @@ def build_image_from_func(self, component_func, namespace, base_image, timeout):
 
   def build_image_from_dockerfile(self, dockerfile_path, timeout, namespace):
     """ build_image_from_dockerfile builds an image directly """
-    logging.info('Generate build files.')
-    docker_helper = DockerfileHelper(arc_dockerfile_name=self._arc_dockerfile_name, gcs_path=self._gcs_path)
-    docker_helper.prepare_docker_tarball(dockerfile_path)
+    with tempfile.TemporaryDirectory() as local_build_dir:
+      logging.info('Generate build files.')
+      docker_helper = DockerfileHelper(arc_dockerfile_name=self._arc_dockerfile_name)
+      local_tarball_file = os.path.join(local_build_dir, 'docker.tmp.tar.gz')
+      docker_helper.prepare_docker_tarball(dockerfile_path, local_tarball_path=local_tarball_file)
+      GCSHelper.upload_gcs_file(local_tarball_file, self._gcs_path)
 
-    kaniko_spec = self._generate_kaniko_spec(namespace=namespace, arc_dockerfile_name=self._arc_dockerfile_name,
+      kaniko_spec = self._generate_kaniko_spec(namespace=namespace, arc_dockerfile_name=self._arc_dockerfile_name,
                                              gcs_path=self._gcs_path, target_image=self._target_image)
-    logging.info('Start a kaniko job for build.')
-    k8s_helper = K8sHelper()
-    k8s_helper.run_job(kaniko_spec, timeout)
-    logging.info('Kaniko job complete.')
+      logging.info('Start a kaniko job for build.')
+      k8s_helper = K8sHelper()
+      k8s_helper.run_job(kaniko_spec, timeout)
+      logging.info('Kaniko job complete.')
 
-    # Clean up
-    GCSHelper.remove_gcs_blob(self._gcs_path)
+      # Clean up
+      GCSHelper.remove_gcs_blob(self._gcs_path)
 
 def _generate_pythonop(component_func, target_image):
   """ Generate operator for the pipeline authors
59 changes: 16 additions & 43 deletions sdk/python/tests/compiler/component_builder_test.py
@@ -24,30 +24,7 @@
 from pathlib import Path
 import inspect
 
-GCS_BASE = 'gs://ngao-mlpipeline-testing/'
-
-class TestGCSHelper(unittest.TestCase):
-
-  def test_upload_gcs_path(self):
-    """ test uploading gcs file """
-    # prepare
-    test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
-    temp_file = os.path.join(test_data_dir, 'test_data.tmp')
-    temp_downloaded_file = os.path.join(test_data_dir, 'test_data.tmp.downloaded')
-    Path(temp_file).touch()
-    gcs_path = os.path.join(GCS_BASE, 'test_data.tmp')
-
-    # check
-    try:
-      GCSHelper.upload_gcs_file(temp_file, gcs_path)
-      GCSHelper.download_gcs_blob(temp_downloaded_file, gcs_path)
-      GCSHelper.remove_gcs_blob(gcs_path)
-    except:
-      self.fail('GCS helper failure')
-
-    # clean up
-    os.remove(temp_file)
-    os.remove(temp_downloaded_file)
+GCS_BASE = 'gs://kfp-testing/'
 
 class TestDockerfileHelper(unittest.TestCase):
 
@@ -65,8 +42,8 @@ def test_wrap_files_in_tarball(self):
       f.write('temporary file two content')
 
     # check
-    docker_helper = DockerfileHelper(arc_dockerfile_name='', gcs_path='')
-    self.assertTrue(docker_helper._wrap_files_in_tarball(temp_tarball, {'dockerfile':temp_file_one, 'main.py':temp_file_two}))
+    docker_helper = DockerfileHelper(arc_dockerfile_name='')
+    docker_helper._wrap_files_in_tarball(temp_tarball, {'dockerfile':temp_file_one, 'main.py':temp_file_two})
     self.assertTrue(os.path.exists(temp_tarball))
     temp_tarball_handler = tarfile.open(temp_tarball)
     temp_files = temp_tarball_handler.getmembers()
@@ -93,7 +70,7 @@ def test_generate_dockerfile(self):
 ENTRYPOINT ["python3", "/ml/main.py"]'''
 
     # check
-    docker_helper = DockerfileHelper(arc_dockerfile_name=target_dockerfile, gcs_path='')
+    docker_helper = DockerfileHelper(arc_dockerfile_name=target_dockerfile)
     docker_helper._generate_dockerfile_with_py(target_file=target_dockerfile, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.10.0', python_filepath='main.py')
     with open(target_dockerfile, 'r') as f:
       target_dockerfile_payload = f.read()
@@ -108,22 +85,21 @@ def test_prepare_docker_with_py(self):
     # prepare
     test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
     python_filepath = os.path.join(test_data_dir, 'basic.py')
-    downloaded_tarball = os.path.join(test_data_dir, 'test_docker.tar.gz')
-    gcs_tar_path = os.path.join(GCS_BASE, 'test_docker.tar.gz')
+    generated_tarball = os.path.join(test_data_dir, 'test_docker.tar.gz')
 
     # check
-    docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile', gcs_path=gcs_tar_path)
-    docker_helper.prepare_docker_tarball_with_py(arc_python_filename='main.py', python_filepath=python_filepath, base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.8.0')
-    GCSHelper.download_gcs_blob(downloaded_tarball, gcs_tar_path)
-    temp_tarball_handler = tarfile.open(downloaded_tarball)
+    docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile')
+    docker_helper.prepare_docker_tarball_with_py(arc_python_filename='main.py', python_filepath=python_filepath,
+                                                 base_image='gcr.io/ngao-mlpipeline-testing/tensorflow:1.8.0',
+                                                 local_tarball_path=generated_tarball)
+    temp_tarball_handler = tarfile.open(generated_tarball)
     temp_files = temp_tarball_handler.getmembers()
     self.assertTrue(len(temp_files) == 2)
     for temp_file in temp_files:
       self.assertTrue(temp_file.name in ['dockerfile', 'main.py'])
 
     # clean up
-    os.remove(downloaded_tarball)
-    GCSHelper.remove_gcs_blob(gcs_tar_path)
+    os.remove(generated_tarball)
 
   def test_prepare_docker_tarball(self):
     """ Test the whole prepare docker tarball """
@@ -132,23 +108,20 @@ def test_prepare_docker_tarball(self):
     test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
     dockerfile_path = os.path.join(test_data_dir, 'component.target.dockerfile')
     Path(dockerfile_path).touch()
-    downloaded_tarball = os.path.join(test_data_dir, 'test_docker.tar.gz')
-    gcs_tar_path = os.path.join(GCS_BASE, 'test_docker.tar.gz')
+    generated_tarball = os.path.join(test_data_dir, 'test_docker.tar.gz')
 
     # check
-    docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile', gcs_path=gcs_tar_path)
-    docker_helper.prepare_docker_tarball(dockerfile_path=dockerfile_path)
-    GCSHelper.download_gcs_blob(downloaded_tarball, gcs_tar_path)
-    temp_tarball_handler = tarfile.open(downloaded_tarball)
+    docker_helper = DockerfileHelper(arc_dockerfile_name='dockerfile')
+    docker_helper.prepare_docker_tarball(dockerfile_path=dockerfile_path, local_tarball_path=generated_tarball)
+    temp_tarball_handler = tarfile.open(generated_tarball)
     temp_files = temp_tarball_handler.getmembers()
     self.assertTrue(len(temp_files) == 1)
     for temp_file in temp_files:
       self.assertTrue(temp_file.name in ['dockerfile'])
 
     # clean up
-    os.remove(downloaded_tarball)
+    os.remove(generated_tarball)
     os.remove(dockerfile_path)
-    GCSHelper.remove_gcs_blob(gcs_tar_path)
 
 # hello function is used by the TestCodeGenerator to verify the auto generated python function
 def hello():
3 changes: 1 addition & 2 deletions sdk/python/tests/compiler/main.py
@@ -23,8 +23,7 @@
 if __name__ == '__main__':
   suite = unittest.TestSuite()
   suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(compiler_tests))
-  #TODO: enable the test once a gcs mock class is created.
-  #suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_builder_test))
+  suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_builder_test))
   runner = unittest.TextTestRunner()
   if not runner.run(suite).wasSuccessful():
     sys.exit(1)
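
With the component builder tests no longer touching GCS, the suite above can run anywhere. A minimal sketch of invoking it directly (not part of the commit; assumes sdk/python/tests/compiler is on sys.path):

import unittest

import component_builder_test  # the test module enabled above

suite = unittest.TestSuite()
suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(component_builder_test))
unittest.TextTestRunner(verbosity=2).run(suite)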
