In [64]:
from dateutil.parser import parse
from kubernetes import client, config
from io import BytesIO
from os.path import join as path
from requests import get as GET
import tarfile
from tarfile import TarError
from urllib.parse import urlparse
import yaml
import zipfile

import kfp
from kfp.components import load_component

config.load_kube_config()
api = client.CustomObjectsApi()

In [65]:
url = 'gs://ml-pipeline-playground/coin.tar.gz'
http = 'https://storage.googleapis.com/ml-pipeline-playground/coin.tar.gz'
dir = '/Users/ryan/c/pipelines/backend'
tar_gz = path(dir, 'coin.tar.gz')
tar = path(dir, 'coin.tar')
zip = path(dir, 'coin.zip')
yaml = path(dir, 'pipeline.yaml')

In [278]:
def url_or_file_to_bytes(url):
    parsed = urlparse(url)
    if not parsed.scheme:
        with open(url, 'rb') as f:
            return f.read()
    else:
        resp = GET(url)
        assert resp.ok
        return resp.content

In [217]:
def try_extract_pipeline_tar(bytes):
    try:
        with tarfile.open(fileobj=BytesIO(bytes), mode='r') as f:
            names = f.getnames()
            if names == ['pipeline.yaml']:
                tar_info = f.getmember('pipeline.yaml')
                if tar_info.isfile():
                    return f.extractfile(tar_info).read()
                raise Exception('"pipeline.yaml" in TAR archive is not a regular file')
            raise Exception('Expected TAR archive to contain only a "pipeline.yaml"; found %s' % names)
    except TarError:
        return None

In [218]:
def try_extract_pipeline_zip(bytes):
    try:
        with ZipFile(BytesIO(bytes), mode='r') as f:
            names = f.namelist()
            if names == ['pipeline.yaml']:
                return f.read('pipeline.yaml')
            raise Exception('Expected ZIP archive to contain only a "pipeline.yaml"; found %s' % names)
    except BadZipFile:
        return None

In [192]:
def get_metadata(pipeline):
    if 'metadata' not in pipeline:
        raise Exception('No "metadata" found in pipeline')
    return pipeline['metadata']

In [214]:
def get_annotations(pipeline):
    metadata = get_metadata(pipeline)
    if 'annotations' not in metadata:
        raise Exception('No "annotations" found in pipeline "metadata"')
    annotations = metadata['annotations']
    if 'pipelines.kubeflow.org/pipeline_spec' not in annotations:
        raise Exception('"pipelines.kubeflow.org/pipeline_spec" not found in pipeline metadata.annotations')
    annotations = json.loads(annotations['pipelines.kubeflow.org/pipeline_spec'])
    return annotations['name'], annotations['description']

In [215]:
def get_name(pipeline):
    metadata = get_metadata(pipeline)

    if 'generateName' not in metadata:
        raise Exception('No "generateName" found in pipeline metadata')

    name = metadata['generateName']
    if name[-1] == '-':
        name = name[:-1]

    return name

In [216]:
def get_description(pipeline):
    (_, description) = get_annotations(pipeline)
    return description

In [220]:
def load_pipeline_yaml(path):
    bytes = url_or_file_to_bytes(path)
    pipeline_yaml = try_extract_pipeline_tar(bytes)
    if pipeline_yaml is None:
        pipeline_yaml = try_extract_pipeline_zip(bytes)
        if pipeline_yaml is None:
            pipeline_yaml = bytes
    return yaml.safe_load(pipeline_yaml)

In [279]:
def load_yaml(path):
    return yaml.safe_load(url_or_file_to_bytes(path))

In [280]:
SWF_TEMPLATE_PATH = '/Users/ryan/c/pipelines/backend/src/crd/samples/scheduledworkflow/kfp.yaml'
SWF_TEMPLATE_YAML = load_yaml(SWF_TEMPLATE_PATH)

In [297]:
DEFAULT_CRON_SCHEDULE = "1 * * * * *"
def make_swf_kfp(
    pipeline_path, 
    name=None, description=None, 
    cron=None, intervalSecond=None, 
    start=None, end=None,
    maxHistory=10, enabled=True
):
    """Create a ScheduledWorkflow resource that will run a given pipeline on a desired schedule.
    
    :param str pipeline_path Path to a file or (zip or TAR) archive containing the pipeline's YAML specification. It can be a local file or "gs"- or "http"-schemed URL.
    :param str name Name of the ScheduledWorkflow resource to create; constructed from the underlying pipeline's name by default.
    :param str description Description of the ScheduledWorkflow resource to create; constructed from the underlying pipeline's description by default.
    :param str cron Crontab-formatted string specifying the schedule the pipeline should be run on; if neither `cron` nor `intervalSecond` is provided, the `DEFAULT_CRON_SCHEDULE` above is used. At most one of `cron` and `intervalSecond` should be provided.
    :param int intervalSecond Interval at which to trigger runs of the provided pipeline. At most one of `cron` and `intervalSecond` should be provided.
    :param datetime|str start If provided, begin scheduling pipelines at this date+time
    :param datetime|str end If provided, stop scheduling pipelines at this date+time
    :param int maxHistory Limit stored run-history to this many runs of the pipeline (default: 10)
    :param bool enabled Whether the generated ScheduledWorkflow should be enabled when it is created (default: True)    
    """
    pipeline = load_pipeline_yaml(pipeline_path)
    
    with open(SWF_TEMPLATE_PATH, 'r') as f:
        swf = yaml.safe_load(f)
    
    spec = swf['spec']
    
    if name is None:
        name = get_name(pipeline)
    
    if description is None:
        description = get_description(pipeline)
    
    if (cron is not None) and (intervalSecond is not None):
        raise Exception('At most one of {"cron","interval"} should be provided; received cron %s, interval %s' % (cron, intervalSecond))
    
    if (cron is None) and (intervalSecond is None):
        cron = DEFAULT_CRON_SCHEDULE
        
    schedule = {}
    if start is not None:
        if isinstance(start, str):
            start = parse(start)
        schedule['startTime'] = start
    else:
        schedule['startTime'] = ''
    if end is not None:
        if isinstance(end, str):
            end = parse(end)
        schedule['endTime'] = end
    else:
        schedule['endTime'] = ''
    
    msg_parts = []  # store pieces of scheduling metadata for inclusion in the ScheduledWorkflow's description
    trigger = {}
    if cron is not None:
        schedule['cron'] = cron
        msg_parts.append('cron: %s' % cron)
        trigger['cronSchedule'] = schedule
    else:
        schedule['intervalSecond'] = intervalSecond
        msg_parts.append('interval: %d' % intervalSecond)
        trigger['periodicSchedule'] = schedule
        
    spec['enabled'] = enabled
    spec['maxHistory'] = maxHistory
    spec['trigger'] = trigger

    if start is not None:
        msg_parts.append('start: %s' % str(start))
    if end is not None:
        msg_parts.append('end: %s' % str(end))

    spec['description'] = 'ScheduledWorkflow (%s): %s' % (', '.join(msg_parts), description)

    swf_name = 'swf-%s' % name
    spec['name'] = swf_name
    metadata = swf['metadata']
    metadata['name'] = swf_name

    workflow = spec['workflow']

    parameters = workflow['parameters']
    parameters[1]['value'] = yaml.dump(pipeline)
    
    workflow_spec = workflow['spec']
    templates = workflow_spec['templates']
    template = templates[0]
    container = template['container']
    args = container['args']
    args[-1] = name

    return swf

In [219]:
condition_zip_path = '/Users/ryan/c/pipelines/samples/core/condition/condition.py.zip'

In [312]:
swf = make_swf_kfp(condition_zip_path, cron="0 * * * * *", start='2019-09-28T00:00:00Z00:00', end='2019-09-29T04:40:00Z00:00')

In [313]:
args = dict(
    group="kubeflow.org",
    version="v1beta1",
    namespace="default",
    plural="scheduledworkflows",
    body=swf,
)

In [314]:
api.patch_namespaced_custom_object(name=name, **args)

{'apiVersion': 'kubeflow.org/v1beta1',
 'kind': 'ScheduledWorkflow',
 'metadata': {'creationTimestamp': '2019-09-29T02:43:59Z',
  'generation': 323,
  'labels': {'scheduledworkflows.kubeflow.org/enabled': 'true',
   'scheduledworkflows.kubeflow.org/status': 'Enabled'},
  'name': 'swf-conditional-execution-pipeline',
  'namespace': 'default',
  'resourceVersion': '111462',
  'selfLink': '/apis/kubeflow.org/v1beta1/namespaces/default/scheduledworkflows/swf-conditional-execution-pipeline',
  'uid': '1d7b3943-795f-4b65-b67d-865c8ac10e3f'},
 'spec': {'description': 'ScheduledWorkflow (cron: 0 * * * * *, start: 2019-09-28 00:00:00+00:00, end: 2019-09-29 00:00:00+00:00): Shows how to use dsl.Condition().',
  'enabled': True,
  'maxHistory': 10,
  'name': 'swf-conditional-execution-pipeline',
  'trigger': {'cronSchedule': {'cron': '0 * * * * *',
    'endTime': '2019-09-29T00:00:00+00:00',
    'startTime': '2019-09-28T00:00:00+00:00'}},
  'workflow': {'parameters': [{'name': 'datetime',
     'v

In [254]:
obj = api.create_namespaced_custom_object(**args)
name = obj['metadata']['name']

{'apiVersion': 'kubeflow.org/v1beta1',
 'kind': 'ScheduledWorkflow',
 'metadata': {'creationTimestamp': '2019-09-29T02:43:59Z',
  'generation': 1,
  'name': 'swf-conditional-execution-pipeline',
  'namespace': 'default',
  'resourceVersion': '93936',
  'selfLink': '/apis/kubeflow.org/v1beta1/namespaces/default/scheduledworkflows/swf-conditional-execution-pipeline',
  'uid': '1d7b3943-795f-4b65-b67d-865c8ac10e3f'},
 'spec': {'description': 'ScheduledWorkflow (cron: 1 * * * * *): Shows how to use dsl.Condition().',
  'enabled': True,
  'maxHistory': 10,
  'name': 'swf-conditional-execution-pipeline',
  'trigger': {'cronSchedule': {'cron': '1 * * * * *'}},
  'workflow': {'parameters': [{'name': 'datetime',
     'value': '[[ScheduledTime.20060102-15:04:05]]'},
    {'name': 'pipeline_yaml',
     'value': 'apiVersion: argoproj.io/v1alpha1\nkind: Workflow\nmetadata:\n  annotations: {pipelines.kubeflow.org/pipeline_spec: \'{"description": "Shows how\n      to use dsl.Condition().", "name": "Co

In [303]:
del args['body']

In [304]:
args

{'group': 'kubeflow.org',
 'namespace': 'default',
 'plural': 'scheduledworkflows',
 'version': 'v1beta1'}

In [305]:
api.delete_namespaced_custom_object(name=name, **args)

TypeError: delete_namespaced_custom_object() missing 1 required positional argument: 'body'

In [172]:
pipeline

'/var/folders/m0/mj2x82p1527349z6mn8btgtr0000gr/T/tmp8uhbrus1/condition.py.zip'

In [130]:
from os.path import basename
import sys
from tempfile import mkdtemp

In [131]:
tmp = mkdtemp()

In [143]:
module_name = basename(module_url)

In [134]:
with open(path(tmp, basename(module_url)), 'w') as f:
    f.write(module_src)

In [136]:
with open(path(tmp, '__init__.py'), 'w') as f:
    f.write('')

In [137]:
sys.path.append(tmp)
from condition import flipcoin_pipeline
sys.path.pop()