In [1]:
%%file run_status.py
class RunStatus(object):
    """Enum for status of an :py:class:`mlflow.entities.Run`."""
    RUNNING, SCHEDULED, FINISHED, FAILED = range(1, 5)
    _STRING_TO_STATUS = {
        "RUNNING": RUNNING,
        "SCHEDULED": SCHEDULED,
        "FINISHED": FINISHED,
        "FAILED": FAILED,
    }
    _STATUS_TO_STRING = {value: key for key, value in _STRING_TO_STATUS.items()}
    _TERMINATED_STATUSES = set([FINISHED, FAILED])

    @staticmethod
    def from_string(status_str):
        if status_str not in RunStatus._STRING_TO_STATUS:
            raise Exception(
                "Could not get run status corresponding to string %s. Valid run "
                "status strings: %s" % (status_str, list(RunStatus._STRING_TO_STATUS.keys())))
        return RunStatus._STRING_TO_STATUS[status_str]

    @staticmethod
    def to_string(status):
        if status not in RunStatus._STATUS_TO_STRING:
            raise Exception("Could not get string corresponding to run status %s. Valid run "
                            "statuses: %s" % (status, list(RunStatus._STATUS_TO_STRING.keys())))
        return RunStatus._STATUS_TO_STRING[status]

    @staticmethod
    def is_terminated(status):
        return status in RunStatus._TERMINATED_STATUSES

Writing run_status.py


In [2]:
%%file spinner.py
import sys
import time
import itertools
import threading


class Spinner:
    def __init__(self, delay=0.1):
        self.spinner_generator = itertools.cycle(['-', '/', '|', '\\'])
        if delay and float(delay): self.delay = delay

    def spinner_task(self):
        while self.busy:
            sys.stdout.write(next(self.spinner_generator))
            sys.stdout.flush()
            time.sleep(self.delay)
            sys.stdout.write('\b')

    def start(self):
        self.busy = True
        threading.Thread(target=self.spinner_task).start()
        return True

    def stop(self):
        self.busy = False
        time.sleep(self.delay)
        return True

Writing spinner.py


In [3]:
%%file submitted_run.py
from abc import abstractmethod
from .spinner import Spinner
import os
import signal
import logging
from .run_status import RunStatus
#from mlflow.entities import RunStatus


_logger = logging.getLogger(__name__)


class SubmittedRun(object):
    """
    Wrapper around an MLflow project run (e.g. a subprocess running an entry point
    command or a Databricks job run) and exposing methods for waiting on and cancelling the run.
    This class defines the interface that the MLflow project runner uses to manage the lifecycle
    of runs launched in different environments (e.g. runs launched locally or on Databricks).
    ``SubmittedRun`` is not thread-safe. That is, concurrent calls to wait() / cancel()
    from multiple threads may inadvertently kill resources (e.g. local processes) unrelated to the
    run.
    NOTE:
        Subclasses of ``SubmittedRun`` must expose a ``run_id`` member containing the
        run's MLflow run ID.
    """
    @abstractmethod
    def wait(self):
        """
        Wait for the run to finish, returning True if the run succeeded and false otherwise. Note
        that in some cases (e.g. remote execution on Databricks), we may wait until the remote job
        completes rather than until the MLflow run completes.
        """
        pass

    @abstractmethod
    def get_status(self):
        """
        Get status of the run.
        """
        pass

    @abstractmethod
    def cancel(self):
        """
        Cancel the run (interrupts the command subprocess, cancels the Databricks run, etc) and
        waits for it to terminate. The MLflow run status may not be set correctly
        upon run cancellation.
        """
        pass

    @property
    @abstractmethod
    def run_id(self):
        pass


class LocalSubmittedRun(SubmittedRun):
    """
    Instance of ``SubmittedRun`` corresponding to a subprocess launched to run an entry point
    command locally.
    """
    def __init__(self, run_id, command_proc):
        super(LocalSubmittedRun, self).__init__()

        self._run_id = run_id
        self.spinner = Spinner()
        try:
            self.spinner.start()
            self.command_proc = command_proc
        finally:
            self.spinner.stop()

    @property
    def run_id(self):
        return self._run_id

    def wait(self):
        return self.command_proc.wait() == 0

    def cancel(self):
        # Interrupt child process if it hasn't already exited
        if self.command_proc.poll() is None:
            # Kill the the process tree rooted at the child if it's the leader of its own process
            # group, otherwise just kill the child
            try:
                if self.command_proc.pid == os.getpgid(self.command_proc.pid):
                    os.killpg(self.command_proc.pid, signal.SIGTERM)
                else:
                    self.command_proc.terminate()
            except OSError:
                # The child process may have exited before we attempted to terminate it, so we
                # ignore OSErrors raised during child process termination
                _logger.info(
                    "Failed to terminate child process (PID %s) corresponding to Arthur.io "
                    "run with ID %s. The process may have already exited.",
                    self.command_proc.pid, self._run_id)
            self.command_proc.wait()

    def _get_status(self):
        exit_code = self.command_proc.poll()
        if exit_code is None:
            return RunStatus.RUNNING
        if exit_code == 0:
            return RunStatus.FINISHED
        return RunStatus.FAILED

    def get_status(self):
        return RunStatus.to_string(self._get_status()),self.command_proc.pid

Writing submitted_run.py


In [13]:
%%file store/exp_file_process.py
import os,sys
import yaml
from .file_utils import (build_path,exists,mkdir,is_directory,write_yaml,
                        find,read_yaml,list_subdirs,mv,get_parent_dir,_copy_file_or_tree)
from Arthur.core.utils.experiment.experiment import Experiment
from Arthur.core.utils.store.abstract_store import AbstractStore
from Arthur.core.utils.env import get_env
from Arthur.core.utils.entities.ViewType import ViewType
from Arthur.core.utils.experiment.validation import _validate_run_id,_validate_experiment_id
PY2 = (sys.version_info.major == 2)
if PY2:
    from backports import tempfile
else:
    import tempfile

_TRACKING_DIR_ENV_VAR = "ARTHUR_TRACKING_DIR"


def _default_root_dir():
    return get_env(_TRACKING_DIR_ENV_VAR) or os.path.abspath("arthur_runs")


class FileStore(AbstractStore):
    TRASH_FOLDER_NAME = ".trash"
    ARTIFACTS_FOLDER_NAME = "artifacts"
    METRICS_FOLDER_NAME = "metrics"
    PARAMS_FOLDER_NAME = "params"
    TAGS_FOLDER_NAME = "tags"
    META_DATA_FILE_NAME = "meta.yaml"

    def __init__(self, root_directory=None, artifact_root_uri=None):
        """
        Create a new FileStore with the given root directory and a given default artifact root URI.
        """
        super(FileStore, self).__init__()
        self.root_directory = root_directory or _default_root_dir()
        self.artifact_root_uri = artifact_root_uri or self.root_directory
        self.trash_folder = build_path(self.root_directory, FileStore.TRASH_FOLDER_NAME)
        # Create root directory if needed
        if not exists(self.root_directory):
            mkdir(self.root_directory)
            #self._create_experiment_with_id(name="Default",
            #                                experiment_id=Experiment.DEFAULT_EXPERIMENT_ID,
            #                                artifact_uri=None)
        # Create trash folder if needed
        if not exists(self.trash_folder):
            mkdir(self.trash_folder)
    def _check_root_dir(self):
        """
        Run checks before running directory operations.
        """
        if not exists(self.root_directory):
            raise Exception("'%s' does not exist." % self.root_directory)
        if not is_directory(self.root_directory):
            raise Exception("'%s' is not a directory." % self.root_directory)
    def _get_experiment_path(self, experiment_id, view_type=ViewType.ALL):
        parents = []
        if view_type == ViewType.ACTIVE_ONLY or view_type == ViewType.ALL:
            parents.append(self.root_directory)
        if view_type == ViewType.DELETED_ONLY or view_type == ViewType.ALL:
            parents.append(self.trash_folder)
        for parent in parents:
            exp_list = find(parent, str(experiment_id), full_path=True)
            if len(exp_list) > 0:
                return exp_list[0]
        return None
       
    def _get_run_dir(self, experiment_id, run_uuid):
        #_validate_run_id(run_uuid)
        if not self._has_experiment(experiment_id):
            return None
        return build_path(self._get_experiment_path(experiment_id), run_uuid)
    def _get_artifact_dir(self, experiment_id, run_uuid):
        #_validate_run_id(run_uuid)
        artifacts_dir = build_path(self.get_experiment(experiment_id).artifact_location,
                                   run_uuid,
                                   FileStore.ARTIFACTS_FOLDER_NAME)
        return artifacts_dir
    def _get_active_experiments(self, full_path=False):
        exp_list = list_subdirs(self.root_directory, full_path)
        return [exp for exp in exp_list if not exp.endswith(FileStore.TRASH_FOLDER_NAME)]

    def _get_deleted_experiments(self, full_path=False):
        return list_subdirs(self.trash_folder, full_path)

    def list_experiments(self, view_type=ViewType.ACTIVE_ONLY):
        self._check_root_dir()
        rsl = []
        if view_type == ViewType.ACTIVE_ONLY or view_type == ViewType.ALL:
            rsl += self._get_active_experiments(full_path=False)
        if view_type == ViewType.DELETED_ONLY or view_type == ViewType.ALL:
            rsl += self._get_deleted_experiments(full_path=False)
        experiments = []
        for exp_id in rsl:
            try:
                # trap and warn known issues, will raise unexpected exceptions to caller
                experiment = self._get_experiment(exp_id, view_type)
                if experiment:
                    experiments.append(experiment)
            except MissingConfigException as rnfe:
                # Trap malformed experiments and log warnings.
                logging.warning("Malformed experiment '%s'. Detailed error %s",
                                str(exp_id), str(rnfe), exc_info=True)
        return experiments

    def _create_experiment_with_id(self, project_path,name, experiment_id, artifact_uri=None):
        self._check_root_dir()
        meta_dir = mkdir(self.root_directory, str(experiment_id))
        artifact_uri = artifact_uri or build_path(self.artifact_root_uri, str(experiment_id))
        experiment = Experiment(experiment_id, name, artifact_uri, Experiment.ACTIVE_LIFECYCLE)
        write_yaml(meta_dir, FileStore.META_DATA_FILE_NAME, dict(experiment))
        #copy project files to experiment path from web source
        _copy_file_or_tree(project_path,artifact_uri,dst_dir=name)
        return experiment_id

    def create_experiment(self, project_path,name, artifact_location=None):
        self._check_root_dir()
        if name is None or name == "":
            raise Exception("Invalid experiment name '%s'" % name)
        experiment = self.get_experiment_by_name(name)
        if experiment is not None:
            raise Exception("Experiment '%s' already exists." % experiment.name)
        # Get all existing experiments and find the one with largest ID.
        # len(list_all(..)) would not work when experiments are deleted.
        experiments_ids = [e.experiment_id for e in self.list_experiments(ViewType.ALL)]
        experiment_id = max(experiments_ids) + 1 if experiments_ids else 0
        return self._create_experiment_with_id(project_path,name, experiment_id, artifact_location)

    def _has_experiment(self, experiment_id):
        return self._get_experiment_path(experiment_id) is not None 
    def get_experiment(self, experiment_id):
        """
        Fetches the experiment. This will search for active as well as deleted experiments.
        :param experiment_id: Integer id for the experiment
        :return: A single Experiment object if it exists, otherwise raises an Exception.
        """
        experiment = self._get_experiment(experiment_id)
        if experiment is None:
            raise Exception("Experiment '%s' does not exist." % experiment_id)
        return experiment
    def get_experiment_by_name(self, name):
        self._check_root_dir()
        for experiment in self.list_experiments(ViewType.ALL):
            if experiment.name == name:
                return experiment
        return None
    def _get_experiment(self, experiment_id, view_type=ViewType.ALL):
        self._check_root_dir()
        _validate_experiment_id(experiment_id)
        experiment_dir = self._get_experiment_path(experiment_id, view_type)
        if experiment_dir is None:
            raise Exception("Could not find experiment with ID %s" % experiment_id)
        meta = read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
        if experiment_dir.startswith(self.trash_folder):
            meta['lifecycle_stage'] = Experiment.DELETED_LIFECYCLE
        else:
            meta['lifecycle_stage'] = Experiment.ACTIVE_LIFECYCLE
        experiment = Experiment.from_dictionary(meta)
        if int(experiment_id) != experiment.experiment_id:
            logging.warning("Experiment ID mismatch for exp %s. ID recorded as '%s' in meta data. "
                            "Experiment will be ignored.",
                            str(experiment_id), str(experiment.experiment_id), exc_info=True)
            return None
        return experiment
    def delete_experiment(self, experiment_id):
        experiment_dir = self._get_experiment_path(experiment_id, ViewType.ACTIVE_ONLY)
        if experiment_dir is None:
            raise Exception("Could not find experiment with ID %s" % experiment_id)
        mv(experiment_dir, self.trash_folder)
    def restore_experiment(self, experiment_id):
        experiment_dir = self._get_experiment_path(experiment_id, ViewType.DELETED_ONLY)
        if experiment_dir is None:
            raise Exception("Could not find deleted experiment with ID %d" % experiment_id)
        conflict_experiment = self._get_experiment_path(experiment_id, ViewType.ACTIVE_ONLY)
        if conflict_experiment is not None:
            raise Exception(
                    "Cannot restore eperiment with ID %d. "
                    "An experiment with same ID already exists." % experiment_id)
        mv(experiment_dir, self.root_directory)
    def rename_experiment(self, experiment_id, new_name):
        meta_dir = os.path.join(self.root_directory, str(experiment_id))
        # if experiment is malformed, will raise error
        experiment = self._get_experiment(experiment_id)
        if experiment is None:
            raise Exception("Experiment '%s' does not exist." % experiment_id)
        experiment._set_name(new_name)
        if experiment.lifecycle_stage != Experiment.ACTIVE_LIFECYCLE:
            raise Exception("Cannot rename experiment in non-active lifecycle stage."
                            " Current stage: %s" % experiment.lifecycle_stage)
        write_yaml(meta_dir, FileStore.META_DATA_FILE_NAME, dict(experiment), overwrite=True)
    def _find_experiment_folder(self, run_path):
        """
        Given a run path, return the parent directory for its experiment.
        """
        parent = get_parent_dir(run_path)
        if os.path.basename(parent) == FileStore.TRASH_FOLDER_NAME:
            return get_parent_dir(parent)
        return parent
#test=FileStore()
#test._get_experiment_path(0)
#test._get_run_dir(0,'110')
#test._get_artifact_dir(0,'110')
#test._get_active_experiments()
#test._get_deleted_experiments()
#test.list_experiments()
#test._create_experiment_with_id('leepand',4)
#test.create_experiment('arthur')
#test.delete_experiment(4)
#test.restore_experiment(4)
#test.rename_experiment(5,'arthur_new')
#test._find_experiment_folder('../')
#test.list_experiments()
#test.create_experiment('../tttt','arthur')
#_copy_file_or_tree('../tttt','arthur_runs/',dst_dir='code')

Overwriting store/exp_file_process.py


In [9]:
%%file experiment.py
from ._arthur_object import _ArthurObject


class Experiment(_ArthurObject):
    """
    Experiment object.
    """
    DEFAULT_EXPERIMENT_ID = 0
    ACTIVE_LIFECYCLE = 'active'
    DELETED_LIFECYCLE = 'deleted'

    def __init__(self, experiment_id, name, artifact_location, lifecycle_stage):
        super(Experiment, self).__init__()
        self._experiment_id = experiment_id
        self._name = name
        self._artifact_location = artifact_location
        self._lifecycle_stage = lifecycle_stage

    @property
    def experiment_id(self):
        """Integer ID of the experiment."""
        return self._experiment_id

    @property
    def name(self):
        """String name of the experiment."""
        return self._name

    def _set_name(self, new_name):
        self._name = new_name

    @property
    def artifact_location(self):
        """String corresponding to the root artifact URI for the experiment."""
        return self._artifact_location

    @property
    def lifecycle_stage(self):
        """Lifecycle stage of the experiment. Can either be 'active' or 'deleted'."""
        return self._lifecycle_stage

    @classmethod
    def from_proto(cls, proto):
        return cls(proto.experiment_id, proto.name, proto.artifact_location, proto.lifecycle_stage)



    @classmethod
    def _properties(cls):
        # TODO: Hard coding this list of props for now. There has to be a clearer way...
        return ["experiment_id", "name", "artifact_location", "lifecycle_stage"]

Writing experiment.py


In [14]:
%%file env.py
import os


def get_env(variable_name):
    return os.environ.get(variable_name)


def unset_variable(variable_name):
    if variable_name in os.environ:
        del os.environ[variable_name]

Writing env.py


In [12]:
%%file experiment/validation.py
"""
Utilities for validating user inputs such as metric names and parameter names.
"""
import os.path
import re

# Regex for valid run IDs: must be a 32-character hex string.
_RUN_ID_REGEX = re.compile(r"^[0-9a-f]{32}$")
def _validate_run_id(run_id):
    """Check that `run_id` is a valid run ID and raise an exception if it isn't."""
    if _RUN_ID_REGEX.match(run_id) is None:
        raise Exception("Invalid run ID: '%s'" % run_id)


def _validate_experiment_id(exp_id):
    """Check that `experiment_id`is a valid integer and raise an exception if it isn't."""
    try:
        int(exp_id)
    except ValueError:
        raise Exception("Invalid experiment ID: '%s'" % exp_id)

Writing experiment/validation.py


In [10]:
%%file _arthur_object.py
from abc import abstractmethod
import pprint


class _ArthurObject(object):
    def __iter__(self):
        # Iterate through list of properties and yield as key -> value
        for prop in self._properties():
            yield prop, self.__getattribute__(prop)

    @classmethod
    @abstractmethod
    def _properties(cls):
        pass

    @classmethod
    @abstractmethod
    def from_proto(cls, proto):
        pass

    @classmethod
    def from_dictionary(cls, the_dict):
        filtered_dict = {key: value for key, value in the_dict.items() if key in cls._properties()}
        return cls(**filtered_dict)

    def __repr__(self):
        return to_string(self)


def to_string(obj):
    return _ArthurObjectPrinter().to_string(obj)


def get_classname(obj):
    return type(obj).__name__


class _ArthurObjectPrinter(object):
    _MAX_LIST_LEN = 2

    def __init__(self):
        super(_ArthurObjectPrinter, self).__init__()
        self.printer = pprint.PrettyPrinter()

    def to_string(self, obj):
        if isinstance(obj, _MLflowObject):
            return "<%s: %s>" % (get_classname(obj), self._entity_to_string(obj))
        # Handle nested lists inside MLflow entities (e.g. lists of metrics/params)
        if isinstance(obj, list):
            res = [self.to_string(elem) for elem in obj[:self._MAX_LIST_LEN]]
            if len(obj) > self._MAX_LIST_LEN:
                res.append("...")
            return "[%s]" % ", ".join(res)
        return self.printer.pformat(obj)

    def _entity_to_string(self, entity):
        return ", ".join(["%s=%s" % (key, self.to_string(value)) for key, value in entity])

Writing _arthur_object.py


In [8]:
%%file store/file_utils.py
import os
import yaml
import shutil

ENCODING = "utf-8"


def is_directory(name):
    return os.path.isdir(name)


def is_file(name):
    return os.path.isfile(name)


def exists(name):
    return os.path.exists(name)

def build_path(*path_segments):
    """ Returns the path formed by joining the passed-in path segments. """
    return os.path.join(*path_segments)


def mkdir(root, name=None):  # noqa
    """
    Make directory with name "root/name", or just "root" if name is None.
    :param root: Name of parent directory
    :param name: Optional name of leaf directory
    :return: Path to created directory
    """
    target = os.path.join(root, name) if name is not None else root
    try:
        if not exists(target):
            os.makedirs(target)
            return target
    except OSError as e:
        raise e
def list_all(root, filter_func=lambda x: True, full_path=False):
    """
    List all entities directly under 'dir_name' that satisfy 'filter_func'
    :param root: Name of directory to start search
    :param filter_func: function or lambda that takes path
    :param full_path: If True will return results as full path including `root`
    :return: list of all files or directories that satisfy the criteria.
    """
    if not is_directory(root):
        raise Exception("Invalid parent directory '%s'" % root)
    matches = [x for x in os.listdir(root) if filter_func(os.path.join(root, x))]
    return [os.path.join(root, m) for m in matches] if full_path else matches


def list_subdirs(dir_name, full_path=False):
    """
    Equivalent to UNIX command:
      ``find $dir_name -depth 1 -type d``
    :param dir_name: Name of directory to start search
    :param full_path: If True will return results as full path including `root`
    :return: list of all directories directly under 'dir_name'
    """
    return list_all(dir_name, os.path.isdir, full_path)


def list_files(dir_name, full_path=False):
    """
    Equivalent to UNIX command:
      ``find $dir_name -depth 1 -type f``
    :param dir_name: Name of directory to start search
    :param full_path: If True will return results as full path including `root`
    :return: list of all files directly under 'dir_name'
    """
    return list_all(dir_name, os.path.isfile, full_path)

def find(root, name, full_path=False):
    """
    Search for a file in a root directory. Equivalent to:
      ``find $root -name "$name" -depth 1``
    :param root: Name of root directory for find
    :param name: Name of file or directory to find directly under root directory
    :param full_path: If True will return results as full path including `root`
    :return: list of matching files or directories
    """
    path_name = os.path.join(root, name)
    return list_all(root, lambda x: x == path_name, full_path)
        
        
def write_yaml(root, file_name, data, overwrite=False):
    """
    Write dictionary data in yaml format.
    :param root: Directory name.
    :param file_name: Desired file name. Will automatically add .yaml extension if not given
    :param data: data to be dumped as yaml format
    :param overwrite: If True, will overwrite existing files
    """
    if not exists(root):
        raise MissingConfigException("Parent directory '%s' does not exist." % root)

    file_path = os.path.join(root, file_name)
    yaml_file_name = file_path if file_path.endswith(".yaml") else file_path + ".yaml"

    if exists(yaml_file_name) and not overwrite:
        raise Exception("Yaml file '%s' exists as '%s" % (file_path, yaml_file_name))

    try:
        with open(yaml_file_name, 'w') as yaml_file:
            yaml.safe_dump(data, yaml_file, default_flow_style=False, allow_unicode=True)
    except Exception as e:
        raise e


def read_yaml(root, file_name):
    """
    Read data from yaml file and return as dictionary
    :param root: Directory name
    :param file_name: File name. Expects to have '.yaml' extension
    :return: Data in yaml file as dictionary
    """
    if not exists(root):
        raise MissingConfigException(
            "Cannot read '%s'. Parent dir '%s' does not exist." % (file_name, root))

    file_path = os.path.join(root, file_name)
    if not exists(file_path):
        raise MissingConfigException("Yaml file '%s' does not exist." % file_path)

    try:
        with open(file_path, 'r') as yaml_file:
            return yaml.safe_load(yaml_file)
    except Exception as e:
        raise e
def mv(target, new_parent):
    shutil.move(target, new_parent)
    
def write_to(filename, data):
    with codecs.open(filename, mode="w", encoding=ENCODING) as handle:
        handle.write(data)


def append_to(filename, data):
    with open(filename, "a") as handle:
        handle.write(data)


def make_tarfile(output_filename, source_dir, archive_name, custom_filter=None):
    # Helper for filtering out modification timestamps
    def _filter_timestamps(tar_info):
        tar_info.mtime = 0
        return tar_info if custom_filter is None else custom_filter(tar_info)

    unzipped_filename = tempfile.mktemp()
    try:
        with tarfile.open(unzipped_filename, "w") as tar:
            tar.add(source_dir, arcname=archive_name, filter=_filter_timestamps)
        # When gzipping the tar, don't include the tar's filename or modification time in the
        # zipped archive (see https://docs.python.org/3/library/gzip.html#gzip.GzipFile)
        with gzip.GzipFile(filename="", fileobj=open(output_filename, 'wb'), mode='wb', mtime=0)\
                as gzipped_tar, open(unzipped_filename, 'rb') as tar:
            gzipped_tar.write(tar.read())
    finally:
        os.remove(unzipped_filename)


def _copy_project(src_path, dst_path=""):
    """
    Internal function used to copy MLflow project during development.
    Copies the content of the whole directory tree except patterns defined in .dockerignore.
    The MLflow is assumed to be accessible as a local directory in this case.
    :param dst_path: MLflow will be copied here
    :return: name of the MLflow project directory
    """

    def _docker_ignore(mlflow_root):
        docker_ignore = os.path.join(mlflow_root, '.dockerignore')
        patterns = []
        if os.path.exists(docker_ignore):
            with open(docker_ignore, "r") as f:
                patterns = [x.strip() for x in f.readlines()]

        def ignore(_, names):
            import fnmatch
            res = set()
            for p in patterns:
                res.update(set(fnmatch.filter(names, p)))
            return list(res)

        return ignore if patterns else None

    mlflow_dir = "arthur-project"
    # check if we have project root
    assert os.path.isfile(os.path.join(src_path, "setup.py")), "file not found " + str(
        os.path.abspath(os.path.join(src_path, "setup.py")))
    shutil.copytree(src_path, os.path.join(dst_path, mlflow_dir),
                    ignore=_docker_ignore(src_path))
    return mlflow_dir


def _copy_file_or_tree(src, dst, dst_dir=None):
    """
    :return: The path to the copied artifacts, relative to `dst`
    """
    dst_subpath = os.path.basename(os.path.abspath(src))
    if dst_dir is not None:
        dst_subpath = os.path.join(dst_dir, dst_subpath)
    dst_path = os.path.join(dst, dst_subpath)

    dst_dirpath = os.path.dirname(dst_path)
    if not os.path.exists(dst_dirpath):
        os.makedirs(dst_dirpath)

    if os.path.isfile(src):
        shutil.copy(src=src, dst=dst_path)
    else:
        shutil.copytree(src=src, dst=dst_path)
    return dst_subpath


def get_parent_dir(path):
    return os.path.abspath(os.path.join(path, os.pardir))

def list_all_filepaths(absolute_dirpath):
    """Returns all filepaths within dir relative to dir root"""
    return [
        os.path.relpath(os.path.join(dirpath, file), absolute_dirpath)
        for (dirpath, dirnames, filenames) in os.walk(absolute_dirpath)
        for file in filenames
    ]

Writing store/file_utils.py


In [6]:
%%file store/abstract_store.py
from abc import abstractmethod, ABCMeta
from Arthur.core.utils.entities.ViewType import ViewType


class AbstractStore:
    """
    Abstract class for Backend Storage
    This class will define API interface for front ends to connect with various types of backends
    """

    __metaclass__ = ABCMeta

    def __init__(self):
        """
        Empty constructor for now. This is deliberately not marked as abstract, else every
        derived class would be forced to create one.
        """
        pass

Writing store/abstract_store.py


In [5]:
%%file entities/ViewType.py
class ViewType(object):
    """Enum to filter requested experiment types."""
    ACTIVE_ONLY, DELETED_ONLY, ALL = range(1, 4)
    _VIEW_TO_STRING = {
        ACTIVE_ONLY: "active_only",
        DELETED_ONLY: "deleted_only",
        ALL: "all",
    }
    _STRING_TO_VIEW = {value: key for key, value in _VIEW_TO_STRING.items()}

    @classmethod
    def from_string(cls, view_str):
        if view_str not in cls._STRING_TO_VIEW:
            raise Exception(
                "Could not get valid view type corresponding to string %s. "
                "Valid view types are %s" % (view_str, list(cls._STRING_TO_VIEW.keys())))
        return cls._STRING_TO_VIEW[view_str]

    @classmethod
    def to_string(cls, view_type):
        if view_type not in cls._VIEW_TO_STRING:
            raise Exception(
                "Could not get valid view type corresponding to string %s. "
                "Valid view types are %s" % (view_type, list(cls._VIEW_TO_STRING.keys())))
        return cls._VIEW_TO_STRING[view_type]
#ViewType.ALL
#ViewType._STRING_TO_VIEW
#ViewType.from_string('active_only')
#ViewType.to_string(2)

Writing entities/ViewType.py


In [4]:
%%file algo_publish.py
import logging
import os
import subprocess
from submitted_run import LocalSubmittedRun
import shlex

__version__ = '0.0.8'
APIFLY_FUNCTIONS="APIFLY_FUNCTIONS"
APIFLY_TOKEN="APIFLY_TOKEN"
STATIC_PREFIX_ENV_VAR="STATIC_PREFIX_ENV_VAR"

_logger = logging.getLogger(__name__)
def _runServerCmdbase(apifly_function, apifly_token, host, port, workers, static_prefix,gunicorn_opts):
    """
    Run the Arthur api server, wrapping it in gunicorn
    :param static_prefix: If set, the index.html asset will be served from the path static_prefix.
                          If left None, the index.html asset will be served from the root path.
    :return: None
    """
    env_map = {}
    if apifly_function:
        env_map[APIFLY_FUNCTIONS] = apifly_function
    if apifly_token:
        env_map[APIFLY_TOKEN] = apifly_token
    if static_prefix:
        env_map[STATIC_PREFIX_ENV_VAR] = static_prefix
    
    bind_address = "%s:%s" % (host, port)
    
    opts = shlex.split(gunicorn_opts) if gunicorn_opts else []
    exec_cmd(["gunicorn"] + opts + ["-b", bind_address, "-w", "%s" % workers,
                                    "--worker-class","gevent",
                                    "Arthur.core.apiserver.main:app"],
             env=env_map, stream_output=True)
    
class model_publish(object):
    def _run_arthur_run_cmd(self,arthur_run_arr,apifly_function, apifly_token='',static_prefix='',env_map={}):
        """
        Invoke ``arthur run`` in a subprocess, which in turn runs the entry point in a child process.
        Returns a handle to the subprocess. Popen launched to invoke ``arthur run``.
        """
        #final_env = os.environ.copy()
        #env_map = {}
        if apifly_function:
            env_map[APIFLY_FUNCTIONS] = apifly_function
        if apifly_token:
            env_map[APIFLY_TOKEN] = apifly_token
        if static_prefix:
            env_map[STATIC_PREFIX_ENV_VAR] = static_prefix
        cmd_env = os.environ.copy()
        if env_map:
            cmd_env.update(env_map)
        #final_env.update(env_map)
        # Launch `mlflow run` command as the leader of its own process group so that we can do a
        # best-effort cleanup of all its descendant processes if needed
        return subprocess.Popen(
            arthur_run_arr, env=cmd_env, universal_newlines=True, preexec_fn=os.setsid)

    def _build_arthur_run_cmd(self,runpath='', port=None, workers=None, parameters=None,gunicorn_opts=''):
        """
        Build and return an array containing an ``Arthur run`` command that can be invoked to locally
        run the project at the specified URI.
        Run the Arthur api server, wrapping it in gunicorn
    :param static_prefix: If set, the index.html asset will be served from the path static_prefix.
        If left None, the index.html asset will be served from the root path.
    :return: None
    
        """
        if workers is not None:
            #arthur_run_arr.extend(["-w", str(workers)])
            workers=str(workers)
        else:
            #arthur_run_arr.extend(["-w", '2'])
            workers=str(2)
        if port is not None:
            #arthur_run_arr.extend(["-p", str(port)])
            port=port
        else:
            #arthur_run_arr.extend(["-p", '5002']) 
            port ='5002'

        host ="0.0.0.0"
        bind_address = "%s:%s" % (host, port)
        gunicorn_opts+="--chdir %s"%runpath  #"--chdir ../dep_test/"
        opts = shlex.split(gunicorn_opts) if gunicorn_opts else []
        base_arr=["gunicorn"] + opts + ["-b", bind_address, "-w", "%s" % workers,
                                    "--worker-class","gevent",
                                    "Arthur.core.apiserver.main:app"]
        
        #arthur_run_arr = ["Arthur", "apiserver", "-f", apifuncs,"-h","0.0.0.0"]

        #if not use_conda:
        #    pass
            #mlflow_run_arr.append("--no-conda")
        if parameters is not None:
            for key, value in parameters.items():
                base_arr.extend(["-P", "%s=%s" % (key, value)])
        return base_arr#arthur_run_arr
    def _invoke_arthur_run_subprocess(self,apifuncs,experiment_id, run_id,token='',runpath='',port=None, workers=None, parameters=None,\
                                      gunicorn_opts='',static_prefix=''):
        """
        Run an Arthur project asynchronously by invoking ``Arthur run`` in a subprocess, returning
        a SubmittedRun that can be used to query run status.
        """
        _logger.info("=== Asynchronously launching Arthur run with ID %s ===", run_id)
        arthur_run_arr = self._build_arthur_run_cmd(runpath=runpath,port=port, workers=workers,parameters=parameters,\
                                                   gunicorn_opts=gunicorn_opts)
        #print 'arthur_run_arr',arthur_run_arr
        arthur_run_subprocess = self._run_arthur_run_cmd(arthur_run_arr,apifuncs,token,static_prefix=static_prefix)
        #print 'arthur_run_subprocess',arthur_run_subprocess.stdout.read()#打印结果
        return LocalSubmittedRun(run_id, arthur_run_subprocess)
    def run_nohup(self,arthur_run_arr,run_id):
        """
        Run an Arthur project asynchronously by invoking ``Arthur run`` in a subprocess, returning
        a SubmittedRun that can be used to query run status.
        """
        _logger.info("=== Asynchronously launching Arthur run with ID %s ===", run_id)
        arthur_run_subprocess = self._run_arthur_run_cmd(
            [arthur_run_arr])
        return LocalSubmittedRun(run_id, arthur_run_subprocess)

Overwriting algo_publish.py



In [2]:
from Arthur.core.utils.algo_publish import model_publish

In [2]:
publish_class=model_publish()
#x=Spinner()
#x.start()
publish_status = publish_class._invoke_arthur_run_subprocess('fib.fib',1,2,runpath='/Users/leepand/Downloads/recom/python_web/Arthur.io/Arthur/core/entities/base/tests')
#_wait_for(publish_status)
#x.stop()

arthur_run_arr ['gunicorn', '--chdir', '/Users/leepand/Downloads/recom/python_web/Arthur.io/Arthur/core/entities/base/tests', '-b', '0.0.0.0:5002', '-w', '2', '--worker-class', 'gevent', 'Arthur.core.apiserver.main:app']


In [5]:
publish_status.get_status()

NameError: name 'publish_status' is not defined

In [6]:
publish_class=model_publish()

publish_status2 = publish_class._invoke_arthur_run_subprocess('fib.fib',1,2,token='test',port=5005,runpath='/Users/leepand/Downloads/recom/python_web/Arthur.io/Arthur/core/entities/base/tests')




In [8]:
publish_status2.get_status()

('RUNNING', 30617)

In [11]:
import subprocess
pp = subprocess.Popen(['ls','./'], stdout=subprocess.PIPE)
stdout, stderr = pp.communicate()
lines = stdout.decode('ascii').split('\n')
out = dict([[x.strip().rsplit('.')[-1] for x in l.split(':')] for l in lines if l])

ValueError: dictionary update sequence element #0 has length 1; 2 is required

In [12]:
lines

[u'__init__.py',
 u'algo_publish.py',
 u'entities',
 u'env.py',
 u'experiment',
 u'port_for',
 u'spinner.py',
 u'store',
 u'submitted_run.py',
 u'test.ipynb',
 u'token_util.py',
 u'']

In [18]:
from Arthur.core.utils.store.exp_file_process import FileStore
project_path=os.path.abspath('arthur_runs/'+'leepand')
rel_path='arthur_runs/'+'leepand'
#project_path='./'
create_exp=FileStore(root_directory=project_path)

In [19]:
create_exp

<Arthur.core.utils.store.exp_file_process.FileStore at 0x10dcf0750>

In [20]:
import os
projectname='new_leepand'
funcspath='/Users/leepand/Downloads/recom/python_web/Arthur.io/Arthur/core/entities/base/tests'
create_exp.create_experiment(funcspath,projectname)
funcspath_bath = os.path.join(create_exp.get_experiment_by_name(projectname).artifact_location,projectname)

    
        
        #shell_cmd=nohup command > logfile.txt & echo $! > pidfile.txt
        
        
        
        
        
        
        build_shell_cmd=publish_class._build_arthur_run_cmd(apifuncs=self.funclist,token=Token_info,runpath=run_path, port=self.port, workers=None, parameters=None)
        #将执行命令写入项目目录
        codepath=create_exp.get_experiment_by_name(self.projectname).artifact_location
        generated_code_filename = os.path.join(codepath,"run.sh")
        with open(generated_code_filename, "w") as f:
            f.write(' '.join(build_shell_cmd))
        #remarks=self.remark
        #remarks.replaceAll("[^0-9a-zA-Z\u4e00-\u9fa5.，,。？“”]", "");
        #return HjsOrder.order_add(self.cId, self.otype, self.order_tm, self.start_tm, self.end_tm, self.amount, self.cash, self.remark)
        return 
    

In [None]:

def cmdDeploy(projectName,funcsPath,userName,):
 

In [20]:
import os
from Arthur.core.utils.store.exp_file_process import FileStore
from Arthur.core.utils.store.file_utils import list_subdirs
from Arthur.core.utils import  port_for
from Arthur.core.utils.token_util import Connector
from Arthur.core.entities.bean.Arthur_service import ArthurService
from Arthur.core.entities.base.bs_time import get_cur_day
from Arthur.core.entities.bean.hjs_user import HjsUser


def algoInfoAdd(userName,projectName,funcsPath,funcList,algoField=1,port=None):
    _bRet,uId=HjsUser.get_user_uid(userName)
    if not _bRet:
        return False, uId  
    project_path=os.path.abspath('arthur_runs/'+userName)
    rel_path='arthur_runs/'+userName
    create_exp=FileStore(root_directory=project_path)
    #迁移源项目路径至新建路径
    create_exp.create_experiment(funcsPath,projectName)
    Token_gen=Connector.encrypt_token( 1, str(uId), 'session_token')
    Token_info=Token_gen['token']
    funcspath_bath = os.path.join(create_exp.get_experiment_by_name(projectName).artifact_location,projectName)
    funcs_sub=list_subdirs(funcspath_bath)
    run_path=os.path.join(funcspath_bath,funcs_sub[0])#一个项目只有一个主目录-default
    if port is None:
        port=port_for.select_random()
    projecttm = get_cur_day(0, format="%Y-%m-%d %H:%M:%S")
    remark="请到主页编辑添加"
    emailmsg="是"
    opertype="publish"
    projectdesc="暂无描述，请到主页编辑添加"
    tags='Machine Learning'
    ArthurService.service_add(uId,projectName, projectdesc,opertype,rel_path,
                                     funcList,tags,algoField,'0.0.0.0',port,projecttm,emailmsg,str(remark))
    return create_exp,uId

In [16]:
_bRet,algoInfo = ArthurService.algo_proj_info(1,'leepand3')


[28/01/2019 11:03:12][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_exec_cmdstr(106): select * from tb_algo where uid = %s and algoname = %s	[1, 'leepand3']
[28/01/2019 11:03:12][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_new_conn(31): conn(host: 127.0.0.1, port: 3306, user: root, passwd: , db: Arthur_manage)


In [17]:
algoInfo['aid']

24

In [42]:
from Arthur.core.utils.algo_publish import model_publish

def algoDeploy(userName,projectName,funcsPath,funcList):
    create_exp_info,uId = algoInfoAdd(userName,projectName,funcsPath,funcList)
    _bRet,algoInfo = ArthurService.algo_proj_info(uId,projectName)#AlgoName=projectName
    #funcslist=algoInfo['funcslist']
    port=algoInfo['port']
    token=algoInfo['token']
    aId = algoInfo['aid']
    experiment_id=create_exp_info.get_experiment_by_name(projectName).experiment_id
    runId= aId
    funcspath_bath = os.path.join(create_exp_info.get_experiment_by_name(projectName).artifact_location,projectName)        
    funcs_sub=list_subdirs(funcspath_bath)
    run_path=os.path.join(funcspath_bath,funcs_sub[0])
    publish_class=model_publish()
    publish_status = publish_class._invoke_arthur_run_subprocess(funcList,experiment_id,runId,token=token,port=port,runpath=run_path)
    #将pid写入项目目录
    pidPath=create_exp_info.get_experiment_by_name(projectName).artifact_location
    publish_status,pid_info = publish_status.get_status()
    generated_pid_filename = os.path.join(pidPath,"pid.pid")
    with open(generated_pid_filename, "w") as f:
        f.write(str(pid_info))
    wait_result = wait_until_algo_published(publish_status,aId)
    if wait_result:
        return True,'algo deploy success'
    else:
        return False,'algo deploy failed'

In [45]:
projectName='leepand10'
userName="leepand6"
funcsPath='/Users/leepand/Downloads/recom/python_web/Arthur.io/Arthur/core/entities/base/tests'
funcList='fib.fib'
#c = algoInfoAdd(userName,projectName,funcsPath,funcList)
publish,_ = algoDeploy(userName,projectName,funcsPath,funcList)

[28/01/2019 12:24:18][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_exec_cmdstr(106): select * from tb_user where username = %s	['leepand6']
[28/01/2019 12:24:18][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_new_conn(31): conn(host: 127.0.0.1, port: 3306, user: root, passwd: , db: Arthur_manage)
[28/01/2019 12:24:18][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_exec_cmdstr(106): insert into tb_algo(uid, algoname, algodesc,opertype,token,pyfile,funcslist,tags,field,host,port,atype, algo_tm, is_email,remark, insert_tm) values(%s, %s, %s, %s, %s, %s,%s, %s, %s, %s, %s, %s,%s, %s,%s,%s)	[1, 'leepand10', '\xe6\x9a\x82\xe6\x97\xa0\xe6\x8f\x8f\xe8\xbf\xb0\xef\xbc\x8c\xe8\xaf\xb7\xe5\x88\xb0\xe4\xb8\xbb\xe9\xa1\xb5\xe7\xbc\x96\xe8\x

\

[28/01/2019 12:24:40][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_exec_cmdstr(106): update tb_algo set status = %s,insert_tm = %s where aid = %s	['normal', '2019-01-28 12:24:40', 31]
[28/01/2019 12:24:40][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_new_conn(31): conn(host: 127.0.0.1, port: 3306, user: root, passwd: , db: Arthur_manage)




In [46]:
publish

True

In [None]:
def _wait_for(submitted_run_obj):
    """Wait on the passed-in submitted run, reporting its status to the tracking server."""
    run_id = submitted_run_obj.run_id
    active_run = None
    # Note: there's a small chance we fail to report the run's status to the tracking server if
    # we're interrupted before we reach the try block below
    try:
        active_run = tracking.MlflowClient().get_run(run_id) if run_id is not None else None
        if submitted_run_obj.wait():
            _logger.info("=== Run (ID '%s') succeeded ===", run_id)
            _maybe_set_run_terminated(active_run, "FINISHED")
        else:
            _maybe_set_run_terminated(active_run, "FAILED")
            raise ExecutionException("Run (ID '%s') failed" % run_id)
    except KeyboardInterrupt:
        _logger.error("=== Run (ID '%s') interrupted, cancelling run ===", run_id)
        submitted_run_obj.cancel()
        _maybe_set_run_terminated(active_run, "FAILED")
        raise


In [44]:
import itertools
import time
import sys
def wait_until_algo_published(
    publish_status,aId, max_wait_sec=20, interval_sec=0.1
):
    """
    wait util the algo published or timeout
    :param publish_status:
        publish status for wait :RUNNING,FAILED,FINISHED
    :param max_wait_sec:
        max wating time until timeout
    :param interval_sec:
        check interval
    :param recursive:
        recursively search or not
    :return:
        True if found.
    """
    curr_wait_sec = 0
    spinner_generator = itertools.cycle(['-', '/', '|', '\\'])
    while curr_wait_sec < max_wait_sec:
        
        if publish_status=="FAILED" or publish_status=="FINISHED" :
            ArthurService.algo_publish_status_update(aId,'stop')
            return False
        curr_wait_sec += interval_sec
        sys.stdout.write(next(spinner_generator))
        sys.stdout.flush()
        time.sleep(interval_sec)
        sys.stdout.write('\b')  
    ArthurService.algo_publish_status_update(aId,'normal')
    return True


In [34]:
publish

'RUNNING'

In [None]:
import sys
import time
import itertools
import threading


class wait_for_algodeploy:
    def __init__(self, delay=0.1):
        self.spinner_generator = itertools.cycle(['-', '/', '|', '\\'])
        if delay and float(delay): self.delay = delay

    def spinner_task(self):
        while self.busy:
            sys.stdout.write(next(self.spinner_generator))
            sys.stdout.flush()
            time.sleep(self.delay)
            sys.stdout.write('\b')

    def start(self):
        self.busy = True
        threading.Thread(target=self.spinner_task).start()
        return True

    def stop(self):
        self.busy = False
        time.sleep(self.delay)
        return True

In [30]:
import sys
def get_filename(backstep=0):
    """
    Get the file name of the current code line.
    :param backstep:
        will go backward (one layer) from the current function call stack
    """
    return os.path.basename(
        sys._getframe(backstep + 1).f_code.co_filename)  # pylint:disable=W0212
get_filename()

'<ipython-input-30-db63534e46cb>'

In [13]:
c.get_experiment_by_name(projectName).experiment_id

3

In [21]:
funcspath_bath

'/Users/leepand/Downloads/recom/python_web/Arthur.io/Arthur/core/utils/arthur_runs/leepand/0/new_leepand'

In [23]:
create_exp.get_experiment_by_name(projectname).experiment_id

0

In [25]:
from Arthur.core.entities.bean.hjs_user import HjsUser

_bRet,uId=HjsUser.get_user_uid('leepand6')

[28/01/2019 08:46:39][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_exec_cmdstr(106): select * from tb_user where username = %s	['leepand6']
[28/01/2019 08:46:39][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_new_conn(31): conn(host: 127.0.0.1, port: 3306, user: root, passwd: , db: Arthur_manage)


In [26]:
uId

1

In [2]:
from Arthur.core.controller.deploy.driver.arthur_microservice import ArthurMicroserviceDeployDriver
test = ArthurMicroserviceDeployDriver('remore')


In [4]:
funcsPath='/Users/leepand/Downloads/recom/python_web/Arthur.io/Arthur/core/entities/base/tests'
funcList='fib.fib'
token='haha'
test.algoDeployCli(funcsPath,funcList,userName='leepand6',projectName='None',port=None,token=token)

\

[29/01/2019 08:26:49][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_exec_cmdstr(106): select * from tb_user where username = %s	['leepand6']
[29/01/2019 08:26:49][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_new_conn(31): conn(host: 127.0.0.1, port: 3306, user: root, passwd: , db: Arthur_manage)


/

[29/01/2019 08:26:50][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_exec_cmdstr(106): insert into tb_algo(uid, algoname, algodesc,opertype,token,pyfile,funcslist,tags,field,host,port,atype, algo_tm, is_email,remark, insert_tm) values(%s, %s, %s, %s, %s, %s,%s, %s, %s, %s, %s, %s,%s, %s,%s,%s)	[1, 'None', '\xe6\x9a\x82\xe6\x97\xa0\xe6\x8f\x8f\xe8\xbf\xb0\xef\xbc\x8c\xe8\xaf\xb7\xe5\x88\xb0\xe4\xb8\xbb\xe9\xa1\xb5\xe7\xbc\x96\xe8\xbe\x91\xe6\xb7\xbb\xe5\x8a\xa0', 'publish', '20bf7679146eaef99136cde84ccc1eba', 'arthur_runs/leepand6', 'fib.fib', 'Machine Learning', 1, '0.0.0.0', 48322, 'REST', '2019-01-29 08:26:50', '\xe6\x98\xaf', '\xe8\xaf\xb7\xe5\x88\xb0\xe4\xb8\xbb\xe9\xa1\xb5\xe7\xbc\x96\xe8\xbe\x91\xe6\xb7\xbb\xe5\x8a\xa0', '2019-01-29 08:26:50']
[29/01/2019 08:26:50][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py



[29/01/2019 08:26:50][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_exec_cmdstr(106): select * from tb_algo where uid = %s and algoname = %s	[1, 'None']
[29/01/2019 08:26:50][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_new_conn(31): conn(host: 127.0.0.1, port: 3306, user: root, passwd: , db: Arthur_manage)


\

[29/01/2019 08:27:11][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_exec_cmdstr(106): update tb_algo set status = %s,insert_tm = %s where aid = %s	['normal', '2019-01-29 08:27:11', 35]
[29/01/2019 08:27:11][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_new_conn(31): conn(host: 127.0.0.1, port: 3306, user: root, passwd: , db: Arthur_manage)
[29/01/2019 08:27:11][INFO]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/controller/deploy/driver/arthur_microservice.py::wait_until_algo_published(61): ****RESP: algorithm service publish success!
[29/01/2019 08:27:11][INFO]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/controller/deploy/driver/arthur_microservice.py::algoDeployCli(156): algo deploy success
[29/01/2019 08:27:11][INFO]/



(True, 'algo deploy success')

In [3]:
projectName='leepand13'
userName="Boris"
funcsPath='/Users/leepand/Downloads/recom/python_web/Arthur.io/Arthur/core/entities/base/tests'
funcList='fib.fib'
#c = algoInfoAdd(userName,projectName,funcsPath,funcList)
test2 = ArthurMicroserviceDeployDriver('remote')
token='aa'
test2.algoDeployCli(funcsPath,funcList,userName,projectName,port=None,token=token)

[28/01/2019 14:51:36][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_exec_cmdstr(106): select * from tb_user where username = %s	['Boris']
[28/01/2019 14:51:36][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_new_conn(31): conn(host: 127.0.0.1, port: 3306, user: root, passwd: , db: Arthur_manage)
[28/01/2019 14:51:36][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_exec_cmdstr(106): insert into tb_algo(uid, algoname, algodesc,opertype,token,pyfile,funcslist,tags,field,host,port,atype, algo_tm, is_email,remark, insert_tm) values(%s, %s, %s, %s, %s, %s,%s, %s, %s, %s, %s, %s,%s, %s,%s,%s)	[2, 'leepand13', '\xe6\x9a\x82\xe6\x97\xa0\xe6\x8f\x8f\xe8\xbf\xb0\xef\xbc\x8c\xe8\xaf\xb7\xe5\x88\xb0\xe4\xb8\xbb\xe9\xa1\xb5\xe7\xbc\x96\xe8\xbe\

\

[28/01/2019 14:51:57][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_exec_cmdstr(106): update tb_algo set status = %s,insert_tm = %s where aid = %s	['normal', '2019-01-28 14:51:57', 33]
[28/01/2019 14:51:57][DEBUG]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/entities/base/bs_database_pid.py::_new_conn(31): conn(host: 127.0.0.1, port: 3306, user: root, passwd: , db: Arthur_manage)
[28/01/2019 14:51:57][INFO]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/controller/deploy/driver/arthur_microservice.py::wait_until_algo_published(60): ****RESP: algorithm service publish success!
[28/01/2019 14:51:57][INFO]/Users/leepand/anaconda2/lib/python2.7/site-packages/Arthur-2.0.1.dev1-py2.7.egg/Arthur/core/controller/deploy/driver/arthur_microservice.py::algoDeployCli(135): algo deploy success
[28/01/2019 14:51:57][INFO]/



(True, 'algo deploy success')

In [5]:
from Arthur.core.apiserver.client import Client


In [6]:
%%file misc_functions.py
from Arthur.core.entities.base.bs_log import Log
import subprocess
# class for colors
class bcolors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
class Commands(object):
    def __init__(self):
        Log.info("handling command cmd script")

    def run_cmd(self, shell_cmd):
        try:
            if type(shell_cmd) is list:
                p = subprocess.Popen(shell_cmd, stdout=subprocess.PIPE)
                out, e = p.communicate()
                Log.info("%s"%
                        (shell_cmd))
                if e:
                    Log.info(e)
                    Log.info(
                        bcolors.FAIL + "error while running the command %s" %
                        (shell_cmd))
                else:
                    return {'output': out, 'status': True}
            else:
                process_returncode = subprocess.Popen(
                    shell_cmd, shell=True).wait()
                Log.info("")
                if process_returncode == 0:
                    return {'status': True}
                else:
                    return {'status': False}
        except Exception as e:
            Log.info(e)
            Log.info(bcolors.FAIL + "error while running the command %s" %
                          (shell_cmd))
            return {'status': False}

Writing misc_functions.py


In [10]:
from Arthur.core.apiserver.client import Client
fibclient=Client('http://127.0.0.01:33520',auth_token="2a9f9670106e9ef9d456491840cc9eba")
fibclient.fib(n=9)

55

In [4]:
class test:
    @staticmethod
    def func1():
        print('1')
    
    @staticmethod
    def func2(e):
        print(e)

def other(func):
    result = func()
    return result


if __name__ == '__main__':
    # print(test.func1('hello'))
    other(test.func2('hello'))

TypeError: func2() takes exactly 2 arguments (1 given)

In [7]:
class test:
    @staticmethod
    def func1(p1):
        return p1
    
    @staticmethod
    def func2(p2):
        return p2

def other(func, func_param):
    exec_statement = 'result=test.' + func + '("' + func_param +'")'
    exec(exec_statement)
    loc = locals()
    return loc['result']

x={'hello':10}
if __name__ == '__main__':
    print(other('func1', x))
    print(other('func2', 'world'))

TypeError: cannot concatenate 'str' and 'dict' objects

In [5]:
%%file dynamically_load_class.py
"""
# https://stackoverflow.com/questions/547829/how-to-dynamically-load-a-python-class
"""
import importlib
import time
#from misc_functions import TraceUsedTime
def get_class_contructor(class_location):
    mod_path = class_location[:class_location.rfind('.')]
    class_name = class_location[class_location.rfind('.') + 1:]
    module = importlib.import_module(mod_path)
    return getattr(module, class_name)

test_de=get_class_contructor("misc_functions.TraceUsedTime")
@test_de(True)
def test():
    time.sleep(3)
test()   

Writing dynamically_load_class.py


In [7]:
%%file json_store.py
import os
import json
import yaml
from io import open
try:
    to_unicode = unicode
except NameError:
    to_unicode = str
try:

    def to_bytes(val):
        return bytes(val)

    to_bytes("test")
except TypeError:

    def to_bytes(val):
        return bytes(val, "utf-8")

    to_bytes("test")

from Arthur.core.utils.exceptions import (SaveSettingError, FileIOError)


class JSONStore():
    # TODO:  add file locking
    # https://stackoverflow.com/questions/186202/what-is-the-best-way-to-open-a-file-for-exclusive-access-in-python
    # Alternatives to JSON??
    # https://martin-thoma.com/configuration-files-in-python/
    def __init__(self, filepath, initial_dict={}):
        self.filepath = filepath
        # Ensure filepath directories exist
        directory = os.path.dirname(filepath)
        if not os.path.exists(directory):
            os.makedirs(directory)
        # save initial dictionary
        if initial_dict:
            self.to_file(initial_dict)
        # keep file in memory until a write occurs
        self.in_memory_settings = False

    def to_file(self, dictionary):
        with open(self.filepath, "wb") as outfile:
            str_ = json.dumps(
                dictionary,
                indent=4,
                sort_keys=True,
                separators=(',', ': '),
                ensure_ascii=False)
            outfile.write(to_bytes(str_))
        return

    def save(self, key, value):
        self.in_memory_settings = False
        settings_dict = {}
        if not os.path.exists(self.filepath):
            open(self.filepath, 'w+').close()
        else:
            settings_dict = json.load(open(self.filepath, 'r'))
        settings_dict[key] = value
        with open(self.filepath, "wb") as outfile:
            str_ = json.dumps(
                settings_dict,
                indent=4,
                sort_keys=True,
                separators=(',', ': '),
                ensure_ascii=False)
            outfile.write(to_bytes(str_))
        return

    def get(self, name):
        if self.in_memory_settings and name in self.in_memory_settings:
            return self.in_memory_settings[name]

        if not os.path.exists(self.filepath):
            return None

        with open(self.filepath) as settings_file:
            try:
                settings = json.load(settings_file)
                # save in memory
                self.in_memory_settings = settings
            except Exception as err:
                raise SaveSettingError(err)
            if name in settings:
                return settings[name]
            else:
                return None

    def remove(self, name):
        if not os.path.exists(self.filepath):
            return None
        else:
            settings_dict = json.load(open(self.filepath, 'r'))
        settings_dict.pop(name, None)
        with open(self.filepath, "wb") as outfile:
            str_ = json.dumps(
                settings_dict,
                indent=4,
                sort_keys=True,
                separators=(',', ': '),
                ensure_ascii=False)
            outfile.write(to_bytes(str_))
        return

    def to_dict(self):
        output_dict = dict()
        # reading json file
        if os.path.exists(self.filepath):
            with open(self.filepath) as data_file:
                meta_data_string = data_file.read()
            if not meta_data_string:
                return {}
            try:
                output_dict = json.loads(meta_data_string)
                output_dict = yaml.safe_load(json.dumps(output_dict))
            except Exception as err:
                raise FileIOError(err)
            return output_dict

Writing json_store.py


In [6]:
%%file exceptions.py
#!/usr/bin/python
#from datmo.core.util.i18n import get as __


class ArgumentError(Exception):
    pass


class TaskNotComplete(ArgumentError):
    pass


class TaskNoCommandGiven(ArgumentError):
    pass


class TaskInteractiveDetachError(ArgumentError):
    pass


class SnapshotCreateFromTaskArgs(ArgumentError):
    pass


class RequiredArgumentMissing(ArgumentError):
    pass


class GitUrlArgumentError(ArgumentError):
    pass


class TooManyArgumentsFound(ArgumentError):
    pass


class ProjectException(Exception):
    pass


class InvalidProjectPath(ProjectException):
    pass


class ProjectNotInitialized(ProjectException):
    pass


class DatmoModelNotInitialized(ProjectException):
    pass


class InvalidOperation(Exception):
    pass


class ClassMethodNotFound(Exception):
    pass


class CLIArgumentError(ArgumentError):
    pass


class UnrecognizedCLIArgument(CLIArgumentError):
    pass


class InvalidArgumentType(ArgumentError):
    pass


class MutuallyExclusiveArguments(ArgumentError):
    pass


class ValidationSchemaMissing(ArgumentError):
    pass


class IncorrectType(Exception):
    pass


class InputError(Exception):
    pass


class EntityNotFound(Exception):
    pass


class MoreThanOneEntityFound(Exception):
    pass


class EntityCollectionNotFound(Exception):
    pass


class SaveSettingError(Exception):
    pass


class FileExecutionError(Exception):
    pass


class FileAlreadyExistsError(Exception):
    pass


class DirAlreadyExistsError(Exception):
    pass


class DoesNotExist(Exception):
    pass


class CodeDoesNotExist(DoesNotExist):
    pass


class EnvironmentDoesNotExist(DoesNotExist):
    pass


class SnapshotDoesNotExist(DoesNotExist):
    pass


class PathDoesNotExist(FileExecutionError):
    def __init__(self, file_path=None):
        self.file_path = file_path

    def __str__(self):
        if self.file_path:
            return "Path being passed doesn't exist: %s" % self.file_path
        else:
            return "Path being passed doesn't exist"


class LoggingPathDoesNotExist(PathDoesNotExist):
    pass


class FileIOError(FileExecutionError):
    pass


class FileStructureError(FileExecutionError):
    pass


class FileNotInitialized(FileExecutionError):
    pass


class DALException(Exception):
    pass


class DALNotInitialized(DALException):
    pass


class EnvironmentException(Exception):
    pass


class EnvironmentImageNotFound(EnvironmentException):
    pass


class EnvironmentContainerNotFound(EnvironmentException):
    pass


class EnvironmentExecutionError(EnvironmentException):
    pass


class EnvironmentRequirementsCreateError(EnvironmentException):
    pass


class EnvironmentInitFailed(EnvironmentExecutionError):
    pass


class EnvironmentConnectFailed(EnvironmentExecutionError):
    pass


class EnvironmentNotConnected(EnvironmentExecutionError):
    pass


class EnvironmentNotInitialized(EnvironmentExecutionError):
    pass


class TaskRunError(EnvironmentException):
    pass


class GPUSupportNotEnabled(EnvironmentExecutionError):
    pass


class CodeException(Exception):
    pass


class CodeNotInitialized(CodeException):
    pass


class GitExecutionError(CodeException):
    pass


class CommitDoesNotExist(CodeException):
    pass


class CommitFailed(CodeException):
    pass


class InvalidDestinationName(ArgumentError):
    pass


class ValidationFailed(ArgumentError):
    def __init__(self, error_obj):
        self.errors = error_obj
        super(ValidationFailed, self).__init__(
            __("error", "exception.validationfailed", self.get_error_str()))

    def get_error_str(self):
        err_str = ''
        for name in self.errors:
            err_str += "'%s': %s\n" % (name, self.errors[name])
        return err_str


class DatmoFolderInWorkTree(CodeException):
    pass


class UnstagedChanges(Exception):
    def __str__(self):
        return "Unstaged changes exists. Create a snapshot to remove any unstaged changes"


class NothingToStage(Exception):
    pass

Writing exceptions.py
