mlflow · aarondav · May 7, 2019 · May 2, 2019 · May 2, 2019 · May 2, 2019
diff --git a/.travis.yml b/.travis.yml
@@ -53,6 +53,8 @@ matrix:
       after_success:
         - export COVR_RUNNING=true
         - Rscript -e 'covr::codecov()'
+      after_failure:
+        - "[ -r /home/travis/build/mlflow/mlflow/mlflow/R/mlflow/mlflow.Rcheck/00check.log ] && cat /home/travis/build/mlflow/mlflow/mlflow/R/mlflow/mlflow.Rcheck/00check.log"
     - language: java
       name: "Java"
       install:

diff --git a/mlflow/R/mlflow/R/databricks-utils.R b/mlflow/R/mlflow/R/databricks-utils.R
@@ -95,8 +95,7 @@ get_databricks_config <- function(profile) {
   config
 }
 
-mlflow_get_run_context.mlflow_databricks_client <- function(client, source_name, source_version,
-                                                            source_type, experiment_id, ...) {
+mlflow_get_run_context.mlflow_databricks_client <- function(client, experiment_id, ...) {
   if (exists(".databricks_internals")) {
     notebook_info <- do.call(".get_notebook_info", list(), envir = get(".databricks_internals",
                                                                        envir = .GlobalEnv))
@@ -105,11 +104,11 @@ mlflow_get_run_context.mlflow_databricks_client <- function(client, source_name,
       tags[[MLFLOW_DATABRICKS_TAGS$MLFLOW_DATABRICKS_NOTEBOOK_ID]] <- notebook_info$id
       tags[[MLFLOW_DATABRICKS_TAGS$MLFLOW_DATABRICKS_NOTEBOOK_PATH]] <- notebook_info$path
       tags[[MLFLOW_DATABRICKS_TAGS$MLFLOW_DATABRICKS_WEBAPP_URL]] <- notebook_info$webapp_url
+      tags[[MLFLOW_TAGS$MLFLOW_SOURCE_NAME]] <- notebook_info$path
+      tags[[MLFLOW_TAGS$MLFLOW_SOURCE_VERSION]] <- get_source_version()
+      tags[[MLFLOW_TAGS$MLFLOW_SOURCE_TYPE]] <- MLFLOW_SOURCE_TYPE$NOTEBOOK
       list(
         client = client,
-        source_version = source_version %||% get_source_version(),
-        source_type =  MLFLOW_SOURCE_TYPE$NOTEBOOK,
-        source_name = notebook_info$path,
         tags = tags,
         experiment_id = experiment_id %||% notebook_info$id,
         ...

diff --git a/mlflow/R/mlflow/R/tracking-runs.R b/mlflow/R/mlflow/R/tracking-runs.R
@@ -29,9 +29,8 @@ mlflow_log_metric <- function(key, value, timestamp = NULL, run_id = NULL, clien
 
 
 
-mlflow_create_run <- function(user_id = NULL, run_name = NULL, source_type = NULL,
-                              source_name = NULL, entry_point_name = NULL, start_time = NULL,
-                              source_version = NULL, tags = NULL, experiment_id = NULL, client) {
+mlflow_create_run <- function(user_id = NULL, start_time = NULL, tags = NULL,
+                              experiment_id = NULL, client) {
   experiment_id <- resolve_experiment_id(experiment_id)
   tags <- if (!is.null(tags)) tags %>%
     purrr::imap(~ list(key = .y, value = .x)) %>%
@@ -46,12 +45,7 @@ mlflow_create_run <- function(user_id = NULL, run_name = NULL, source_type = NUL
     data = list(
       experiment_id = experiment_id,
       user_id = user_id,
-      run_name = run_name,
-      source_type = source_type,
-      source_name = source_name,
-      entry_point_name = entry_point_name,
       start_time = start_time,
-      source_version = source_version,
       tags = tags
     )
   )
@@ -463,7 +457,6 @@ mlflow_log_artifact <- function(path, artifact_path = NULL, run_id = NULL, clien
 #' @param entry_point_name Optional name of the entry point for the current run.
 #' @param source_type Integer enum value describing the type of the run  ("local", "project", etc.).
 #' @param user_id User ID or LDAP for the user executing the run. Only used when `client` is specified.
-#' @param run_name Human readable name for run. Only used when `client` is specified.
 #' @param start_time Unix timestamp of when the run started in milliseconds. Only used when `client` is specified.
 #' @param tags Additional metadata for run in key-value pairs. Only used when `client` is specified.
 #' @template roxlate-client
@@ -476,29 +469,24 @@ mlflow_log_artifact <- function(path, artifact_path = NULL, run_id = NULL, clien
 #' }
 #'
 #' @export
-mlflow_start_run <- function(run_id = NULL, experiment_id = NULL, source_name = NULL,
-                             source_version = NULL, entry_point_name = NULL,
-                             source_type = NULL, user_id = NULL, run_name = NULL, start_time = NULL,
-                             tags = NULL, client = NULL) {
+mlflow_start_run <- function(run_id = NULL, experiment_id = NULL, user_id = NULL,
+                             start_time = NULL, tags = NULL, client = NULL) {
 
   # When `client` is provided, this function acts as a wrapper for `runs/create` and does not register
   #  an active run.
   if (!is.null(client)) {
     if (!is.null(run_id)) stop("`run_id` should not be specified when `client` is specified.", call. = FALSE)
-    run <- mlflow_create_run(client = client, user_id = user_id, run_name = run_name, source_type = source_type,
-                             source_name = source_name, entry_point_name = entry_point_name, start_time = start_time,
-                             source_version = source_version, tags = tags, experiment_id = experiment_id)
+    run <- mlflow_create_run(client = client, user_id = user_id, start_time = start_time,
+                             tags = tags, experiment_id = experiment_id)
     return(run)
   }
 
   # Fluent mode, check to see if extraneous params passed.
 
   if (!is.null(user_id)) stop("`user_id` should only be specified when `client` is specified.", call. = FALSE)
-  if (!is.null(run_name)) stop("`run_name` should only be specified when `client` is specified.", call. = FALSE)
   if (!is.null(start_time)) stop("`start_time` should only be specified when `client` is specified.", call. = FALSE)
   if (!is.null(tags)) stop("`tags` should only be specified when `client` is specified.", call. = FALSE)
 
-  source_type <- source_type %||% "LOCAL"
   active_run_id <- mlflow_get_active_run_id()
   if (!is.null(active_run_id)) {
     stop("Run with UUID ", active_run_id, " is already active.",
@@ -522,11 +510,7 @@ mlflow_start_run <- function(run_id = NULL, experiment_id = NULL, source_name =
 
     args <- mlflow_get_run_context(
       client,
-      experiment_id = experiment_id,
-      source_name = source_name,
-      source_version = source_version,
-      entry_point_name = entry_point_name,
-      source_type = source_type
+      experiment_id = experiment_id
     )
     do.call(mlflow_create_run, args)
   }
@@ -539,12 +523,14 @@ mlflow_get_run_context <- function(client, ...) {
   UseMethod("mlflow_get_run_context")
 }
 
-mlflow_get_run_context.default <- function(client, source_name, source_version, experiment_id,
-                                           ...) {
+mlflow_get_run_context.default <- function(client, experiment_id, ...) {
+  tags <- list()
+  tags[[MLFLOW_TAGS$MLFLOW_SOURCE_NAME]] <- get_source_name()
+  tags[[MLFLOW_TAGS$MLFLOW_SOURCE_VERSION]] <- get_source_version()
+  tags[[MLFLOW_TAGS$MLFLOW_SOURCE_TYPE]] <- MLFLOW_SOURCE_TYPE$LOCAL
   list(
     client = client,
-    source_name = source_name %||% get_source_name(),
-    source_version = source_version %||% get_source_version(),
+    tags = tags,
     experiment_id = experiment_id %||% 0,
     ...
   )
@@ -586,3 +572,9 @@ mlflow_end_run <- function(status = c("FINISHED", "SCHEDULED", "FAILED", "KILLED
   if (identical(run_id, active_run_id)) mlflow_set_active_run_id(NULL)
   run
 }
+
+MLFLOW_TAGS <- list(
+  MLFLOW_SOURCE_NAME = "mlflow.source.name",
+  MLFLOW_SOURCE_VERSION = "mlflow.source.version",
+  MLFLOW_SOURCE_TYPE = "mlflow.source.type"
+)
diff --git a/mlflow/R/mlflow/R/tracking-utils.R b/mlflow/R/mlflow/R/tracking-utils.R
@@ -109,11 +109,11 @@ mlflow_user <- function() {
 }
 
 MLFLOW_SOURCE_TYPE <- list(
-  NOTEBOOK = 1,
-  JOB = 2,
-  PROJECT = 3,
-  LOCAL = 4,
-  UNKNOWN = 5
+  NOTEBOOK = "NOTEBOOK",
+  JOB = "JOB",
+  PROJECT = "PROJECT",
+  LOCAL = "LOCAL",
+  UNKNOWN = "UNKNOWN"
 )
 
 resolve_client_and_run_id <- function(client, run_id) {

diff --git a/mlflow/R/mlflow/tests/testthat/helpers.R b/mlflow/R/mlflow/tests/testthat/helpers.R
@@ -1,6 +1,6 @@
 mlflow_clear_test_dir <- function(path) {
   purrr::safely(mlflow_end_run)()
-  mlflow_set_active_experiment_id(NULL)
+  mlflow:::mlflow_set_active_experiment_id(NULL)
   if (dir.exists(path)) {
     unlink(path, recursive = TRUE)
   }

diff --git a/mlflow/R/mlflow/tests/testthat/test-tracking-runs.R b/mlflow/R/mlflow/tests/testthat/test-tracking-runs.R
@@ -54,8 +54,8 @@ test_that("logging functionality", {
 
   run <- mlflow_get_run()
   run_id <- run$run_uuid
-  expect_identical(run$tags[[1]]$key, "tag_key")
-  expect_identical(run$tags[[1]]$value, "tag_value")
+  tags <- run$tags[[1]]
+  expect_identical("tag_value", tags$value[tags$key == "tag_key"])
   expect_identical(run$params[[1]]$key, "param_key")
   expect_identical(run$params[[1]]$value, "param_value")
 
@@ -162,15 +162,8 @@ test_that("mlflow_log_batch() works", {
     c("adam", "0.01")
   )
 
-  expect_setequal(
-    tags$key,
-    c("model_type", "data_year")
-  )
-
-  expect_setequal(
-    tags$value,
-    c("regression", "2015")
-  )
+  expect_identical("regression", tags$value[tags$key == "model_type"])
+  expect_identical("2015", tags$value[tags$key == "data_year"])
 })
 
 test_that("mlflow_log_batch() works with timestamp", {

diff --git a/mlflow/entities/run_info.py b/mlflow/entities/run_info.py
@@ -22,17 +22,12 @@ class RunInfo(_MLflowObject):
     Metadata about a run.
     """
 
-    def __init__(self, run_uuid, experiment_id, name, source_type, source_name,
-                 entry_point_name, user_id, status, start_time, end_time, source_version,
+    def __init__(self, run_uuid, experiment_id, user_id, status, start_time, end_time,
                  lifecycle_stage, artifact_uri=None, run_id=None):
+        if run_uuid is None:
+            raise Exception("run_uuid cannot be None")
         if experiment_id is None:
             raise Exception("experiment_id cannot be None")
-        if name is None:
-            raise Exception("name cannot be None")
-        if source_type is None:
-            raise Exception("source_type cannot be None")
-        if source_name is None:
-            raise Exception("source_name cannot be None")
         if user_id is None:
             raise Exception("user_id cannot be None")
         if status is None:
@@ -45,15 +40,10 @@ def __init__(self, run_uuid, experiment_id, name, source_type, source_name,
         self._run_uuid = actual_run_id
         self._run_id = actual_run_id
         self._experiment_id = experiment_id
-        self._name = name
-        self._source_type = source_type
-        self._source_name = source_name
-        self._entry_point_name = entry_point_name
         self._user_id = user_id
         self._status = status
         self._start_time = start_time
         self._end_time = end_time
-        self._source_version = source_version
         self._lifecycle_stage = lifecycle_stage
         self._artifact_uri = artifact_uri
 
@@ -89,31 +79,6 @@ def experiment_id(self):
         """String ID of the experiment for the current run."""
         return self._experiment_id
 
-    @property
-    def name(self):
-        """String name of the run."""
-        return self._name
-
-    @property
-    def source_type(self):
-        """
-        :py:class:`mlflow.entities.SourceType` describing the source of the run.
-        """
-        return self._source_type
-
-    @property
-    def source_name(self):
-        """
-        String name of the source of the run (GitHub URI of the project corresponding to the run,
-        etc).
-        """
-        return self._source_name
-
-    @property
-    def entry_point_name(self):
-        """String name of the entry point for the run."""
-        return self._entry_point_name
-
     @property
     def user_id(self):
         """String ID of the user who initiated this run."""
@@ -137,11 +102,6 @@ def end_time(self):
         """End time of the run, in number of milliseconds since the UNIX epoch."""
         return self._end_time
 
-    @property
-    def source_version(self):
-        """String Git commit hash of the code used for the run, if available."""
-        return self._source_version
-
     @property
     def artifact_uri(self):
         """String root artifact URI of the run."""
@@ -156,18 +116,11 @@ def to_proto(self):
         proto.run_uuid = self.run_uuid
         proto.run_id = self.run_id
         proto.experiment_id = self.experiment_id
-        proto.name = self.name
-        proto.source_type = self.source_type
-        proto.source_name = self.source_name
-        if self.entry_point_name:
-            proto.entry_point_name = self.entry_point_name
         proto.user_id = self.user_id
         proto.status = self.status
         proto.start_time = self.start_time
         if self.end_time:
             proto.end_time = self.end_time
-        if self.source_version:
-            proto.source_version = self.source_version
         if self.artifact_uri:
             proto.artifact_uri = self.artifact_uri
         proto.lifecycle_stage = self.lifecycle_stage
@@ -181,8 +134,6 @@ def from_proto(cls, proto):
         if end_time == 0:
             end_time = None
         return cls(run_uuid=proto.run_uuid, run_id=proto.run_id, experiment_id=proto.experiment_id,
-                   name=proto.name, source_type=proto.source_type, source_name=proto.source_name,
-                   entry_point_name=proto.entry_point_name, user_id=proto.user_id,
-                   status=proto.status, start_time=proto.start_time, end_time=end_time,
-                   source_version=proto.source_version, lifecycle_stage=proto.lifecycle_stage,
+                   user_id=proto.user_id, status=proto.status, start_time=proto.start_time,
+                   end_time=end_time, lifecycle_stage=proto.lifecycle_stage,
                    artifact_uri=proto.artifact_uri)