diff --git a/mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py b/mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py new file mode 100644 index 0000000000000..84adbc0e549d4 --- /dev/null +++ b/mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py @@ -0,0 +1,34 @@ +"""change param value length to 500 + +Revision ID: cc1f77228345 +Revises: 0c779009ac13 +Create Date: 2022-08-04 22:40:56.960003 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "cc1f77228345" +down_revision = "0c779009ac13" +branch_labels = None +depends_on = None + + +def upgrade(): + """ + Enlarge the maximum param value length to 500. + """ + with op.batch_alter_table("params") as batch_op: + batch_op.alter_column( + "value", + existing_type=sa.String(250), + type_=sa.String(500), + existing_nullable=False, + nullable=False, + ) + + +def downgrade(): + pass diff --git a/mlflow/store/tracking/dbmodels/models.py b/mlflow/store/tracking/dbmodels/models.py index d9bfdd87ae62f..c513430a45f8a 100644 --- a/mlflow/store/tracking/dbmodels/models.py +++ b/mlflow/store/tracking/dbmodels/models.py @@ -420,9 +420,9 @@ class SqlParam(Base): """ Param key: `String` (limit 250 characters). Part of *Primary Key* for ``params`` table. """ - value = Column(String(250), nullable=False) + value = Column(String(500), nullable=False) """ - Param value: `String` (limit 250 characters). Defined as *Non-null* in schema. + Param value: `String` (limit 500 characters). Defined as *Non-null* in schema. """ run_uuid = Column(String(32), ForeignKey("runs.run_uuid")) """ diff --git a/mlflow/store/tracking/file_store.py b/mlflow/store/tracking/file_store.py index 003c944f97f13..ab92b2630b0e0 100644 --- a/mlflow/store/tracking/file_store.py +++ b/mlflow/store/tracking/file_store.py @@ -41,6 +41,7 @@ _validate_metric, _validate_metric_name, _validate_param_name, + _validate_param, _validate_run_id, _validate_tag_name, _validate_experiment_id, @@ -857,7 +858,7 @@ def _writeable_value(self, tag_value): def log_param(self, run_id, param): _validate_run_id(run_id) - _validate_param_name(param.key) + _validate_param(param.key, param.value) run_info = self._get_run_info(run_id) check_run_is_active(run_info) self._log_run_param(run_info, param) diff --git a/mlflow/store/tracking/sqlalchemy_store.py b/mlflow/store/tracking/sqlalchemy_store.py index eca59e44e5fc8..e798519392ef1 100644 --- a/mlflow/store/tracking/sqlalchemy_store.py +++ b/mlflow/store/tracking/sqlalchemy_store.py @@ -51,6 +51,7 @@ _validate_tag, _validate_list_experiments_max_results, _validate_param_keys_unique, + _validate_param, _validate_experiment_name, ) from mlflow.utils.mlflow_tags import MLFLOW_LOGGED_MODELS @@ -891,6 +892,7 @@ def get_metric_history(self, run_id, metric_key): return [metric.to_mlflow_entity() for metric in metrics] def log_param(self, run_id, param): + _validate_param(param.key, param.value) with self.ManagedSessionMaker() as session: run = self._get_run(run_uuid=run_id, session=session) self._check_run_is_active(run) diff --git a/mlflow/tracking/client.py b/mlflow/tracking/client.py index 00981aac277bf..ceab624f53235 100644 --- a/mlflow/tracking/client.py +++ b/mlflow/tracking/client.py @@ -808,7 +808,7 @@ def log_param(self, run_id: str, key: str, value: Any) -> None: All backend stores support keys up to length 250, but some may support larger keys. :param value: Parameter value (string, but will be string-ified if not). - All backend stores support values up to length 250, but some + All backend stores support values up to length 500, but some may support larger values. .. code-block:: python diff --git a/mlflow/tracking/fluent.py b/mlflow/tracking/fluent.py index d5a588861f74b..7a0262bfe8574 100644 --- a/mlflow/tracking/fluent.py +++ b/mlflow/tracking/fluent.py @@ -524,7 +524,7 @@ def log_param(key: str, value: Any) -> None: All backend stores support keys up to length 250, but some may support larger keys. :param value: Parameter value (string, but will be string-ified if not). - All backend stores support values up to length 250, but some + All backend stores support values up to length 500, but some may support larger values. .. code-block:: python diff --git a/mlflow/utils/validation.py b/mlflow/utils/validation.py index 29103b3499a78..727b6fe867605 100644 --- a/mlflow/utils/validation.py +++ b/mlflow/utils/validation.py @@ -28,7 +28,7 @@ MAX_METRICS_PER_BATCH = 1000 MAX_ENTITIES_PER_BATCH = 1000 MAX_BATCH_LOG_REQUEST_SIZE = int(1e6) -MAX_PARAM_VAL_LENGTH = 250 +MAX_PARAM_VAL_LENGTH = 500 MAX_TAG_VAL_LENGTH = 5000 MAX_EXPERIMENT_TAG_KEY_LENGTH = 250 MAX_EXPERIMENT_TAG_VAL_LENGTH = 5000 @@ -265,7 +265,7 @@ def _validate_tag_name(name): def _validate_length_limit(entity_name, limit, value): - if len(value) > limit: + if value is not None and len(value) > limit: raise MlflowException( "%s '%s' had length %s, which exceeded length limit of %s" % (entity_name, value[:250], len(value), limit), diff --git a/tests/db/schemas/mssql.sql b/tests/db/schemas/mssql.sql index 2e34acafd86d2..f70e22f4ba1b1 100644 --- a/tests/db/schemas/mssql.sql +++ b/tests/db/schemas/mssql.sql @@ -115,7 +115,7 @@ CREATE TABLE model_version_tags ( CREATE TABLE params ( key VARCHAR(250) COLLATE "SQL_Latin1_General_CP1_CI_AS" NOT NULL, - value VARCHAR(250) COLLATE "SQL_Latin1_General_CP1_CI_AS" NOT NULL, + value VARCHAR(500) COLLATE "SQL_Latin1_General_CP1_CI_AS" NOT NULL, run_uuid VARCHAR(32) COLLATE "SQL_Latin1_General_CP1_CI_AS" NOT NULL, CONSTRAINT param_pk PRIMARY KEY (key, run_uuid), CONSTRAINT "FK__params__run_uuid__45F365D3" FOREIGN KEY(run_uuid) REFERENCES runs (run_uuid) diff --git a/tests/db/schemas/mysql.sql b/tests/db/schemas/mysql.sql index 8ce5065e41916..3aa74d8d501eb 100644 --- a/tests/db/schemas/mysql.sql +++ b/tests/db/schemas/mysql.sql @@ -122,7 +122,7 @@ CREATE TABLE model_version_tags ( CREATE TABLE params ( key VARCHAR(250) NOT NULL, - value VARCHAR(250) NOT NULL, + value VARCHAR(500) NOT NULL, run_uuid VARCHAR(32) NOT NULL, PRIMARY KEY (key, run_uuid), CONSTRAINT params_ibfk_1 FOREIGN KEY(run_uuid) REFERENCES runs (run_uuid) diff --git a/tests/db/schemas/postgresql.sql b/tests/db/schemas/postgresql.sql index 5c8c3992a6c2c..8c8b013ac9625 100644 --- a/tests/db/schemas/postgresql.sql +++ b/tests/db/schemas/postgresql.sql @@ -120,7 +120,7 @@ CREATE TABLE model_version_tags ( CREATE TABLE params ( key VARCHAR(250) NOT NULL, - value VARCHAR(250) NOT NULL, + value VARCHAR(500) NOT NULL, run_uuid VARCHAR(32) NOT NULL, CONSTRAINT param_pk PRIMARY KEY (key, run_uuid), CONSTRAINT params_run_uuid_fkey FOREIGN KEY(run_uuid) REFERENCES runs (run_uuid) diff --git a/tests/db/schemas/sqlite.sql b/tests/db/schemas/sqlite.sql index 6508702864dc6..51a8c0777a286 100644 --- a/tests/db/schemas/sqlite.sql +++ b/tests/db/schemas/sqlite.sql @@ -79,8 +79,8 @@ CREATE TABLE runs ( experiment_id INTEGER, CONSTRAINT run_pk PRIMARY KEY (run_uuid), FOREIGN KEY(experiment_id) REFERENCES experiments (experiment_id), - CONSTRAINT runs_lifecycle_stage CHECK (lifecycle_stage IN ('active', 'deleted')), CONSTRAINT source_type CHECK (source_type IN ('NOTEBOOK', 'JOB', 'LOCAL', 'UNKNOWN', 'PROJECT')), + CONSTRAINT runs_lifecycle_stage CHECK (lifecycle_stage IN ('active', 'deleted')), CHECK (status IN ('SCHEDULED', 'FAILED', 'FINISHED', 'RUNNING', 'KILLED')) ) @@ -123,7 +123,7 @@ CREATE TABLE model_version_tags ( CREATE TABLE params ( key VARCHAR(250) NOT NULL, - value VARCHAR(250) NOT NULL, + value VARCHAR(500) NOT NULL, run_uuid VARCHAR(32) NOT NULL, CONSTRAINT param_pk PRIMARY KEY (key, run_uuid), FOREIGN KEY(run_uuid) REFERENCES runs (run_uuid) diff --git a/tests/resources/db/latest_schema.sql b/tests/resources/db/latest_schema.sql index 928dd92b33db0..da776a6e73e27 100644 --- a/tests/resources/db/latest_schema.sql +++ b/tests/resources/db/latest_schema.sql @@ -123,7 +123,7 @@ CREATE TABLE model_version_tags ( CREATE TABLE params ( key VARCHAR(250) NOT NULL, - value VARCHAR(250) NOT NULL, + value VARCHAR(500) NOT NULL, run_uuid VARCHAR(32) NOT NULL, CONSTRAINT param_pk PRIMARY KEY (key, run_uuid), FOREIGN KEY(run_uuid) REFERENCES runs (run_uuid) diff --git a/tests/store/tracking/test_file_store.py b/tests/store/tracking/test_file_store.py index 8ecd00a5299e2..6f395c563cb5b 100644 --- a/tests/store/tracking/test_file_store.py +++ b/tests/store/tracking/test_file_store.py @@ -951,6 +951,17 @@ def test_log_param_enforces_value_immutability(self): run = fs.get_run(run_id) assert run.data.params[param_name] == "value1" + def test_log_param_max_length_value(self): + param_name = "new param" + param_value = "x" * 500 + fs = FileStore(self.test_root) + run_id = self.exp_data[FileStore.DEFAULT_EXPERIMENT_ID]["runs"][0] + fs.log_param(run_id, Param(param_name, param_value)) + run = fs.get_run(run_id) + assert run.data.params[param_name] == param_value + with pytest.raises(MlflowException, match="exceeded length"): + fs.log_param(run_id, Param(param_name, "x" * 1000)) + def test_weird_metric_names(self): WEIRD_METRIC_NAME = "this is/a weird/but valid metric" fs = FileStore(self.test_root) @@ -1201,6 +1212,21 @@ def _create_run(self, fs): experiment_id=FileStore.DEFAULT_EXPERIMENT_ID, user_id="user", start_time=0, tags=[] ) + def test_log_batch_max_length_value(self): + param_entities = [Param("long param", "x" * 500), Param("short param", "xyz")] + expected_param_entities = [ + Param("long param", "x" * 500), + Param("short param", "xyz"), + ] + fs = FileStore(self.test_root) + run = self._create_run(fs) + fs.log_batch(run.info.run_id, (), param_entities, ()) + self._verify_logged(fs, run.info.run_id, (), expected_param_entities, ()) + + with pytest.raises(MlflowException, match="exceeded length"): + param_entities = [Param("long param", "x" * 1000), Param("short param", "xyz")] + fs.log_batch(run.info.run_id, (), param_entities, ()) + def test_log_batch_internal_error(self): # Verify that internal errors during log_batch result in MlflowExceptions fs = FileStore(self.test_root) diff --git a/tests/store/tracking/test_sqlalchemy_store.py b/tests/store/tracking/test_sqlalchemy_store.py index 384fda3d3ba9b..5dcdb6c5fcf5f 100644 --- a/tests/store/tracking/test_sqlalchemy_store.py +++ b/tests/store/tracking/test_sqlalchemy_store.py @@ -1069,6 +1069,17 @@ def test_log_null_param(self): self.store.log_param(run.info.run_id, param) assert exception_context.exception.error_code == ErrorCode.Name(BAD_REQUEST) + def test_log_param_max_length_value(self): + run = self._run_factory() + tkey = "blahmetric" + tval = "x" * 500 + param = entities.Param(tkey, tval) + self.store.log_param(run.info.run_id, param) + run = self.store.get_run(run.info.run_id) + assert run.data.params[tkey] == str(tval) + with pytest.raises(MlflowException, match="exceeded length"): + self.store.log_param(run.info.run_id, entities.Param(tkey, "x" * 1000)) + def test_set_experiment_tag(self): exp_id = self._experiment_factory("setExperimentTagExp") tag = entities.ExperimentTag("tag0", "value0") @@ -2077,6 +2088,16 @@ def test_log_batch_null_metrics(self): self.store.log_batch(run.info.run_id, metrics=metrics, params=[], tags=[]) assert exception_context.exception.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE) + def test_log_batch_params_max_length_value(self): + run = self._run_factory() + param_entities = [Param("long param", "x" * 500), Param("short param", "xyz")] + expected_param_entities = [Param("long param", "x" * 500), Param("short param", "xyz")] + self.store.log_batch(run.info.run_id, [], param_entities, []) + self._verify_logged(self.store, run.info.run_id, [], expected_param_entities, []) + with pytest.raises(MlflowException, match="exceeded length"): + param_entities = [Param("long param", "x" * 1000)] + self.store.log_batch(run.info.run_id, [], param_entities, []) + def test_upgrade_cli_idempotence(self): # Repeatedly run `mlflow db upgrade` against our database, verifying that the command # succeeds and that the DB has the latest schema