Skip to content

Commit

Permalink
Increase maximum param value length to 500 (#6358)
Browse files Browse the repository at this point in the history
* increase validation of maximum param value length from 250 to 500 (squashed)

Signed-off-by: Jan Sindlar <jan.sindlar@eureka.ai>

* 1. added test for exceeding max length. 2. lint. 3. validation logic addresses null

Signed-off-by: Jan Sindlar <jan.sindlar@eureka.ai>

* file store should test already stringified params

Signed-off-by: Jan Sindlar <jan.sindlar@eureka.ai>

* update alembic chain

Signed-off-by: Jan Sindlar <jan.sindlar@eureka.ai>

* Autoformat: https://github.com/mlflow/mlflow/actions/runs/2800398158

Signed-off-by: mlflow-automation <mlflow-automation@users.noreply.github.com>

* Update tests/store/tracking/test_file_store.py

Co-authored-by: Harutaka Kawamura <hkawamura0130@gmail.com>
Signed-off-by: Jan Sindlar <jan.sindlar@eureka.ai>

Co-authored-by: Jan Sindlar <jan.sindlar@eureka.ai>
Co-authored-by: mlflow-automation <mlflow-automation@users.noreply.github.com>
Co-authored-by: Harutaka Kawamura <hkawamura0130@gmail.com>
  • Loading branch information
4 people committed Aug 5, 2022
1 parent 80564db commit d4109d0
Show file tree
Hide file tree
Showing 14 changed files with 97 additions and 13 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""change param value length to 500
Revision ID: cc1f77228345
Revises: 0c779009ac13
Create Date: 2022-08-04 22:40:56.960003
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "cc1f77228345"
down_revision = "0c779009ac13"
branch_labels = None
depends_on = None


def upgrade():
"""
Enlarge the maximum param value length to 500.
"""
with op.batch_alter_table("params") as batch_op:
batch_op.alter_column(
"value",
existing_type=sa.String(250),
type_=sa.String(500),
existing_nullable=False,
nullable=False,
)


def downgrade():
pass
4 changes: 2 additions & 2 deletions mlflow/store/tracking/dbmodels/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,9 +420,9 @@ class SqlParam(Base):
"""
Param key: `String` (limit 250 characters). Part of *Primary Key* for ``params`` table.
"""
value = Column(String(250), nullable=False)
value = Column(String(500), nullable=False)
"""
Param value: `String` (limit 250 characters). Defined as *Non-null* in schema.
Param value: `String` (limit 500 characters). Defined as *Non-null* in schema.
"""
run_uuid = Column(String(32), ForeignKey("runs.run_uuid"))
"""
Expand Down
3 changes: 2 additions & 1 deletion mlflow/store/tracking/file_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
_validate_metric,
_validate_metric_name,
_validate_param_name,
_validate_param,
_validate_run_id,
_validate_tag_name,
_validate_experiment_id,
Expand Down Expand Up @@ -857,7 +858,7 @@ def _writeable_value(self, tag_value):

def log_param(self, run_id, param):
_validate_run_id(run_id)
_validate_param_name(param.key)
_validate_param(param.key, param.value)
run_info = self._get_run_info(run_id)
check_run_is_active(run_info)
self._log_run_param(run_info, param)
Expand Down
2 changes: 2 additions & 0 deletions mlflow/store/tracking/sqlalchemy_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
_validate_tag,
_validate_list_experiments_max_results,
_validate_param_keys_unique,
_validate_param,
_validate_experiment_name,
)
from mlflow.utils.mlflow_tags import MLFLOW_LOGGED_MODELS
Expand Down Expand Up @@ -891,6 +892,7 @@ def get_metric_history(self, run_id, metric_key):
return [metric.to_mlflow_entity() for metric in metrics]

def log_param(self, run_id, param):
_validate_param(param.key, param.value)
with self.ManagedSessionMaker() as session:
run = self._get_run(run_uuid=run_id, session=session)
self._check_run_is_active(run)
Expand Down
2 changes: 1 addition & 1 deletion mlflow/tracking/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,7 @@ def log_param(self, run_id: str, key: str, value: Any) -> None:
All backend stores support keys up to length 250, but some may
support larger keys.
:param value: Parameter value (string, but will be string-ified if not).
All backend stores support values up to length 250, but some
All backend stores support values up to length 500, but some
may support larger values.
.. code-block:: python
Expand Down
2 changes: 1 addition & 1 deletion mlflow/tracking/fluent.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ def log_param(key: str, value: Any) -> None:
All backend stores support keys up to length 250, but some may
support larger keys.
:param value: Parameter value (string, but will be string-ified if not).
All backend stores support values up to length 250, but some
All backend stores support values up to length 500, but some
may support larger values.
.. code-block:: python
Expand Down
4 changes: 2 additions & 2 deletions mlflow/utils/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
MAX_METRICS_PER_BATCH = 1000
MAX_ENTITIES_PER_BATCH = 1000
MAX_BATCH_LOG_REQUEST_SIZE = int(1e6)
MAX_PARAM_VAL_LENGTH = 250
MAX_PARAM_VAL_LENGTH = 500
MAX_TAG_VAL_LENGTH = 5000
MAX_EXPERIMENT_TAG_KEY_LENGTH = 250
MAX_EXPERIMENT_TAG_VAL_LENGTH = 5000
Expand Down Expand Up @@ -265,7 +265,7 @@ def _validate_tag_name(name):


def _validate_length_limit(entity_name, limit, value):
if len(value) > limit:
if value is not None and len(value) > limit:
raise MlflowException(
"%s '%s' had length %s, which exceeded length limit of %s"
% (entity_name, value[:250], len(value), limit),
Expand Down
2 changes: 1 addition & 1 deletion tests/db/schemas/mssql.sql
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ CREATE TABLE model_version_tags (

CREATE TABLE params (
key VARCHAR(250) COLLATE "SQL_Latin1_General_CP1_CI_AS" NOT NULL,
value VARCHAR(250) COLLATE "SQL_Latin1_General_CP1_CI_AS" NOT NULL,
value VARCHAR(500) COLLATE "SQL_Latin1_General_CP1_CI_AS" NOT NULL,
run_uuid VARCHAR(32) COLLATE "SQL_Latin1_General_CP1_CI_AS" NOT NULL,
CONSTRAINT param_pk PRIMARY KEY (key, run_uuid),
CONSTRAINT "FK__params__run_uuid__45F365D3" FOREIGN KEY(run_uuid) REFERENCES runs (run_uuid)
Expand Down
2 changes: 1 addition & 1 deletion tests/db/schemas/mysql.sql
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ CREATE TABLE model_version_tags (

CREATE TABLE params (
key VARCHAR(250) NOT NULL,
value VARCHAR(250) NOT NULL,
value VARCHAR(500) NOT NULL,
run_uuid VARCHAR(32) NOT NULL,
PRIMARY KEY (key, run_uuid),
CONSTRAINT params_ibfk_1 FOREIGN KEY(run_uuid) REFERENCES runs (run_uuid)
Expand Down
2 changes: 1 addition & 1 deletion tests/db/schemas/postgresql.sql
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ CREATE TABLE model_version_tags (

CREATE TABLE params (
key VARCHAR(250) NOT NULL,
value VARCHAR(250) NOT NULL,
value VARCHAR(500) NOT NULL,
run_uuid VARCHAR(32) NOT NULL,
CONSTRAINT param_pk PRIMARY KEY (key, run_uuid),
CONSTRAINT params_run_uuid_fkey FOREIGN KEY(run_uuid) REFERENCES runs (run_uuid)
Expand Down
4 changes: 2 additions & 2 deletions tests/db/schemas/sqlite.sql
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ CREATE TABLE runs (
experiment_id INTEGER,
CONSTRAINT run_pk PRIMARY KEY (run_uuid),
FOREIGN KEY(experiment_id) REFERENCES experiments (experiment_id),
CONSTRAINT runs_lifecycle_stage CHECK (lifecycle_stage IN ('active', 'deleted')),
CONSTRAINT source_type CHECK (source_type IN ('NOTEBOOK', 'JOB', 'LOCAL', 'UNKNOWN', 'PROJECT')),
CONSTRAINT runs_lifecycle_stage CHECK (lifecycle_stage IN ('active', 'deleted')),
CHECK (status IN ('SCHEDULED', 'FAILED', 'FINISHED', 'RUNNING', 'KILLED'))
)

Expand Down Expand Up @@ -123,7 +123,7 @@ CREATE TABLE model_version_tags (

CREATE TABLE params (
key VARCHAR(250) NOT NULL,
value VARCHAR(250) NOT NULL,
value VARCHAR(500) NOT NULL,
run_uuid VARCHAR(32) NOT NULL,
CONSTRAINT param_pk PRIMARY KEY (key, run_uuid),
FOREIGN KEY(run_uuid) REFERENCES runs (run_uuid)
Expand Down
2 changes: 1 addition & 1 deletion tests/resources/db/latest_schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ CREATE TABLE model_version_tags (

CREATE TABLE params (
key VARCHAR(250) NOT NULL,
value VARCHAR(250) NOT NULL,
value VARCHAR(500) NOT NULL,
run_uuid VARCHAR(32) NOT NULL,
CONSTRAINT param_pk PRIMARY KEY (key, run_uuid),
FOREIGN KEY(run_uuid) REFERENCES runs (run_uuid)
Expand Down
26 changes: 26 additions & 0 deletions tests/store/tracking/test_file_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -951,6 +951,17 @@ def test_log_param_enforces_value_immutability(self):
run = fs.get_run(run_id)
assert run.data.params[param_name] == "value1"

def test_log_param_max_length_value(self):
param_name = "new param"
param_value = "x" * 500
fs = FileStore(self.test_root)
run_id = self.exp_data[FileStore.DEFAULT_EXPERIMENT_ID]["runs"][0]
fs.log_param(run_id, Param(param_name, param_value))
run = fs.get_run(run_id)
assert run.data.params[param_name] == param_value
with pytest.raises(MlflowException, match="exceeded length"):
fs.log_param(run_id, Param(param_name, "x" * 1000))

def test_weird_metric_names(self):
WEIRD_METRIC_NAME = "this is/a weird/but valid metric"
fs = FileStore(self.test_root)
Expand Down Expand Up @@ -1201,6 +1212,21 @@ def _create_run(self, fs):
experiment_id=FileStore.DEFAULT_EXPERIMENT_ID, user_id="user", start_time=0, tags=[]
)

def test_log_batch_max_length_value(self):
param_entities = [Param("long param", "x" * 500), Param("short param", "xyz")]
expected_param_entities = [
Param("long param", "x" * 500),
Param("short param", "xyz"),
]
fs = FileStore(self.test_root)
run = self._create_run(fs)
fs.log_batch(run.info.run_id, (), param_entities, ())
self._verify_logged(fs, run.info.run_id, (), expected_param_entities, ())

with pytest.raises(MlflowException, match="exceeded length"):
param_entities = [Param("long param", "x" * 1000), Param("short param", "xyz")]
fs.log_batch(run.info.run_id, (), param_entities, ())

def test_log_batch_internal_error(self):
# Verify that internal errors during log_batch result in MlflowExceptions
fs = FileStore(self.test_root)
Expand Down
21 changes: 21 additions & 0 deletions tests/store/tracking/test_sqlalchemy_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -1069,6 +1069,17 @@ def test_log_null_param(self):
self.store.log_param(run.info.run_id, param)
assert exception_context.exception.error_code == ErrorCode.Name(BAD_REQUEST)

def test_log_param_max_length_value(self):
run = self._run_factory()
tkey = "blahmetric"
tval = "x" * 500
param = entities.Param(tkey, tval)
self.store.log_param(run.info.run_id, param)
run = self.store.get_run(run.info.run_id)
assert run.data.params[tkey] == str(tval)
with pytest.raises(MlflowException, match="exceeded length"):
self.store.log_param(run.info.run_id, entities.Param(tkey, "x" * 1000))

def test_set_experiment_tag(self):
exp_id = self._experiment_factory("setExperimentTagExp")
tag = entities.ExperimentTag("tag0", "value0")
Expand Down Expand Up @@ -2077,6 +2088,16 @@ def test_log_batch_null_metrics(self):
self.store.log_batch(run.info.run_id, metrics=metrics, params=[], tags=[])
assert exception_context.exception.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)

def test_log_batch_params_max_length_value(self):
run = self._run_factory()
param_entities = [Param("long param", "x" * 500), Param("short param", "xyz")]
expected_param_entities = [Param("long param", "x" * 500), Param("short param", "xyz")]
self.store.log_batch(run.info.run_id, [], param_entities, [])
self._verify_logged(self.store, run.info.run_id, [], expected_param_entities, [])
with pytest.raises(MlflowException, match="exceeded length"):
param_entities = [Param("long param", "x" * 1000)]
self.store.log_batch(run.info.run_id, [], param_entities, [])

def test_upgrade_cli_idempotence(self):
# Repeatedly run `mlflow db upgrade` against our database, verifying that the command
# succeeds and that the DB has the latest schema
Expand Down

0 comments on commit d4109d0

Please sign in to comment.