diff --git a/.gitignore b/.gitignore index 6f6361ef706..9453e1114b4 100644 --- a/.gitignore +++ b/.gitignore @@ -22,4 +22,4 @@ result*.html tests/test_results/ venv mlrun/utils/version/version.json -mlrun/api/migrations/mlrun.db +mlrun/api/migrations_sqlite/mlrun.db diff --git a/Makefile b/Makefile index ba41a98cefe..79231d4f7d1 100644 --- a/Makefile +++ b/Makefile @@ -69,14 +69,37 @@ install-requirements: ## Install all requirements needed for development -r dockerfiles/mlrun-api/requirements.txt \ -r docs/requirements.txt -.PHONY: create-migration -create-migration: export MLRUN_HTTPDB__DSN="sqlite:///$(PWD)/mlrun/api/migrations/mlrun.db?check_same_thread=false" -create-migration: ## Create a DB migration (MLRUN_MIGRATION_MESSAGE must be set) +.PHONY: create-migration-sqlite +create-migration-sqlite: export MLRUN_HTTPDB__DSN="sqlite:///$(PWD)/mlrun/api/migrations_sqlite/mlrun.db?check_same_thread=false" +create-migration-sqlite: ## Create a DB migration (MLRUN_MIGRATION_MESSAGE must be set) ifndef MLRUN_MIGRATION_MESSAGE $(error MLRUN_MIGRATION_MESSAGE is undefined) endif - alembic -c ./mlrun/api/alembic.ini upgrade head - alembic -c ./mlrun/api/alembic.ini revision --autogenerate -m "$(MLRUN_MIGRATION_MESSAGE)" + alembic -c ./mlrun/api/alembic.ini upgrade head + alembic -c ./mlrun/api/alembic.ini revision --autogenerate -m "$(MLRUN_MIGRATION_MESSAGE)" + +.PHONY: create-migration-mysql +create-migration-mysql: export MLRUN_HTTPDB__DSN="mysql+pymysql://root:pass@localhost:3306/mlrun" +create-migration-mysql: ## Create a DB migration (MLRUN_MIGRATION_MESSAGE must be set) +ifndef MLRUN_MIGRATION_MESSAGE + $(error MLRUN_MIGRATION_MESSAGE is undefined) +endif + docker run \ + --name=migration-db \ + --rm \ + -v $(PWD):/mlrun \ + -p 3306:3306 \ + -e MYSQL_ROOT_PASSWORD="pass" \ + -e MYSQL_ROOT_HOST=% \ + -e MYSQL_DATABASE="mlrun" \ + -d \ + mysql/mysql-server:5.7 \ + --character-set-server=utf8 \ + --collation-server=utf8_bin + alembic -c ./mlrun/api/alembic_mysql.ini upgrade head + alembic -c ./mlrun/api/alembic_mysql.ini revision --autogenerate -m "$(MLRUN_MIGRATION_MESSAGE)" + docker kill migration-db + docker rm migration-db .PHONY: bump-version bump-version: ## Bump version in all needed places in code @@ -489,7 +512,7 @@ test-migrations: clean ## Run mlrun db migrations tests --durations=100 \ -rf \ --test-alembic \ - migrations/tests/* + migrations_sqlite/tests/* .PHONY: test-system-dockerized test-system-dockerized: build-test-system ## Run mlrun system tests in docker container diff --git a/dockerfiles/mlrun-api/requirements.txt b/dockerfiles/mlrun-api/requirements.txt index 73120968302..03ae51266b8 100644 --- a/dockerfiles/mlrun-api/requirements.txt +++ b/dockerfiles/mlrun-api/requirements.txt @@ -3,3 +3,4 @@ dask-kubernetes~=0.11.0 # 3.0 iguazio system is running k8s 1.17 so ideally we would use 17.X, but kfp limiting to <12.0 kubernetes-asyncio~=11.0 apscheduler~=3.6 +sqlite3-to-mysql~=1.4 diff --git a/mlrun/api/alembic.ini b/mlrun/api/alembic.ini index 1a591ea6a02..4e6efb1848a 100644 --- a/mlrun/api/alembic.ini +++ b/mlrun/api/alembic.ini @@ -2,7 +2,7 @@ [alembic] # path to migration scripts -script_location = %(here)s/migrations +script_location = %(here)s/migrations_sqlite # template used to generate migration files # file_template = %%(rev)s_%%(slug)s diff --git a/mlrun/api/alembic_mysql.ini b/mlrun/api/alembic_mysql.ini new file mode 100644 index 00000000000..0c923b54160 --- /dev/null +++ b/mlrun/api/alembic_mysql.ini @@ -0,0 +1,86 @@ +# A generic, single database
configuration. + +[alembic] +# path to migration scripts +script_location = %(here)s/migrations_mysql + +# template used to generate migration files +# file_template = %%(rev)s_%%(slug)s + +# timezone to use when rendering the date +# within the migration file as well as the filename. +# string value is passed to dateutil.tz.gettz() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the +# "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; this defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path +# version_locations = %(here)s/bar %(here)s/bat alembic/versions + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# this is overridden anyway in the alembic env.py by what ever is configured in mlrun.mlconf +sqlalchemy.url = sqlite:///test.db + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks=black +# black.type=console_scripts +# black.entrypoint=black +# black.options=-l 79 + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/mlrun/api/db/sqldb/models.py b/mlrun/api/db/sqldb/models.py index 2cbb1f4098c..47afc49a626 100644 --- a/mlrun/api/db/sqldb/models.py +++ b/mlrun/api/db/sqldb/models.py @@ -33,6 +33,7 @@ from sqlalchemy.orm import class_mapper, relationship from mlrun.api import schemas +from mlrun.api.utils.db.sql_collation import SQLCollationUtil Base = declarative_base() NULL = None # Avoid flake8 issuing warnings when comparing in filter @@ -81,8 +82,8 @@ class Label(Base, BaseModel): ) id = Column(Integer, primary_key=True) - name = Column(String) - value = Column(String) + name = Column(String(255, collation=SQLCollationUtil.collation())) + value = Column(String(255, collation=SQLCollationUtil.collation())) parent = Column(Integer, ForeignKey(f"{table}.id")) return Label @@ -96,8 +97,8 @@ class Tag(Base, BaseModel): ) id = Column(Integer, primary_key=True) - project = Column(String) - name = Column(String) + project = Column(String(255, collation=SQLCollationUtil.collation())) + name = Column(String(255, collation=SQLCollationUtil.collation())) obj_id = Column(Integer, ForeignKey(f"{table}.id")) return Tag @@ -113,10 +114,13 @@ class Tag(Base, BaseModel): ) id = Column(Integer, primary_key=True) - project = Column(String) - name = Column(String) + project = Column(String(255, collation=SQLCollationUtil.collation())) + name = Column(String(255, 
collation=SQLCollationUtil.collation())) obj_id = Column(Integer, ForeignKey(f"{table}.id")) - obj_name = Column(String, ForeignKey(f"{table}.name")) + obj_name = Column( + String(255, collation=SQLCollationUtil.collation()), + ForeignKey(f"{table}.name"), + ) return Tag @@ -135,9 +139,9 @@ class Artifact(Base, HasStruct): Tag = make_tag(__tablename__) id = Column(Integer, primary_key=True) - key = Column(String) - project = Column(String) - uid = Column(String) + key = Column(String(255, collation=SQLCollationUtil.collation())) + project = Column(String(255, collation=SQLCollationUtil.collation())) + uid = Column(String(255, collation=SQLCollationUtil.collation())) updated = Column(TIMESTAMP) # TODO: change to JSON, see mlrun/api/schemas/function.py::FunctionState for reasoning body = Column(BLOB) @@ -156,9 +160,9 @@ class Function(Base, HasStruct): Tag = make_tag_v2(__tablename__) id = Column(Integer, primary_key=True) - name = Column(String) - project = Column(String) - uid = Column(String) + name = Column(String(255, collation=SQLCollationUtil.collation())) + project = Column(String(255, collation=SQLCollationUtil.collation())) + uid = Column(String(255, collation=SQLCollationUtil.collation())) # TODO: change to JSON, see mlrun/api/schemas/function.py::FunctionState for reasoning body = Column(BLOB) updated = Column(TIMESTAMP) @@ -171,8 +175,8 @@ class Log(Base, BaseModel): __tablename__ = "logs" id = Column(Integer, primary_key=True) - uid = Column(String) - project = Column(String) + uid = Column(String(255, collation=SQLCollationUtil.collation())) + project = Column(String(255, collation=SQLCollationUtil.collation())) # TODO: change to JSON, see mlrun/api/schemas/function.py::FunctionState for reasoning body = Column(BLOB) @@ -189,10 +193,10 @@ class Run(Base, HasStruct): Tag = make_tag(__tablename__) id = Column(Integer, primary_key=True) - uid = Column(String) - project = Column(String) + uid = Column(String(255, collation=SQLCollationUtil.collation())) + project = Column(String(255, collation=SQLCollationUtil.collation())) iteration = Column(Integer) - state = Column(String) + state = Column(String(255, collation=SQLCollationUtil.collation())) # TODO: change to JSON, see mlrun/api/schemas/function.py::FunctionState for reasoning body = Column(BLOB) start_time = Column(TIMESTAMP) @@ -208,14 +212,18 @@ class Schedule(Base, BaseModel): Label = make_label(__tablename__) id = Column(Integer, primary_key=True) - project = Column(String, nullable=False) - name = Column(String, nullable=False) - kind = Column(String) - desired_state = Column(String) - state = Column(String) + project = Column( + String(255, collation=SQLCollationUtil.collation()), nullable=False + ) + name = Column( + String(255, collation=SQLCollationUtil.collation()), nullable=False + ) + kind = Column(String(255, collation=SQLCollationUtil.collation())) + desired_state = Column(String(255, collation=SQLCollationUtil.collation())) + state = Column(String(255, collation=SQLCollationUtil.collation())) creation_time = Column(TIMESTAMP) - cron_trigger_str = Column(String) - last_run_uri = Column(String) + cron_trigger_str = Column(String(255, collation=SQLCollationUtil.collation())) + last_run_uri = Column(String(255, collation=SQLCollationUtil.collation())) # TODO: change to JSON, see mlrun/api/schemas/function.py::FunctionState for reasoning struct = Column(BLOB) labels = relationship(Label, cascade="all, delete-orphan") @@ -253,7 +261,7 @@ class User(Base, BaseModel): __table_args__ = (UniqueConstraint("name", 
name="_users_uc"),) id = Column(Integer, primary_key=True) - name = Column(String) + name = Column(String(255, collation=SQLCollationUtil.collation())) class Project(Base, BaseModel): __tablename__ = "projects" @@ -261,16 +269,16 @@ class Project(Base, BaseModel): __table_args__ = (UniqueConstraint("name", name="_projects_uc"),) id = Column(Integer, primary_key=True) - name = Column(String) - description = Column(String) - owner = Column(String) - source = Column(String) + name = Column(String(255, collation=SQLCollationUtil.collation())) + description = Column(String(255, collation=SQLCollationUtil.collation())) + owner = Column(String(255, collation=SQLCollationUtil.collation())) + source = Column(String(255, collation=SQLCollationUtil.collation())) # the attribute name used to be _spec which is just a wrong naming, the attribute was renamed to _full_object # leaving the column as is to prevent redundant migration # TODO: change to JSON, see mlrun/api/schemas/function.py::FunctionState for reasoning _full_object = Column("spec", BLOB) created = Column(TIMESTAMP, default=datetime.utcnow) - state = Column(String) + state = Column(String(255, collation=SQLCollationUtil.collation())) users = relationship(User, secondary=project_users) Label = make_label(__tablename__) @@ -294,8 +302,8 @@ class Feature(Base, BaseModel): id = Column(Integer, primary_key=True) feature_set_id = Column(Integer, ForeignKey("feature_sets.id")) - name = Column(String) - value_type = Column(String) + name = Column(String(255, collation=SQLCollationUtil.collation())) + value_type = Column(String(255, collation=SQLCollationUtil.collation())) Label = make_label(__tablename__) labels = relationship(Label, cascade="all, delete-orphan") @@ -308,8 +316,8 @@ class Entity(Base, BaseModel): id = Column(Integer, primary_key=True) feature_set_id = Column(Integer, ForeignKey("feature_sets.id")) - name = Column(String) - value_type = Column(String) + name = Column(String(255, collation=SQLCollationUtil.collation())) + value_type = Column(String(255, collation=SQLCollationUtil.collation())) Label = make_label(__tablename__) labels = relationship(Label, cascade="all, delete-orphan") @@ -324,12 +332,12 @@ class FeatureSet(Base, BaseModel): ) id = Column(Integer, primary_key=True) - name = Column(String) - project = Column(String) + name = Column(String(255, collation=SQLCollationUtil.collation())) + project = Column(String(255, collation=SQLCollationUtil.collation())) created = Column(TIMESTAMP, default=datetime.now(timezone.utc)) updated = Column(TIMESTAMP, default=datetime.now(timezone.utc)) - state = Column(String) - uid = Column(String) + state = Column(String(255, collation=SQLCollationUtil.collation())) + uid = Column(String(255, collation=SQLCollationUtil.collation())) _full_object = Column("object", JSON) @@ -360,12 +368,12 @@ class FeatureVector(Base, BaseModel): ) id = Column(Integer, primary_key=True) - name = Column(String) - project = Column(String) + name = Column(String(255, collation=SQLCollationUtil.collation())) + project = Column(String(255, collation=SQLCollationUtil.collation())) created = Column(TIMESTAMP, default=datetime.now(timezone.utc)) updated = Column(TIMESTAMP, default=datetime.now(timezone.utc)) - state = Column(String) - uid = Column(String) + state = Column(String(255, collation=SQLCollationUtil.collation())) + uid = Column(String(255, collation=SQLCollationUtil.collation())) _full_object = Column("object", JSON) @@ -391,7 +399,7 @@ class MarketplaceSource(Base, BaseModel): __table_args__ = 
(UniqueConstraint("name", name="_marketplace_sources_uc"),) id = Column(Integer, primary_key=True) - name = Column(String) + name = Column(String(255, collation=SQLCollationUtil.collation())) index = Column(Integer) created = Column(TIMESTAMP, default=datetime.now(timezone.utc)) updated = Column(TIMESTAMP, default=datetime.now(timezone.utc)) diff --git a/mlrun/api/initial_data.py b/mlrun/api/initial_data.py index 67e7a55cdf0..30868997932 100644 --- a/mlrun/api/initial_data.py +++ b/mlrun/api/initial_data.py @@ -15,19 +15,29 @@ from mlrun.config import config from mlrun.utils import logger -from .utils.alembic import AlembicUtil +from .utils.db.alembic import AlembicUtil +from .utils.db.mysql import MySQLUtil +from .utils.db.sqlite_migration import SQLiteMigrationUtil def init_data(from_scratch: bool = False) -> None: logger.info("Creating initial data") + alembic_config_file_name = "alembic.ini" + if MySQLUtil.get_mysql_dsn_data(): + alembic_config_file_name = "alembic_mysql.ini" + # run schema migrations on existing DB or create it with alembic dir_path = pathlib.Path(os.path.dirname(os.path.realpath(__file__))) - alembic_config_path = dir_path / "alembic.ini" + alembic_config_path = dir_path / alembic_config_file_name alembic_util = AlembicUtil(alembic_config_path) alembic_util.init_alembic(from_scratch=from_scratch) + if not from_scratch: + sqlite_migration_util = SQLiteMigrationUtil() + sqlite_migration_util.transfer() + db_session = create_session() try: init_db(db_session) diff --git a/mlrun/api/migrations/env.py b/mlrun/api/migrations_mysql/env.py similarity index 100% rename from mlrun/api/migrations/env.py rename to mlrun/api/migrations_mysql/env.py diff --git a/mlrun/api/migrations/script.py.mako b/mlrun/api/migrations_mysql/script.py.mako similarity index 100% rename from mlrun/api/migrations/script.py.mako rename to mlrun/api/migrations_mysql/script.py.mako diff --git a/mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py b/mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py new file mode 100644 index 00000000000..69a13c2c903 --- /dev/null +++ b/mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py @@ -0,0 +1,656 @@ +"""init + +Revision ID: c4af40b0bf61 +Revises: +Create Date: 2021-09-30 10:55:51.956636 + +""" +import sqlalchemy as sa +from alembic import op + +from mlrun.api.utils.db.sql_collation import SQLCollationUtil + +# revision identifiers, used by Alembic. +revision = "c4af40b0bf61" +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "artifacts", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "key", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "project", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "uid", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("updated", sa.TIMESTAMP(), nullable=True), + sa.Column("body", sa.BLOB(), nullable=True), + sa.PrimaryKeyConstraint("id", name="_artifacts_pk"), + sa.UniqueConstraint("uid", "project", "key", name="_artifacts_uc"), + ) + op.create_table( + "feature_sets", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "project", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("created", sa.TIMESTAMP(), nullable=True), + sa.Column("updated", sa.TIMESTAMP(), nullable=True), + sa.Column( + "state", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "uid", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("object", sa.JSON(), nullable=True), + sa.PrimaryKeyConstraint("id", name="_feature_sets_pk"), + sa.UniqueConstraint("name", "project", "uid", name="_feature_set_uc"), + ) + op.create_table( + "feature_vectors", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "project", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("created", sa.TIMESTAMP(), nullable=True), + sa.Column("updated", sa.TIMESTAMP(), nullable=True), + sa.Column( + "state", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "uid", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("object", sa.JSON(), nullable=True), + sa.PrimaryKeyConstraint("id", name="_feature_vectors_pk"), + sa.UniqueConstraint("name", "project", "uid", name="_feature_vectors_uc"), + ) + op.create_table( + "functions", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "project", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "uid", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("body", sa.BLOB(), nullable=True), + sa.Column("updated", sa.TIMESTAMP(), nullable=True), + sa.PrimaryKeyConstraint("id", name="_functions_pk"), + sa.UniqueConstraint("name", "project", "uid", name="_functions_uc"), + ) + op.create_table( + "logs", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "uid", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "project", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("body", sa.BLOB(), nullable=True), + sa.PrimaryKeyConstraint("id", name="_logs_pk"), + ) + op.create_table( + "marketplace_sources", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + 
), + sa.Column("index", sa.Integer(), nullable=True), + sa.Column("created", sa.TIMESTAMP(), nullable=True), + sa.Column("updated", sa.TIMESTAMP(), nullable=True), + sa.Column("object", sa.JSON(), nullable=True), + sa.PrimaryKeyConstraint("id", name="_marketplace_sources_pk"), + sa.UniqueConstraint("name", name="_marketplace_sources_uc"), + ) + op.create_table( + "projects", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "description", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "owner", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "source", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("spec", sa.BLOB(), nullable=True), + sa.Column("created", sa.TIMESTAMP(), nullable=True), + sa.Column( + "state", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.PrimaryKeyConstraint("id", name="_projects_pk"), + sa.UniqueConstraint("name", name="_projects_uc"), + ) + op.create_table( + "runs", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "uid", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "project", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("iteration", sa.Integer(), nullable=True), + sa.Column( + "state", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("body", sa.BLOB(), nullable=True), + sa.Column("start_time", sa.TIMESTAMP(), nullable=True), + sa.PrimaryKeyConstraint("id", name="_runs_pk"), + sa.UniqueConstraint("uid", "project", "iteration", name="_runs_uc"), + ) + op.create_table( + "schedules_v2", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "project", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=False, + ), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=False, + ), + sa.Column( + "kind", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "desired_state", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "state", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("creation_time", sa.TIMESTAMP(), nullable=True), + sa.Column( + "cron_trigger_str", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "last_run_uri", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("struct", sa.BLOB(), nullable=True), + sa.Column("concurrency_limit", sa.Integer(), nullable=False), + sa.PrimaryKeyConstraint("id", name="_schedules_v2_pk"), + sa.UniqueConstraint("project", "name", name="_schedules_v2_uc"), + ) + op.create_table( + "users", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.PrimaryKeyConstraint("id", name="_users_pk"), + sa.UniqueConstraint("name", name="_users_uc"), + ) + op.create_table( + "artifacts_labels", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "name", + sa.String(length=255, 
collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("parent", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["parent"], ["artifacts.id"], name="_artifacts_labels_paren_fk" + ), + sa.PrimaryKeyConstraint("id", name="_artifacts_labels_pk"), + sa.UniqueConstraint("name", "parent", name="_artifacts_labels_uc"), + ) + op.create_table( + "artifacts_tags", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "project", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("obj_id", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["obj_id"], ["artifacts.id"], name="_artifacts_tags_obj_id_fk" + ), + sa.PrimaryKeyConstraint("id", name="_artifacts_tags_pk"), + sa.UniqueConstraint("project", "name", "obj_id", name="_artifacts_tags_uc"), + ) + op.create_table( + "entities", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("feature_set_id", sa.Integer(), nullable=True), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value_type", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.ForeignKeyConstraint( + ["feature_set_id"], ["feature_sets.id"], name="_entities_feature_set_id_fk" + ), + sa.PrimaryKeyConstraint("id", name="_entities_pk"), + ) + op.create_table( + "feature_sets_labels", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("parent", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["parent"], ["feature_sets.id"], name="_feature_sets_labels_parent_fk" + ), + sa.PrimaryKeyConstraint("id", name="_feature_sets_labels_pk"), + sa.UniqueConstraint("name", "parent", name="_feature_sets_labels_uc"), + ) + op.create_table( + "feature_sets_tags", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "project", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("obj_id", sa.Integer(), nullable=True), + sa.Column( + "obj_name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.ForeignKeyConstraint( + ["obj_id"], ["feature_sets.id"], name="_feature_sets_tags_obj_id_fk" + ), + sa.ForeignKeyConstraint( + ["obj_name"], ["feature_sets.name"], name="_feature_sets_tags_obj_name_fk" + ), + sa.PrimaryKeyConstraint("id", name="_feature_sets_tags_pk"), + sa.UniqueConstraint( + "project", "name", "obj_name", name="_feature_sets_tags_uc" + ), + ) + op.create_table( + "feature_vectors_labels", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("parent", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["parent"], ["feature_vectors.id"], name="_feature_vectors_labels_parent_fk" + ), + 
sa.PrimaryKeyConstraint("id", name="_feature_vectors_labels_pk"), + sa.UniqueConstraint("name", "parent", name="_feature_vectors_labels_uc"), + ) + op.create_table( + "feature_vectors_tags", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "project", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("obj_id", sa.Integer(), nullable=True), + sa.Column( + "obj_name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.ForeignKeyConstraint( + ["obj_id"], ["feature_vectors.id"], name="_feature_vectors_tags_obj_id_fk" + ), + sa.ForeignKeyConstraint( + ["obj_name"], + ["feature_vectors.name"], + name="_feature_vectors_tags_obj_name_fk", + ), + sa.PrimaryKeyConstraint("id", name="_feature_vectors_tags_pk"), + sa.UniqueConstraint( + "project", "name", "obj_name", name="_feature_vectors_tags_uc" + ), + ) + op.create_table( + "features", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("feature_set_id", sa.Integer(), nullable=True), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value_type", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.ForeignKeyConstraint( + ["feature_set_id"], ["feature_sets.id"], name="_features_feature_set_id_fk" + ), + sa.PrimaryKeyConstraint("id", name="_features_pk"), + ) + op.create_table( + "functions_labels", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("parent", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["parent"], ["functions.id"], name="_functions_labels_parent_fk" + ), + sa.PrimaryKeyConstraint("id", name="_functions_labels_pk"), + sa.UniqueConstraint("name", "parent", name="_functions_labels_uc"), + ) + op.create_table( + "functions_tags", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "project", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("obj_id", sa.Integer(), nullable=True), + sa.Column( + "obj_name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.ForeignKeyConstraint( + ["obj_id"], ["functions.id"], name="_functions_tags_obj_id_fk" + ), + sa.ForeignKeyConstraint( + ["obj_name"], ["functions.name"], name="_functions_tags_obj_name_fk" + ), + sa.PrimaryKeyConstraint("id", name="_functions_tags_pk"), + sa.UniqueConstraint("project", "name", "obj_name", name="_functions_tags_uc"), + ) + op.create_table( + "project_users", + sa.Column("project_id", sa.Integer(), nullable=True), + sa.Column("user_id", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["project_id"], ["projects.id"], name="_project_users_project_id_fk" + ), + sa.ForeignKeyConstraint( + ["user_id"], ["users.id"], name="_project_users_user_id_fk" + ), + ) + op.create_table( + "projects_labels", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + 
sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("parent", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["parent"], ["projects.id"], name="_projects_labels_parent_fk" + ), + sa.PrimaryKeyConstraint("id", name="_projects_labels_pk"), + sa.UniqueConstraint("name", "parent", name="_projects_labels_uc"), + ) + op.create_table( + "runs_labels", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("parent", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(["parent"], ["runs.id"], name="_runs_labels_parent_fk"), + sa.PrimaryKeyConstraint("id", name="_runs_labels_pk"), + sa.UniqueConstraint("name", "parent", name="_runs_labels_uc"), + ) + op.create_table( + "runs_tags", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "project", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("obj_id", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(["obj_id"], ["runs.id"], name="_runs_tags_obj_id_fk"), + sa.PrimaryKeyConstraint("id", name="_runs_tags_pk"), + sa.UniqueConstraint("project", "name", "obj_id", name="_runs_tags_uc"), + ) + op.create_table( + "schedules_v2_labels", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("parent", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["parent"], ["schedules_v2.id"], name="_schedules_v2_labels_parent_fk" + ), + sa.PrimaryKeyConstraint("id", name="_schedules_v2_labels_pk"), + sa.UniqueConstraint("name", "parent", name="_schedules_v2_labels_uc"), + ) + op.create_table( + "entities_labels", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("parent", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["parent"], ["entities.id"], name="_entities_labels_parent_fk" + ), + sa.PrimaryKeyConstraint("id", name="_entities_labels_pk"), + sa.UniqueConstraint("name", "parent", name="_entities_labels_uc"), + ) + op.create_table( + "features_labels", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "name", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(length=255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column("parent", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["parent"], ["features.id"], name="_features_labels_parent_fk" + ), + sa.PrimaryKeyConstraint("id", name="_features_labels_pk"), + sa.UniqueConstraint("name", "parent", name="_features_labels_uc"), + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table("features_labels") + op.drop_table("entities_labels") + op.drop_table("schedules_v2_labels") + op.drop_table("runs_tags") + op.drop_table("runs_labels") + op.drop_table("projects_labels") + op.drop_table("project_users") + op.drop_table("functions_tags") + op.drop_table("functions_labels") + op.drop_table("features") + op.drop_table("feature_vectors_tags") + op.drop_table("feature_vectors_labels") + op.drop_table("feature_sets_tags") + op.drop_table("feature_sets_labels") + op.drop_table("entities") + op.drop_table("artifacts_tags") + op.drop_table("artifacts_labels") + op.drop_table("users") + op.drop_table("schedules_v2") + op.drop_table("runs") + op.drop_table("projects") + op.drop_table("marketplace_sources") + op.drop_table("logs") + op.drop_table("functions") + op.drop_table("feature_vectors") + op.drop_table("feature_sets") + op.drop_table("artifacts") + # ### end Alembic commands ### diff --git a/mlrun/api/migrations_sqlite/env.py b/mlrun/api/migrations_sqlite/env.py new file mode 100644 index 00000000000..fdc14548104 --- /dev/null +++ b/mlrun/api/migrations_sqlite/env.py @@ -0,0 +1,82 @@ +from logging.config import fileConfig + +from alembic import context +from sqlalchemy import engine_from_config, pool + +from mlrun import mlconf +from mlrun.api.db.sqldb import models + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +fileConfig(config.config_file_name, disable_existing_loggers=False) + +# add your model's MetaData object here +# for 'autogenerate' support +target_metadata = models.Base.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + +# this will overwrite the ini-file sqlalchemy.url path +# with the path given in the mlconf +config.set_main_option("sqlalchemy.url", mlconf.httpdb.dsn) + + +def run_migrations_offline(): + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + render_as_batch=True, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online(): + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. 
+ + """ + connectable = context.config.attributes.get("connection", None) + + if connectable is None: + connectable = engine_from_config( + config.get_section(config.config_ini_section), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/mlrun/api/migrations_sqlite/script.py.mako b/mlrun/api/migrations_sqlite/script.py.mako new file mode 100644 index 00000000000..2c0156303a8 --- /dev/null +++ b/mlrun/api/migrations_sqlite/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade(): + ${upgrades if upgrades else "pass"} + + +def downgrade(): + ${downgrades if downgrades else "pass"} diff --git a/mlrun/api/migrations/tests/test_migrations.py b/mlrun/api/migrations_sqlite/tests/test_migrations.py similarity index 100% rename from mlrun/api/migrations/tests/test_migrations.py rename to mlrun/api/migrations_sqlite/tests/test_migrations.py diff --git a/mlrun/api/migrations/versions/11f8dd2dc9fe_init.py b/mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py similarity index 50% rename from mlrun/api/migrations/versions/11f8dd2dc9fe_init.py rename to mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py index 2022f13ff90..9017b0f8496 100644 --- a/mlrun/api/migrations/versions/11f8dd2dc9fe_init.py +++ b/mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py @@ -8,6 +8,8 @@ import sqlalchemy as sa from alembic import op +from mlrun.api.utils.db.sql_collation import SQLCollationUtil + # revision identifiers, used by Alembic. 
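This sqlite init revision, like the models and every other migration script touched in this diff, now imports SQLCollationUtil from mlrun.api.utils.db.sql_collation, a new module that is not shown in this part of the diff. Judging from how it is used (its return value is passed as the collation of each sa.String column) and from the utf8_bin collation configured for the MySQL migration container in the Makefile, a minimal sketch of such a helper could look like the following; the logic and names are assumptions, not the PR's code.

# Hypothetical sketch only; the real mlrun/api/utils/db/sql_collation.py is not shown in this section.
from mlrun import mlconf


class SQLCollationUtil:
    @staticmethod
    def collation():
        # Assumption: an explicit binary utf8 collation for MySQL (matching the
        # --collation-server=utf8_bin flag used for the migration container),
        # and None for SQLite so the dialect default collation is kept.
        if (mlconf.httpdb.dsn or "").startswith("mysql"):
            return "utf8_bin"
        return None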
revision = "11f8dd2dc9fe" down_revision = None @@ -20,9 +22,17 @@ def upgrade(): op.create_table( "artifacts", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("key", sa.String(), nullable=True), - sa.Column("project", sa.String(), nullable=True), - sa.Column("uid", sa.String(), nullable=True), + sa.Column( + "key", sa.String(255, collation=SQLCollationUtil.collation()), nullable=True + ), + sa.Column( + "project", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "uid", sa.String(255, collation=SQLCollationUtil.collation()), nullable=True + ), sa.Column("updated", sa.TIMESTAMP(), nullable=True), sa.Column("body", sa.BLOB(), nullable=True), sa.PrimaryKeyConstraint("id"), @@ -31,9 +41,19 @@ def upgrade(): op.create_table( "functions", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=True), - sa.Column("project", sa.String(), nullable=True), - sa.Column("uid", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "project", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "uid", sa.String(255, collation=SQLCollationUtil.collation()), nullable=True + ), sa.Column("body", sa.BLOB(), nullable=True), sa.Column("updated", sa.TIMESTAMP(), nullable=True), sa.PrimaryKeyConstraint("id"), @@ -42,31 +62,67 @@ def upgrade(): op.create_table( "logs", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("uid", sa.String(), nullable=True), - sa.Column("project", sa.String(), nullable=True), + sa.Column( + "uid", sa.String(255, collation=SQLCollationUtil.collation()), nullable=True + ), + sa.Column( + "project", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("body", sa.BLOB(), nullable=True), sa.PrimaryKeyConstraint("id"), ) op.create_table( "projects", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=True), - sa.Column("description", sa.String(), nullable=True), - sa.Column("owner", sa.String(), nullable=True), - sa.Column("source", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "description", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "owner", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "source", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("spec", sa.BLOB(), nullable=True), sa.Column("created", sa.TIMESTAMP(), nullable=True), - sa.Column("state", sa.String(), nullable=True), + sa.Column( + "state", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.PrimaryKeyConstraint("id"), sa.UniqueConstraint("name", name="_projects_uc"), ) op.create_table( "runs", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("uid", sa.String(), nullable=True), - sa.Column("project", sa.String(), nullable=True), + sa.Column( + "uid", sa.String(255, collation=SQLCollationUtil.collation()), nullable=True + ), + sa.Column( + "project", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("iteration", sa.Integer(), nullable=True), - sa.Column("state", sa.String(), nullable=True), + sa.Column( + "state", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), 
sa.Column("body", sa.BLOB(), nullable=True), sa.Column("start_time", sa.TIMESTAMP(), nullable=True), sa.PrimaryKeyConstraint("id"), @@ -74,28 +130,64 @@ def upgrade(): ) op.create_table( "schedules_v2", - sa.Column("project", sa.String(), nullable=False), - sa.Column("name", sa.String(), nullable=False), - sa.Column("kind", sa.String(), nullable=True), - sa.Column("desired_state", sa.String(), nullable=True), - sa.Column("state", sa.String(), nullable=True), + sa.Column( + "project", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=False, + ), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=False, + ), + sa.Column( + "kind", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "desired_state", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "state", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("creation_time", sa.TIMESTAMP(), nullable=True), - sa.Column("cron_trigger_str", sa.String(), nullable=True), + sa.Column( + "cron_trigger_str", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("struct", sa.BLOB(), nullable=True), sa.PrimaryKeyConstraint("project", "name"), ) op.create_table( "users", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.PrimaryKeyConstraint("id"), sa.UniqueConstraint("name", name="_users_uc"), ) op.create_table( "artifacts_labels", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=True), - sa.Column("value", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("parent", sa.Integer(), nullable=True), sa.ForeignKeyConstraint(["parent"], ["artifacts.id"],), sa.PrimaryKeyConstraint("id"), @@ -104,8 +196,16 @@ def upgrade(): op.create_table( "artifacts_tags", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("project", sa.String(), nullable=True), - sa.Column("name", sa.String(), nullable=True), + sa.Column( + "project", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("obj_id", sa.Integer(), nullable=True), sa.ForeignKeyConstraint(["obj_id"], ["artifacts.id"],), sa.PrimaryKeyConstraint("id"), @@ -114,8 +214,16 @@ def upgrade(): op.create_table( "functions_labels", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=True), - sa.Column("value", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("parent", sa.Integer(), nullable=True), sa.ForeignKeyConstraint(["parent"], ["functions.id"],), sa.PrimaryKeyConstraint("id"), @@ -124,10 +232,22 @@ def upgrade(): op.create_table( "functions_tags", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("project", sa.String(), nullable=True), - sa.Column("name", sa.String(), nullable=True), + sa.Column( + "project", + sa.String(255, 
collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("obj_id", sa.Integer(), nullable=True), - sa.Column("obj_name", sa.Integer(), nullable=True), + sa.Column( + "obj_name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.ForeignKeyConstraint(["obj_id"], ["functions.id"],), sa.ForeignKeyConstraint(["obj_name"], ["functions.name"],), sa.PrimaryKeyConstraint("id"), @@ -143,8 +263,16 @@ def upgrade(): op.create_table( "runs_labels", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=True), - sa.Column("value", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("parent", sa.Integer(), nullable=True), sa.ForeignKeyConstraint(["parent"], ["runs.id"],), sa.PrimaryKeyConstraint("id"), @@ -153,8 +281,16 @@ def upgrade(): op.create_table( "runs_tags", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("project", sa.String(), nullable=True), - sa.Column("name", sa.String(), nullable=True), + sa.Column( + "project", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("obj_id", sa.Integer(), nullable=True), sa.ForeignKeyConstraint(["obj_id"], ["runs.id"],), sa.PrimaryKeyConstraint("id"), diff --git a/mlrun/api/migrations/versions/1c954f8cb32d_schedule_last_run_uri.py b/mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py similarity index 64% rename from mlrun/api/migrations/versions/1c954f8cb32d_schedule_last_run_uri.py rename to mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py index 1ca62fd3f67..3c61f6c65bf 100644 --- a/mlrun/api/migrations/versions/1c954f8cb32d_schedule_last_run_uri.py +++ b/mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py @@ -8,6 +8,8 @@ import sqlalchemy as sa from alembic import op +from mlrun.api.utils.db.sql_collation import SQLCollationUtil + # revision identifiers, used by Alembic. revision = "1c954f8cb32d" down_revision = "f7b5a1a03629" @@ -17,7 +19,13 @@ def upgrade(): with op.batch_alter_table("schedules_v2") as batch_op: - batch_op.add_column(sa.Column("last_run_uri", sa.String(), nullable=True)) + batch_op.add_column( + sa.Column( + "last_run_uri", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ) + ) def downgrade(): diff --git a/mlrun/api/migrations/versions/2b6d23c715aa_adding_feature_sets.py b/mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py similarity index 56% rename from mlrun/api/migrations/versions/2b6d23c715aa_adding_feature_sets.py rename to mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py index f93b165d9be..eaf82ae6151 100644 --- a/mlrun/api/migrations/versions/2b6d23c715aa_adding_feature_sets.py +++ b/mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py @@ -8,6 +8,8 @@ import sqlalchemy as sa from alembic import op +from mlrun.api.utils.db.sql_collation import SQLCollationUtil + # revision identifiers, used by Alembic. 
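The recurring change in these migration scripts is that every sa.String() column gains an explicit length of 255 and a dialect-dependent collation. The length matters because MySQL cannot create a VARCHAR column without one, while SQLite accepted the length-less columns of the original revisions. The snippet below is purely illustrative (it is not part of the diff) and shows how the same column definition compiles for each dialect:

# Illustrative only: compare the DDL the two dialects generate for a collated String(255).
import sqlalchemy as sa
from sqlalchemy.dialects import mysql, sqlite
from sqlalchemy.schema import CreateTable

metadata = sa.MetaData()
example = sa.Table(
    "example",
    metadata,
    sa.Column("id", sa.Integer, primary_key=True),
    # "utf8_bin" stands in for whatever SQLCollationUtil.collation() returns
    sa.Column("name", sa.String(255, collation="utf8_bin")),
)

# MySQL needs the explicit VARCHAR length and emits the COLLATE clause
print(CreateTable(example).compile(dialect=mysql.dialect()))
# SQLite accepts the same definition and simply renders the collation as written
print(CreateTable(example).compile(dialect=sqlite.dialect()))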
revision = "2b6d23c715aa" down_revision = "b68e8e897a28" @@ -20,12 +22,26 @@ def upgrade(): op.create_table( "feature_sets", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=True), - sa.Column("project", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "project", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("created", sa.TIMESTAMP(), nullable=True), sa.Column("updated", sa.TIMESTAMP(), nullable=True), - sa.Column("state", sa.String(), nullable=True), - sa.Column("uid", sa.String(), nullable=True), + sa.Column( + "state", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "uid", sa.String(255, collation=SQLCollationUtil.collation()), nullable=True + ), sa.Column("status", sa.JSON(), nullable=True), sa.PrimaryKeyConstraint("id"), sa.UniqueConstraint("name", "project", "uid", name="_feature_set_uc"), @@ -34,16 +50,32 @@ def upgrade(): "entities", sa.Column("id", sa.Integer(), nullable=False), sa.Column("feature_set_id", sa.Integer(), nullable=True), - sa.Column("name", sa.String(), nullable=True), - sa.Column("value_type", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value_type", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.ForeignKeyConstraint(["feature_set_id"], ["feature_sets.id"],), sa.PrimaryKeyConstraint("id"), ) op.create_table( "feature_sets_labels", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=True), - sa.Column("value", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("parent", sa.Integer(), nullable=True), sa.ForeignKeyConstraint(["parent"], ["feature_sets.id"],), sa.PrimaryKeyConstraint("id"), @@ -52,10 +84,22 @@ def upgrade(): op.create_table( "feature_sets_tags", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("project", sa.String(), nullable=True), - sa.Column("name", sa.String(), nullable=True), + sa.Column( + "project", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("obj_id", sa.Integer(), nullable=True), - sa.Column("obj_name", sa.Integer(), nullable=True), + sa.Column( + "obj_name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.ForeignKeyConstraint(["obj_id"], ["feature_sets.id"],), sa.ForeignKeyConstraint(["obj_name"], ["feature_sets.name"],), sa.PrimaryKeyConstraint("id"), @@ -67,8 +111,16 @@ def upgrade(): "features", sa.Column("id", sa.Integer(), nullable=False), sa.Column("feature_set_id", sa.Integer(), nullable=True), - sa.Column("name", sa.String(), nullable=True), - sa.Column("value_type", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value_type", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.ForeignKeyConstraint(["feature_set_id"], ["feature_sets.id"],), sa.PrimaryKeyConstraint("id"), ) diff --git 
a/mlrun/api/migrations/versions/863114f0c659_refactoring_feature_set.py b/mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py similarity index 100% rename from mlrun/api/migrations/versions/863114f0c659_refactoring_feature_set.py rename to mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py diff --git a/mlrun/api/migrations/versions/b68e8e897a28_schedule_labels.py b/mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py similarity index 72% rename from mlrun/api/migrations/versions/b68e8e897a28_schedule_labels.py rename to mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py index f019c56e2d9..8110f1a8a31 100644 --- a/mlrun/api/migrations/versions/b68e8e897a28_schedule_labels.py +++ b/mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py @@ -8,6 +8,8 @@ import sqlalchemy as sa from alembic import op +from mlrun.api.utils.db.sql_collation import SQLCollationUtil + # revision identifiers, used by Alembic. revision = "b68e8e897a28" down_revision = "cf21882f938e" @@ -20,8 +22,16 @@ def upgrade(): op.create_table( "schedules_v2_labels", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=True), - sa.Column("value", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("parent", sa.Integer(), nullable=True), sa.ForeignKeyConstraint(["parent"], ["schedules_v2.id"],), sa.PrimaryKeyConstraint("id"), diff --git a/mlrun/api/migrations/versions/bcd0c1f9720c_adding_project_labels.py b/mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py similarity index 72% rename from mlrun/api/migrations/versions/bcd0c1f9720c_adding_project_labels.py rename to mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py index ba40a996175..a89d1ed405e 100644 --- a/mlrun/api/migrations/versions/bcd0c1f9720c_adding_project_labels.py +++ b/mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py @@ -8,6 +8,8 @@ import sqlalchemy as sa from alembic import op +from mlrun.api.utils.db.sql_collation import SQLCollationUtil + # revision identifiers, used by Alembic. 
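Earlier in the diff, initial_data.py switches to alembic_mysql.ini whenever MySQLUtil.get_mysql_dsn_data() returns connection data, and the relocated AlembicUtil imports the same helper. mlrun/api/utils/db/mysql.py itself is not included in this part of the diff; below is a minimal sketch under the assumption that it simply pattern-matches the configured DSN. The regex mirrors the DSN format used by the create-migration-mysql Makefile target and is an assumption, not the PR's code.

# Hypothetical sketch only; the real mlrun/api/utils/db/mysql.py is not shown in this section.
import re

from mlrun import mlconf


class MySQLUtil:
    # Assumed DSN shape, e.g. mysql+pymysql://root:pass@localhost:3306/mlrun
    dsn_regex = (
        r"mysql\+pymysql://(?P<username>[^:@]+)(?::(?P<password>[^@]*))?"
        r"@(?P<host>[^:/]+):(?P<port>\d+)/(?P<database>.+)"
    )

    @staticmethod
    def get_mysql_dsn_data():
        """Return the parsed DSN parts for a MySQL DSN, or None for anything else."""
        match = re.match(MySQLUtil.dsn_regex, mlconf.httpdb.dsn or "")
        if not match:
            return None
        return match.groupdict()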
revision = "bcd0c1f9720c" down_revision = "f4249b4ba6fa" @@ -20,8 +22,16 @@ def upgrade(): op.create_table( "projects_labels", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=True), - sa.Column("value", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("parent", sa.Integer(), nullable=True), sa.ForeignKeyConstraint(["parent"], ["projects.id"],), sa.PrimaryKeyConstraint("id"), diff --git a/mlrun/api/migrations/versions/cf21882f938e_schedule_id.py b/mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py similarity index 100% rename from mlrun/api/migrations/versions/cf21882f938e_schedule_id.py rename to mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py diff --git a/mlrun/api/migrations/versions/d781f58f607f_tag_object_name_string.py b/mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py similarity index 64% rename from mlrun/api/migrations/versions/d781f58f607f_tag_object_name_string.py rename to mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py index ad89d4e7c0c..e60ccd68bed 100644 --- a/mlrun/api/migrations/versions/d781f58f607f_tag_object_name_string.py +++ b/mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py @@ -8,6 +8,8 @@ import sqlalchemy as sa from alembic import op +from mlrun.api.utils.db.sql_collation import SQLCollationUtil + # revision identifiers, used by Alembic. revision = "d781f58f607f" down_revision = "deac06871ace" @@ -17,11 +19,20 @@ def upgrade(): with op.batch_alter_table("feature_sets_tags") as batch_op: - batch_op.alter_column(column_name="obj_name", type_=sa.String()) + batch_op.alter_column( + column_name="obj_name", + type_=sa.String(255, collation=SQLCollationUtil.collation()), + ) with op.batch_alter_table("feature_vectors_tags") as batch_op: - batch_op.alter_column(column_name="obj_name", type_=sa.String()) + batch_op.alter_column( + column_name="obj_name", + type_=sa.String(255, collation=SQLCollationUtil.collation()), + ) with op.batch_alter_table("functions_tags") as batch_op: - batch_op.alter_column(column_name="obj_name", type_=sa.String()) + batch_op.alter_column( + column_name="obj_name", + type_=sa.String(255, collation=SQLCollationUtil.collation()), + ) def downgrade(): diff --git a/mlrun/api/migrations/versions/deac06871ace_adding_marketplace_sources_table.py b/mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py similarity index 83% rename from mlrun/api/migrations/versions/deac06871ace_adding_marketplace_sources_table.py rename to mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py index 1b510b1bb14..ce66b294506 100644 --- a/mlrun/api/migrations/versions/deac06871ace_adding_marketplace_sources_table.py +++ b/mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py @@ -8,6 +8,8 @@ import sqlalchemy as sa from alembic import op +from mlrun.api.utils.db.sql_collation import SQLCollationUtil + # revision identifiers, used by Alembic. 
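The new sqlite3-to-mysql requirement added to dockerfiles/mlrun-api/requirements.txt pairs with the SQLiteMigrationUtil().transfer() call added to initial_data.py; the utility itself (mlrun/api/utils/db/sqlite_migration.py) lies outside this part of the diff. A rough sketch of what such a transfer step could look like, with the file path, credentials, and keyword-argument names all assumed rather than taken from the PR:

# Hypothetical sketch only; the real mlrun/api/utils/db/sqlite_migration.py is not shown in this section.
import os

# Provided by the sqlite3-to-mysql~=1.4 requirement; the constructor argument
# names below follow its documentation but may differ slightly between versions.
from sqlite3_to_mysql import SQLite3toMySQL


class SQLiteMigrationUtil:
    def transfer(self):
        # Assumed location of the old SQLite DB inside the API container;
        # skip the copy when there is nothing to migrate from.
        sqlite_file = "/mlrun/db/mlrun.db"
        if not os.path.isfile(sqlite_file):
            return
        converter = SQLite3toMySQL(
            sqlite_file=sqlite_file,
            mysql_user="root",        # illustrative placeholders; a real implementation
            mysql_password="pass",    # would parse these from the configured MySQL DSN
            mysql_host="localhost",
            mysql_port=3306,
            mysql_database="mlrun",
        )
        converter.transfer()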
revision = "deac06871ace" down_revision = "e1dd5983c06b" @@ -20,7 +22,11 @@ def upgrade(): op.create_table( "marketplace_sources", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("index", sa.Integer(), nullable=True), sa.Column("created", sa.TIMESTAMP(), nullable=True), sa.Column("updated", sa.TIMESTAMP(), nullable=True), diff --git a/mlrun/api/migrations/versions/e1dd5983c06b_schedule_concurrency_limit.py b/mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py similarity index 100% rename from mlrun/api/migrations/versions/e1dd5983c06b_schedule_concurrency_limit.py rename to mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py diff --git a/mlrun/api/migrations/versions/f4249b4ba6fa_adding_feature_vectors.py b/mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py similarity index 58% rename from mlrun/api/migrations/versions/f4249b4ba6fa_adding_feature_vectors.py rename to mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py index c25e1daa71d..43bb57aa24d 100644 --- a/mlrun/api/migrations/versions/f4249b4ba6fa_adding_feature_vectors.py +++ b/mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py @@ -8,6 +8,8 @@ import sqlalchemy as sa from alembic import op +from mlrun.api.utils.db.sql_collation import SQLCollationUtil + # revision identifiers, used by Alembic. revision = "f4249b4ba6fa" down_revision = "863114f0c659" @@ -20,12 +22,26 @@ def upgrade(): op.create_table( "feature_vectors", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=True), - sa.Column("project", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "project", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("created", sa.TIMESTAMP(), nullable=True), sa.Column("updated", sa.TIMESTAMP(), nullable=True), - sa.Column("state", sa.String(), nullable=True), - sa.Column("uid", sa.String(), nullable=True), + sa.Column( + "state", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "uid", sa.String(255, collation=SQLCollationUtil.collation()), nullable=True + ), sa.Column("object", sa.JSON(), nullable=True), sa.PrimaryKeyConstraint("id"), sa.UniqueConstraint("name", "project", "uid", name="_feature_vectors_uc"), @@ -33,8 +49,16 @@ def upgrade(): op.create_table( "feature_vectors_labels", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=True), - sa.Column("value", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("parent", sa.Integer(), nullable=True), sa.ForeignKeyConstraint(["parent"], ["feature_vectors.id"],), sa.PrimaryKeyConstraint("id"), @@ -43,10 +67,22 @@ def upgrade(): op.create_table( "feature_vectors_tags", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("project", sa.String(), nullable=True), - sa.Column("name", sa.String(), nullable=True), + sa.Column( + "project", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "name", + sa.String(255, 
collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("obj_id", sa.Integer(), nullable=True), - sa.Column("obj_name", sa.Integer(), nullable=True), + sa.Column( + "obj_name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.ForeignKeyConstraint(["obj_id"], ["feature_vectors.id"],), sa.ForeignKeyConstraint(["obj_name"], ["feature_vectors.name"],), sa.PrimaryKeyConstraint("id"), diff --git a/mlrun/api/migrations/versions/f7b5a1a03629_adding_feature_labels.py b/mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py similarity index 66% rename from mlrun/api/migrations/versions/f7b5a1a03629_adding_feature_labels.py rename to mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py index 3b64a68d9a5..3c5e5d89be8 100644 --- a/mlrun/api/migrations/versions/f7b5a1a03629_adding_feature_labels.py +++ b/mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py @@ -8,6 +8,8 @@ import sqlalchemy as sa from alembic import op +from mlrun.api.utils.db.sql_collation import SQLCollationUtil + # revision identifiers, used by Alembic. revision = "f7b5a1a03629" down_revision = "2b6d23c715aa" @@ -20,8 +22,16 @@ def upgrade(): op.create_table( "entities_labels", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=True), - sa.Column("value", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("parent", sa.Integer(), nullable=True), sa.ForeignKeyConstraint(["parent"], ["entities.id"],), sa.PrimaryKeyConstraint("id"), @@ -30,8 +40,16 @@ def upgrade(): op.create_table( "features_labels", sa.Column("id", sa.Integer(), nullable=False), - sa.Column("name", sa.String(), nullable=True), - sa.Column("value", sa.String(), nullable=True), + sa.Column( + "name", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), + sa.Column( + "value", + sa.String(255, collation=SQLCollationUtil.collation()), + nullable=True, + ), sa.Column("parent", sa.Integer(), nullable=True), sa.ForeignKeyConstraint(["parent"], ["features.id"],), sa.PrimaryKeyConstraint("id"), diff --git a/mlrun/api/utils/db/__init__.py b/mlrun/api/utils/db/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/mlrun/api/utils/alembic.py b/mlrun/api/utils/db/alembic.py similarity index 85% rename from mlrun/api/utils/alembic.py rename to mlrun/api/utils/db/alembic.py index 663e238b607..042013eefec 100644 --- a/mlrun/api/utils/alembic.py +++ b/mlrun/api/utils/db/alembic.py @@ -9,6 +9,8 @@ from mlrun import mlconf from mlrun.utils import logger +from .mysql import MySQLUtil + class AlembicUtil(object): def __init__(self, alembic_config_path: pathlib.Path): @@ -51,7 +53,10 @@ def _get_db_file_path() -> str: Get the db file path from the dsn. Converts the dsn to the file path. 
e.g.: sqlite:////mlrun/db/mlrun.db?check_same_thread=false -> /mlrun/db/mlrun.db + if mysql is used returns empty string """ + if "mysql" in mlconf.httpdb.dsn: + return "" return mlconf.httpdb.dsn.split("?")[0].split("sqlite:///")[-1] def _get_current_revision(self) -> typing.Optional[str]: @@ -89,11 +94,17 @@ def _get_revision_history_list(self) -> typing.List[str]: def _parse_revision_history(output: str) -> typing.List[str]: return [line.split(" ")[2].replace(",", "") for line in output.splitlines()] - @staticmethod - def _backup_revision(db_file_path: str, current_version: str): + def _backup_revision(self, db_file_path: str, current_version: str): if db_file_path == ":memory:": return + if "mysql" in mlconf.httpdb.dsn: + self._backup_revision_mysql(db_file_path, current_version) + else: + self._backup_revision_sqlite(db_file_path, current_version) + + @staticmethod + def _backup_revision_sqlite(db_file_path: str, current_version: str): db_dir_path = pathlib.Path(os.path.dirname(db_file_path)) backup_path = db_dir_path / f"{current_version}.db" @@ -102,6 +113,15 @@ def _backup_revision(db_file_path: str, current_version: str): ) shutil.copy2(db_file_path, backup_path) + @staticmethod + def _backup_revision_mysql(db_file_path: str, current_version: str): + db_dir_path = pathlib.Path(os.path.dirname(db_file_path)) + backup_path = db_dir_path / f"{current_version}.db" + + mysql_util = MySQLUtil() + mysql_util.dump_database_to_file(backup_path) + mysql_util.close() + @staticmethod def _downgrade_to_revision( db_file_path: str, current_revision: str, fallback_version: str diff --git a/mlrun/api/utils/db/mysql.py b/mlrun/api/utils/db/mysql.py new file mode 100644 index 00000000000..20861055332 --- /dev/null +++ b/mlrun/api/utils/db/mysql.py @@ -0,0 +1,83 @@ +import os +import pathlib +import re +import typing + +import pymysql + + +class MySQLUtil(object): + dsn_env_var = "MLRUN_HTTPDB__DSN" + dsn_regex = ( + r"mysql\+pymysql://(?P<username>.+)@(?P<host>.+):(?P<port>\d+)/(?P<database>.+)" + ) + check_tables = [ + "projects", + # check functions as well just in case the previous version used a projects leader + "functions", + ] + + def __init__(self): + mysql_dsn_data = self.get_mysql_dsn_data() + if not mysql_dsn_data: + raise RuntimeError(f"Invalid mysql dsn: {self.get_dsn()}") + + self._connection = pymysql.connect( + host=mysql_dsn_data["host"], + user=mysql_dsn_data["username"], + port=int(mysql_dsn_data["port"]), + database=mysql_dsn_data["database"], + ) + + def close(self): + self._connection.close() + + def check_db_has_data(self): + with self._connection.cursor() as cursor: + for check_table in self.check_tables: + cursor.execute(f"SELECT COUNT(*) FROM `{check_table}`;") + if cursor.fetchone()[0] > 0: + return True + return False + + def dump_database_to_file(self, filepath: pathlib.Path): + with self._connection.cursor() as cursor: + database_dump = self._get_database_dump(cursor) + + with open(str(filepath), "w") as f: + f.writelines(database_dump) + + @staticmethod + def get_dsn() -> str: + return os.environ.get(MySQLUtil.dsn_env_var, "") + + @staticmethod + def get_mysql_dsn_data() -> typing.Optional[dict]: + match = re.match(MySQLUtil.dsn_regex, MySQLUtil.get_dsn()) + if not match: + return None + + return match.groupdict() + + @staticmethod + def _get_database_dump(cursor) -> str: + cursor.execute("SHOW TABLES") + data = "" + table_names = [] + for table_name in cursor.fetchall(): + table_names.append(table_name[0]) + + for table_name in table_names: + data += f"DROP TABLE IF EXISTS `{table_name}`;" +
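For reference, a quick sketch of what the dsn_regex above is expected to extract; the sample DSN is made up, and the named groups have to line up with the keys read back in __init__:

    import re

    dsn_regex = (
        r"mysql\+pymysql://(?P<username>.+)@(?P<host>.+):(?P<port>\d+)/(?P<database>.+)"
    )
    sample_dsn = "mysql+pymysql://root@localhost:3306/mlrun"  # hypothetical DSN
    print(re.match(dsn_regex, sample_dsn).groupdict())
    # -> {'username': 'root', 'host': 'localhost', 'port': '3306', 'database': 'mlrun'}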
cursor.execute(f"SHOW CREATE TABLE `{table_name}`;") + table_definition = cursor.fetchone()[1] + data += f"\n{table_definition};\n\n" + + cursor.execute(f"SELECT * FROM `{table_name}`;") + for row in cursor.fetchall(): + values = ", ".join([f'"{field}"' for field in row]) + data += f"INSERT INTO `{table_name}` VALUES({values});\n" + data += "\n\n" + + return data diff --git a/mlrun/api/utils/db/sql_collation.py b/mlrun/api/utils/db/sql_collation.py new file mode 100644 index 00000000000..904839eb467 --- /dev/null +++ b/mlrun/api/utils/db/sql_collation.py @@ -0,0 +1,17 @@ +from .mysql import MySQLUtil + + +class SQLCollationUtil(object): + class Collations(object): + + # with sqlite we use the default collation + sqlite = None + mysql = "utf8_bin" + + @staticmethod + def collation(): + mysql_dsn_data = MySQLUtil.get_mysql_dsn_data() + if mysql_dsn_data: + return SQLCollationUtil.Collations.mysql + + return SQLCollationUtil.Collations.sqlite diff --git a/mlrun/api/utils/db/sqlite_migration.py b/mlrun/api/utils/db/sqlite_migration.py new file mode 100644 index 00000000000..a211728f78c --- /dev/null +++ b/mlrun/api/utils/db/sqlite_migration.py @@ -0,0 +1,66 @@ +import typing + +import sqlite3_to_mysql + +from mlrun import mlconf +from mlrun.utils import logger + +from .mysql import MySQLUtil + + +class SQLiteMigrationUtil(object): + def __init__(self): + self._mysql_dsn_data = MySQLUtil.get_mysql_dsn_data() + self._migrator = self._create_migrator() + self._mysql_util = None + if self._mysql_dsn_data: + self._mysql_util = MySQLUtil() + + def transfer(self): + + # if some data is missing, don't transfer the data + if not self._migrator: + return + + db_has_data = False + if self._mysql_util: + if self._mysql_util.check_db_has_data(): + db_has_data = True + self._mysql_util.close() + + # if mysqldb already has data, don't transfer the data + if db_has_data: + return + + self._migrator.transfer() + + def _create_migrator(self) -> typing.Optional[sqlite3_to_mysql.SQLite3toMySQL]: + sqlite_file = self._get_old_db_file_path() + if not sqlite_file or not self._mysql_dsn_data: + return None + + logger.info( + "Creating SQLite to MySQL Converter", + sqlite_file=sqlite_file, + mysql_dsn_data=self._mysql_dsn_data, + ) + + return sqlite3_to_mysql.SQLite3toMySQL( + sqlite_file=sqlite_file, + mysql_user=self._mysql_dsn_data["username"], + mysql_database=self._mysql_dsn_data["database"], + mysql_host=self._mysql_dsn_data["host"], + mysql_port=int(self._mysql_dsn_data["port"]), + quiet=True, + ) + + @staticmethod + def _get_old_db_file_path() -> str: + """ + Get the db file path from the old_dsn. + Converts the dsn to the file path. 
e.g.: + sqlite:////mlrun/db/mlrun.db?check_same_thread=false -> /mlrun/db/mlrun.db + """ + if not mlconf.httpdb.old_dsn: + return "" + return mlconf.httpdb.old_dsn.split("?")[0].split("sqlite:///")[-1] diff --git a/mlrun/config.py b/mlrun/config.py index 9741a01ed1d..7a89526d3b9 100644 --- a/mlrun/config.py +++ b/mlrun/config.py @@ -114,6 +114,7 @@ "port": 8080, "dirpath": expanduser("~/.mlrun/db"), "dsn": "sqlite:////mlrun/db/mlrun.db?check_same_thread=false", + "old_dsn": "", "debug": False, "user": "", "password": "", diff --git a/requirements.txt b/requirements.txt index 84122ce1047..d9f1f321e74 100644 --- a/requirements.txt +++ b/requirements.txt @@ -62,3 +62,4 @@ v3iofs~=0.1.7 cryptography~=3.0, <3.4 storey~=0.8.0; python_version >= '3.7' deepdiff~=5.0 +PyMySQL~=1.0 diff --git a/setup.py b/setup.py index 15286db89ea..25ad17137ca 100644 --- a/setup.py +++ b/setup.py @@ -123,8 +123,10 @@ def load_deps(path): "mlrun.api.db", "mlrun.api.db.sqldb", "mlrun.api.db.filedb", - "mlrun.api.migrations", - "mlrun.api.migrations.versions", + "mlrun.api.migrations_sqlite", + "mlrun.api.migrations_sqlite.versions", + "mlrun.api.migrations_mysql", + "mlrun.api.migrations_mysql.versions", "mlrun.api.schemas", "mlrun.api.utils", "mlrun.api.utils.auth", diff --git a/tests/api/utils/test_alembic_util.py b/tests/api/utils/test_alembic_util.py index c036f3391d3..72239911c1a 100644 --- a/tests/api/utils/test_alembic_util.py +++ b/tests/api/utils/test_alembic_util.py @@ -8,7 +8,7 @@ import alembic.config import pytest -import mlrun.api.utils.alembic +import mlrun.api.utils.db.alembic from mlrun import mlconf @@ -24,7 +24,7 @@ def test_no_database_exists( mock_alembic, mock_database, mock_shutil_copy, from_scratch ): mock_database(db_file_exists=False) - alembic_util = mlrun.api.utils.alembic.AlembicUtil(pathlib.Path("")) + alembic_util = mlrun.api.utils.db.alembic.AlembicUtil(pathlib.Path("")) alembic_util.init_alembic(from_scratch=from_scratch) assert mock_alembic.stamp_calls == [] assert mock_alembic.upgrade_calls == ["head"] @@ -36,7 +36,7 @@ def test_database_exists_no_revision( mock_alembic, mock_database, mock_shutil_copy, from_scratch ): mock_database() - alembic_util = mlrun.api.utils.alembic.AlembicUtil(pathlib.Path("")) + alembic_util = mlrun.api.utils.db.alembic.AlembicUtil(pathlib.Path("")) alembic_util.init_alembic(from_scratch=from_scratch) # from scratch should skip stamp even if no revision exists @@ -51,7 +51,7 @@ def test_database_exists_known_revision( mock_alembic, mock_database, mock_shutil_copy, mock_db_file_name, from_scratch ): mock_database(current_revision=Constants.initial_revision) - alembic_util = mlrun.api.utils.alembic.AlembicUtil(pathlib.Path("")) + alembic_util = mlrun.api.utils.db.alembic.AlembicUtil(pathlib.Path("")) alembic_util.init_alembic(from_scratch=from_scratch) assert mock_alembic.stamp_calls == [] assert mock_alembic.upgrade_calls == ["head"] @@ -65,7 +65,7 @@ def test_database_exists_unknown_revision_successful_downgrade( mock_alembic, mock_database, mock_shutil_copy, mock_db_file_name, from_scratch ): mock_database(current_revision=Constants.unknown_revision) - alembic_util = mlrun.api.utils.alembic.AlembicUtil(pathlib.Path("")) + alembic_util = mlrun.api.utils.db.alembic.AlembicUtil(pathlib.Path("")) alembic_util.init_alembic(from_scratch=from_scratch) assert mock_alembic.stamp_calls == [] assert mock_alembic.upgrade_calls == ["head"] @@ -95,7 +95,7 @@ def test_database_exists_unknown_revision_failed_downgrade( mock_database( 
current_revision=Constants.unknown_revision, db_backup_exists=False, ) - alembic_util = mlrun.api.utils.alembic.AlembicUtil(pathlib.Path("")) + alembic_util = mlrun.api.utils.db.alembic.AlembicUtil(pathlib.Path("")) with pytest.raises( RuntimeError, match=f"Cannot fall back to revision {Constants.latest_revision}, "
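Editor's note on how the pieces above appear intended to fit together: when httpdb.old_dsn points at the previous SQLite file and MLRUN_HTTPDB__DSN points at MySQL, SQLiteMigrationUtil copies the data once (skipping the transfer if the MySQL database already has rows) and the MySQL alembic configuration takes over from there. A hedged sketch of that flow; the actual startup call site is not shown in this diff, and the connection details are assumptions:

    import os

    from mlrun import mlconf
    from mlrun.api.utils.db.sqlite_migration import SQLiteMigrationUtil

    # Assumed configuration, for illustration only.
    os.environ["MLRUN_HTTPDB__DSN"] = "mysql+pymysql://root@localhost:3306/mlrun"
    mlconf.httpdb.old_dsn = "sqlite:////mlrun/db/mlrun.db?check_same_thread=false"

    migrator = SQLiteMigrationUtil()
    # No-op if the target DB already has data or the configuration is incomplete.
    migrator.transfer()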