Skip to content

Commit

Permalink
[DB] Support using MySQL (#1200)
Browse files Browse the repository at this point in the history
  • Loading branch information
quaark committed Sep 30, 2021
1 parent 03225a7 commit a9eb423
Show file tree
Hide file tree
Showing 34 changed files with 1,502 additions and 135 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Expand Up @@ -22,4 +22,4 @@ result*.html
tests/test_results/
venv
mlrun/utils/version/version.json
mlrun/api/migrations/mlrun.db
mlrun/api/migrations_sqlite/mlrun.db
35 changes: 29 additions & 6 deletions Makefile
Expand Up @@ -69,14 +69,37 @@ install-requirements: ## Install all requirements needed for development
-r dockerfiles/mlrun-api/requirements.txt \
-r docs/requirements.txt

.PHONY: create-migration
create-migration: export MLRUN_HTTPDB__DSN="sqlite:///$(PWD)/mlrun/api/migrations/mlrun.db?check_same_thread=false"
create-migration: ## Create a DB migration (MLRUN_MIGRATION_MESSAGE must be set)
.PHONY: create-migration-sqlite
# Create a new alembic revision for the SQLite schema.
# MLRUN_HTTPDB__DSN is exported to the recipe so alembic works against a local
# SQLite file. The file lives under mlrun/api/migrations_sqlite, the same
# location that .gitignore excludes and that the alembic script_location uses.
create-migration-sqlite: export MLRUN_HTTPDB__DSN="sqlite:///$(PWD)/mlrun/api/migrations_sqlite/mlrun.db?check_same_thread=false"
create-migration-sqlite: ## Create a DB migration (MLRUN_MIGRATION_MESSAGE must be set)
# The $(error ...) line must stay tab-indented: as a recipe line it is only
# expanded when this target runs, so other targets keep working when
# MLRUN_MIGRATION_MESSAGE is not set.
ifndef MLRUN_MIGRATION_MESSAGE
	$(error MLRUN_MIGRATION_MESSAGE is undefined)
endif
# NOTE(review): the alembic.ini invocations run before the alembic_sqlite.ini
# ones and repeat the same two steps; in this commit view they look like the
# pre-rename lines - confirm whether they should still run.
	alembic -c ./mlrun/api/alembic.ini upgrade head
	alembic -c ./mlrun/api/alembic.ini revision --autogenerate -m "$(MLRUN_MIGRATION_MESSAGE)"
# Bring the scratch DB to head first so autogenerate diffs the models against
# the latest revision.
	alembic -c ./mlrun/api/alembic_sqlite.ini upgrade head
	alembic -c ./mlrun/api/alembic_sqlite.ini revision --autogenerate -m "$(MLRUN_MIGRATION_MESSAGE)"

.PHONY: create-migration-mysql
# Create a new alembic revision for the MySQL schema.
# Starts a throwaway MySQL 5.7 container named migration-db, publishes it on
# localhost:3306 (matching the exported DSN), runs alembic against it and then
# kills the container.
create-migration-mysql: export MLRUN_HTTPDB__DSN="mysql+pymysql://root:pass@localhost:3306/mlrun"
create-migration-mysql: ## Create a DB migration (MLRUN_MIGRATION_MESSAGE must be set)
# The $(error ...) line must stay tab-indented: as a recipe line it is only
# expanded when this target runs, so other targets keep working when
# MLRUN_MIGRATION_MESSAGE is not set.
ifndef MLRUN_MIGRATION_MESSAGE
	$(error MLRUN_MIGRATION_MESSAGE is undefined)
endif
# $(CURDIR) is make's absolute working directory. The previous $(pwd) was an
# undefined make variable and expanded to nothing, producing "-v :/mlrun".
	docker run \
		--name=migration-db \
		--rm \
		-v "$(CURDIR)":/mlrun \
		-p 3306:3306 \
		-e MYSQL_ROOT_PASSWORD="pass" \
		-e MYSQL_ROOT_HOST=% \
		-e MYSQL_DATABASE="mlrun" \
		-d \
		mysql/mysql-server:5.7 \
		--character-set-server=utf8 \
		--collation-server=utf8_bin
# NOTE(review): nothing here waits for MySQL to accept connections before
# alembic runs - confirm whether a readiness wait is needed.
# The alembic steps and the cleanup share one shell so the container is killed
# even when alembic fails; the alembic exit status is still what make sees.
# No explicit "docker rm" is needed: the container was started with --rm, so
# Docker removes it as soon as it exits.
	status=0; \
	alembic -c ./mlrun/api/alembic_mysql.ini upgrade head \
		&& alembic -c ./mlrun/api/alembic_mysql.ini revision --autogenerate -m "$(MLRUN_MIGRATION_MESSAGE)" \
		|| status=$$?; \
	docker kill migration-db; \
	exit $$status

.PHONY: bump-version
bump-version: ## Bump version in all needed places in code
Expand Down Expand Up @@ -489,7 +512,7 @@ test-migrations: clean ## Run mlrun db migrations tests
--durations=100 \
-rf \
--test-alembic \
migrations/tests/*
migrations_sqlite/tests/*

.PHONY: test-system-dockerized
test-system-dockerized: build-test-system ## Run mlrun system tests in docker container
Expand Down
1 change: 1 addition & 0 deletions dockerfiles/mlrun-api/requirements.txt
Expand Up @@ -3,3 +3,4 @@ dask-kubernetes~=0.11.0
# 3.0 iguazio system is running k8s 1.17 so ideally we would use 17.X, but kfp limiting to <12.0
kubernetes-asyncio~=11.0
apscheduler~=3.6
sqlite3-to-mysql~=1.4
2 changes: 1 addition & 1 deletion mlrun/api/alembic.ini
Expand Up @@ -2,7 +2,7 @@

[alembic]
# path to migration scripts
script_location = %(here)s/migrations
script_location = %(here)s/migrations_sqlite

# template used to generate migration files
# file_template = %%(rev)s_%%(slug)s
Expand Down
86 changes: 86 additions & 0 deletions mlrun/api/alembic_mysql.ini
@@ -0,0 +1,86 @@
# A generic, single database configuration.

[alembic]
# path to migration scripts
script_location = %(here)s/migrations_mysql

# template used to generate migration files
# file_template = %%(rev)s_%%(slug)s

# timezone to use when rendering the date
# within the migration file as well as the filename.
# string value is passed to dateutil.tz.gettz()
# leave blank for localtime
# timezone =

# max length of characters to apply to the
# "slug" field
# truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false

# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false

# version location specification; this defaults
# to alembic/versions. When using multiple version
# directories, initial revisions must be specified with --version-path
# version_locations = %(here)s/bar %(here)s/bat alembic/versions

# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8

# placeholder value only - it is overridden in the alembic env.py by whatever is configured in mlrun.mlconf
sqlalchemy.url = sqlite:///test.db


[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples

# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks=black
# black.type=console_scripts
# black.entrypoint=black
# black.options=-l 79

# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
96 changes: 52 additions & 44 deletions mlrun/api/db/sqldb/models.py
Expand Up @@ -33,6 +33,7 @@
from sqlalchemy.orm import class_mapper, relationship

from mlrun.api import schemas
from mlrun.api.utils.db.sql_collation import SQLCollationUtil

Base = declarative_base()
NULL = None # Avoid flake8 issuing warnings when comparing in filter
Expand Down Expand Up @@ -81,8 +82,8 @@ class Label(Base, BaseModel):
)

id = Column(Integer, primary_key=True)
name = Column(String)
value = Column(String)
name = Column(String(255, collation=SQLCollationUtil.collation()))
value = Column(String(255, collation=SQLCollationUtil.collation()))
parent = Column(Integer, ForeignKey(f"{table}.id"))

return Label
Expand All @@ -96,8 +97,8 @@ class Tag(Base, BaseModel):
)

id = Column(Integer, primary_key=True)
project = Column(String)
name = Column(String)
project = Column(String(255, collation=SQLCollationUtil.collation()))
name = Column(String(255, collation=SQLCollationUtil.collation()))
obj_id = Column(Integer, ForeignKey(f"{table}.id"))

return Tag
Expand All @@ -113,10 +114,13 @@ class Tag(Base, BaseModel):
)

id = Column(Integer, primary_key=True)
project = Column(String)
name = Column(String)
project = Column(String(255, collation=SQLCollationUtil.collation()))
name = Column(String(255, collation=SQLCollationUtil.collation()))
obj_id = Column(Integer, ForeignKey(f"{table}.id"))
obj_name = Column(String, ForeignKey(f"{table}.name"))
obj_name = Column(
String(255, collation=SQLCollationUtil.collation()),
ForeignKey(f"{table}.name"),
)

return Tag

Expand All @@ -135,9 +139,9 @@ class Artifact(Base, HasStruct):
Tag = make_tag(__tablename__)

id = Column(Integer, primary_key=True)
key = Column(String)
project = Column(String)
uid = Column(String)
key = Column(String(255, collation=SQLCollationUtil.collation()))
project = Column(String(255, collation=SQLCollationUtil.collation()))
uid = Column(String(255, collation=SQLCollationUtil.collation()))
updated = Column(TIMESTAMP)
# TODO: change to JSON, see mlrun/api/schemas/function.py::FunctionState for reasoning
body = Column(BLOB)
Expand All @@ -156,9 +160,9 @@ class Function(Base, HasStruct):
Tag = make_tag_v2(__tablename__)

id = Column(Integer, primary_key=True)
name = Column(String)
project = Column(String)
uid = Column(String)
name = Column(String(255, collation=SQLCollationUtil.collation()))
project = Column(String(255, collation=SQLCollationUtil.collation()))
uid = Column(String(255, collation=SQLCollationUtil.collation()))
# TODO: change to JSON, see mlrun/api/schemas/function.py::FunctionState for reasoning
body = Column(BLOB)
updated = Column(TIMESTAMP)
Expand All @@ -171,8 +175,8 @@ class Log(Base, BaseModel):
__tablename__ = "logs"

id = Column(Integer, primary_key=True)
uid = Column(String)
project = Column(String)
uid = Column(String(255, collation=SQLCollationUtil.collation()))
project = Column(String(255, collation=SQLCollationUtil.collation()))
# TODO: change to JSON, see mlrun/api/schemas/function.py::FunctionState for reasoning
body = Column(BLOB)

Expand All @@ -189,10 +193,10 @@ class Run(Base, HasStruct):
Tag = make_tag(__tablename__)

id = Column(Integer, primary_key=True)
uid = Column(String)
project = Column(String)
uid = Column(String(255, collation=SQLCollationUtil.collation()))
project = Column(String(255, collation=SQLCollationUtil.collation()))
iteration = Column(Integer)
state = Column(String)
state = Column(String(255, collation=SQLCollationUtil.collation()))
# TODO: change to JSON, see mlrun/api/schemas/function.py::FunctionState for reasoning
body = Column(BLOB)
start_time = Column(TIMESTAMP)
Expand All @@ -208,14 +212,18 @@ class Schedule(Base, BaseModel):
Label = make_label(__tablename__)

id = Column(Integer, primary_key=True)
project = Column(String, nullable=False)
name = Column(String, nullable=False)
kind = Column(String)
desired_state = Column(String)
state = Column(String)
project = Column(
String(255, collation=SQLCollationUtil.collation()), nullable=False
)
name = Column(
String(255, collation=SQLCollationUtil.collation()), nullable=False
)
kind = Column(String(255, collation=SQLCollationUtil.collation()))
desired_state = Column(String(255, collation=SQLCollationUtil.collation()))
state = Column(String(255, collation=SQLCollationUtil.collation()))
creation_time = Column(TIMESTAMP)
cron_trigger_str = Column(String)
last_run_uri = Column(String)
cron_trigger_str = Column(String(255, collation=SQLCollationUtil.collation()))
last_run_uri = Column(String(255, collation=SQLCollationUtil.collation()))
# TODO: change to JSON, see mlrun/api/schemas/function.py::FunctionState for reasoning
struct = Column(BLOB)
labels = relationship(Label, cascade="all, delete-orphan")
Expand Down Expand Up @@ -253,24 +261,24 @@ class User(Base, BaseModel):
__table_args__ = (UniqueConstraint("name", name="_users_uc"),)

id = Column(Integer, primary_key=True)
name = Column(String)
name = Column(String(255, collation=SQLCollationUtil.collation()))

class Project(Base, BaseModel):
__tablename__ = "projects"
# For now since we use project name a lot
__table_args__ = (UniqueConstraint("name", name="_projects_uc"),)

id = Column(Integer, primary_key=True)
name = Column(String)
description = Column(String)
owner = Column(String)
source = Column(String)
name = Column(String(255, collation=SQLCollationUtil.collation()))
description = Column(String(255, collation=SQLCollationUtil.collation()))
owner = Column(String(255, collation=SQLCollationUtil.collation()))
source = Column(String(255, collation=SQLCollationUtil.collation()))
# the attribute name used to be _spec which is just a wrong naming, the attribute was renamed to _full_object
# leaving the column as is to prevent redundant migration
# TODO: change to JSON, see mlrun/api/schemas/function.py::FunctionState for reasoning
_full_object = Column("spec", BLOB)
created = Column(TIMESTAMP, default=datetime.utcnow)
state = Column(String)
state = Column(String(255, collation=SQLCollationUtil.collation()))
users = relationship(User, secondary=project_users)

Label = make_label(__tablename__)
Expand All @@ -294,8 +302,8 @@ class Feature(Base, BaseModel):
id = Column(Integer, primary_key=True)
feature_set_id = Column(Integer, ForeignKey("feature_sets.id"))

name = Column(String)
value_type = Column(String)
name = Column(String(255, collation=SQLCollationUtil.collation()))
value_type = Column(String(255, collation=SQLCollationUtil.collation()))

Label = make_label(__tablename__)
labels = relationship(Label, cascade="all, delete-orphan")
Expand All @@ -308,8 +316,8 @@ class Entity(Base, BaseModel):
id = Column(Integer, primary_key=True)
feature_set_id = Column(Integer, ForeignKey("feature_sets.id"))

name = Column(String)
value_type = Column(String)
name = Column(String(255, collation=SQLCollationUtil.collation()))
value_type = Column(String(255, collation=SQLCollationUtil.collation()))

Label = make_label(__tablename__)
labels = relationship(Label, cascade="all, delete-orphan")
Expand All @@ -324,12 +332,12 @@ class FeatureSet(Base, BaseModel):
)

id = Column(Integer, primary_key=True)
name = Column(String)
project = Column(String)
name = Column(String(255, collation=SQLCollationUtil.collation()))
project = Column(String(255, collation=SQLCollationUtil.collation()))
created = Column(TIMESTAMP, default=datetime.now(timezone.utc))
updated = Column(TIMESTAMP, default=datetime.now(timezone.utc))
state = Column(String)
uid = Column(String)
state = Column(String(255, collation=SQLCollationUtil.collation()))
uid = Column(String(255, collation=SQLCollationUtil.collation()))

_full_object = Column("object", JSON)

Expand Down Expand Up @@ -360,12 +368,12 @@ class FeatureVector(Base, BaseModel):
)

id = Column(Integer, primary_key=True)
name = Column(String)
project = Column(String)
name = Column(String(255, collation=SQLCollationUtil.collation()))
project = Column(String(255, collation=SQLCollationUtil.collation()))
created = Column(TIMESTAMP, default=datetime.now(timezone.utc))
updated = Column(TIMESTAMP, default=datetime.now(timezone.utc))
state = Column(String)
uid = Column(String)
state = Column(String(255, collation=SQLCollationUtil.collation()))
uid = Column(String(255, collation=SQLCollationUtil.collation()))

_full_object = Column("object", JSON)

Expand All @@ -391,7 +399,7 @@ class MarketplaceSource(Base, BaseModel):
__table_args__ = (UniqueConstraint("name", name="_marketplace_sources_uc"),)

id = Column(Integer, primary_key=True)
name = Column(String)
name = Column(String(255, collation=SQLCollationUtil.collation()))
index = Column(Integer)
created = Column(TIMESTAMP, default=datetime.now(timezone.utc))
updated = Column(TIMESTAMP, default=datetime.now(timezone.utc))
Expand Down

0 comments on commit a9eb423

Please sign in to comment.