Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature store] Feature set api fixes #549

Merged
merged 14 commits into from
Nov 25, 2020
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
31 changes: 29 additions & 2 deletions mlrun/api/db/sqldb/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -945,9 +945,14 @@ def _update_existing_feature_set(
self._update_feature_set_features(feature_set, features)
self._update_feature_set_entities(feature_set, entities)

labels = new_feature_set_dict["metadata"].pop("labels", {})
labels = new_feature_set_dict["metadata"].pop("labels", {}) or {}
update_labels(feature_set, labels)

@staticmethod
def _validate_feature_set_kind(kind):
if kind != "FeatureSet":
raise ValueError(f"invalid kind for a feature-set object: {kind}")

def store_feature_set(
self,
session,
Expand All @@ -962,10 +967,19 @@ def store_feature_set(
if not tag and not uid:
raise ValueError("cannot store feature set without reference (tag or uid)")

feature_set_project = feature_set.metadata.project
if feature_set_project and feature_set_project != project:
raise mlrun.errors.MLRunInvalidArgumentError(
f"feature-set object with conflicting project name - {feature_set_project}"
)
Hedingber marked this conversation as resolved.
Show resolved Hide resolved

feature_set.metadata.project = project

if feature_set.metadata.name != name:
raise mlrun.errors.MLRunInvalidArgumentError(
"Changing name for an existing feature-set"
)
self._validate_feature_set_kind(feature_set.kind)

original_uid = uid

Expand Down Expand Up @@ -1012,7 +1026,18 @@ def create_feature_set(
):
name = feature_set.metadata.name
if not name or not project:
raise ValueError("feature-set missing name or project")
raise mlrun.errors.MLRunInvalidArgumentError(
"feature-set missing name or project"
)
self._validate_feature_set_kind(feature_set.kind)

feature_set_project = feature_set.metadata.project
if feature_set_project and feature_set_project != project:
raise mlrun.errors.MLRunInvalidArgumentError(
f"feature-set object with conflicting project name - {feature_set_project}"
)

feature_set.metadata.project = project

self._ensure_project(session, project)
tag = feature_set.metadata.tag or "latest"
Expand Down Expand Up @@ -1067,6 +1092,8 @@ def patch_feature_set(
f"Feature-set not found {feature_set_uri}"
)

self._validate_feature_set_kind(feature_set_update.get("kind", "FeatureSet"))
Hedingber marked this conversation as resolved.
Show resolved Hide resolved

feature_set_struct = feature_set_record.dict()
# using mergedeep for merging the patch content into the existing dictionary
strategy = mergedeep.Strategy.REPLACE
Expand Down
1 change: 1 addition & 0 deletions mlrun/api/schemas/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class Config:


class FeatureSet(BaseModel):
kind: str = "FeatureSet"
Hedingber marked this conversation as resolved.
Show resolved Hide resolved
metadata: ObjectMetadata
spec: FeatureSetSpec
status: FeatureSetStatus
Expand Down
1 change: 1 addition & 0 deletions mlrun/api/schemas/object.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

class ObjectMetadata(BaseModel):
name: str
project: Optional[str]
tag: Optional[str]
labels: Optional[dict]
updated: Optional[datetime]
Expand Down
8 changes: 4 additions & 4 deletions mlrun/db/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,13 @@ def list_artifact_tags(self, project):
@abstractmethod
def create_feature_set(
self, feature_set: Union[dict, schemas.FeatureSet], project="", versioned=True
) -> schemas.FeatureSet:
) -> dict:
pass

@abstractmethod
def get_feature_set(
self, name: str, project: str = "", tag: str = None, uid: str = None
) -> schemas.FeatureSet:
) -> dict:
pass

@abstractmethod
Expand All @@ -159,14 +159,14 @@ def list_feature_sets(
entities: List[str] = None,
features: List[str] = None,
labels: List[str] = None,
) -> schemas.FeatureSetsOutput:
) -> List[dict]:
pass

@abstractmethod
def store_feature_set(
self,
name,
feature_set: Union[dict, schemas.FeatureSet],
name=None,
project="",
tag=None,
uid=None,
Expand Down
2 changes: 1 addition & 1 deletion mlrun/db/filedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ def list_feature_sets(
raise NotImplementedError()

def store_feature_set(
self, name, feature_set, project="", tag=None, uid=None, versioned=True
self, feature_set, name=None, project="", tag=None, uid=None, versioned=True
):
raise NotImplementedError()

Expand Down
27 changes: 15 additions & 12 deletions mlrun/db/httpdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -691,24 +691,26 @@ def get_pipeline(self, run_id: str, namespace: str = None, timeout: int = 10):

def create_feature_set(
self, feature_set: Union[dict, schemas.FeatureSet], project="", versioned=True
) -> schemas.FeatureSet:
project = project or default_project
path = f"projects/{project}/feature-sets"
params = {"versioned": versioned}

) -> dict:
if isinstance(feature_set, schemas.FeatureSet):
feature_set = feature_set.dict()

project = (
project or feature_set["metadata"].get("project", None) or default_project
)
path = f"projects/{project}/feature-sets"
params = {"versioned": versioned}

name = feature_set["metadata"]["name"]
error_message = f"Failed creating feature-set {project}/{name}"
resp = self.api_call(
"POST", path, error_message, params=params, body=json.dumps(feature_set),
)
return schemas.FeatureSet(**resp.json())
return resp.json()

def get_feature_set(
self, name: str, project: str = "", tag: str = None, uid: str = None
) -> schemas.FeatureSet:
) -> dict:
if uid and tag:
raise MLRunInvalidArgumentError("both uid and tag were provided")

Expand All @@ -717,7 +719,7 @@ def get_feature_set(
path = f"projects/{project}/feature-sets/{name}/references/{reference}"
error_message = f"Failed retrieving feature-set {project}/{name}"
resp = self.api_call("GET", path, error_message)
return schemas.FeatureSet(**resp.json())
return resp.json()

def list_features(
self,
Expand Down Expand Up @@ -750,7 +752,7 @@ def list_feature_sets(
entities: List[str] = None,
features: List[str] = None,
labels: List[str] = None,
) -> schemas.FeatureSetsOutput:
) -> List[dict]:
project = project or default_project
params = {
"name": name,
Expand All @@ -767,12 +769,12 @@ def list_feature_sets(
f"Failed listing feature-sets, project: {project}, query: {params}"
)
resp = self.api_call("GET", path, error_message, params=params)
return schemas.FeatureSetsOutput(**resp.json())
return resp.json()["feature_sets"]

def store_feature_set(
self,
name,
feature_set: Union[dict, schemas.FeatureSet],
name=None,
project="",
tag=None,
uid=None,
Expand All @@ -786,7 +788,8 @@ def store_feature_set(
if isinstance(feature_set, schemas.FeatureSet):
feature_set = feature_set.dict()

project = project or default_project
name = name or feature_set["metadata"]["name"]
project = project or feature_set["metadata"].get("project") or default_project
reference = uid or tag or "latest"
path = f"projects/{project}/feature-sets/{name}/references/{reference}"
error_message = f"Failed storing feature-set {project}/{name}"
Expand Down
3 changes: 2 additions & 1 deletion mlrun/db/sqldb.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,8 +259,9 @@ def list_feature_sets(
)

def store_feature_set(
self, name, feature_set, project="", tag=None, uid=None, versioned=True
self, feature_set, name=None, project="", tag=None, uid=None, versioned=True
):
name = name or feature_set.metadata.name
return self._transform_db_error(
self.db.store_feature_set,
self.session,
Expand Down
55 changes: 53 additions & 2 deletions tests/api/api/test_feature_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

def _generate_feature_set(name):
return {
"kind": "FeatureSet",
"metadata": {
"name": name,
"labels": {"owner": "saarc", "group": "dev"},
Expand Down Expand Up @@ -110,12 +111,14 @@ def test_feature_set_create_with_extra_fields(db: Session, client: TestClient) -
json_response = response.json()
_assert_extra_fields_exist(json_response)

# Make sure extra fields outside of the metadata/spec/status trio are not stored
# Make sure extra fields outside of the metadata/spec/status/kind fields are not stored
feature_set = _generate_feature_set("feature_set2")
feature_set["something_else"] = {"extra_field": "extra_value"}

response = _feature_set_create_and_assert(client, project_name, feature_set)
assert len(response) == 3 and "something_else" not in response
assert (
len(response) == 4 and "kind" in response and "something_else" not in response
)


def test_feature_set_create_and_list(db: Session, client: TestClient) -> None:
Expand Down Expand Up @@ -390,6 +393,54 @@ def test_feature_set_store(db: Session, client: TestClient) -> None:
assert response.status_code == HTTPStatus.BAD_REQUEST.value


def test_feature_set_create_without_labels(db: Session, client: TestClient) -> None:
project_name = f"prj-{uuid4().hex}"
name = "feature_set1"
feature_set = _generate_feature_set(name)

feature_set["metadata"].pop("labels")
_feature_set_create_and_assert(client, project_name, feature_set)

feature_set_update = {
"metadata": {"labels": {"label1": "value1", "label2": "value2"}}
}
feature_set_response = _patch_feature_set(
client, project_name, name, feature_set_update
)
assert (
len(feature_set_response["metadata"]["labels"]) == 2
), "Labels didn't get updated"


def test_feature_set_project_name_mismatch_failure(
db: Session, client: TestClient
) -> None:
project_name = f"prj-{uuid4().hex}"
name = "feature_set1"
feature_set = _generate_feature_set(name)
feature_set["metadata"]["project"] = "booboo"
# Calling POST with a different project name in object metadata should fail
response = client.post(
f"/api/projects/{project_name}/feature-sets", json=feature_set
)
assert response.status_code == HTTPStatus.BAD_REQUEST.value

# When POSTing without project name, project name should be implanted in the response
feature_set["metadata"].pop("project")
feature_set_response = _feature_set_create_and_assert(
client, project_name, feature_set
)
assert feature_set_response["metadata"]["project"] == project_name

feature_set["metadata"]["project"] = "woohoo"
# Calling PUT with a different project name in object metadata should fail
response = client.put(
f"/api/projects/{project_name}/feature-sets/{name}/references/latest",
json=feature_set,
)
assert response.status_code == HTTPStatus.BAD_REQUEST.value


def test_features_list(db: Session, client: TestClient) -> None:
project_name = f"prj-{uuid4().hex}"

Expand Down
23 changes: 18 additions & 5 deletions tests/rundb/test_httpdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ def test_list_functions(create_server):

def _create_feature_set(name):
return {
"kind": "FeatureSet",
"metadata": {
"name": name,
"labels": {"owner": "saarc", "group": "dev"},
Expand Down Expand Up @@ -372,7 +373,7 @@ def test_feature_sets(create_server):
db.create_feature_set(feature_set, project=project, versioned=True)

# Test store_feature_set, which allows updates as well as inserts
db.store_feature_set(name, feature_set, project=project, versioned=True)
db.store_feature_set(feature_set, name=name, project=project, versioned=True)

feature_set_update = {
"spec": {
Expand All @@ -386,9 +387,21 @@ def test_feature_sets(create_server):
)
feature_sets = db.list_feature_sets(project=project)

assert (
len(feature_sets.feature_sets) == count
), "bad list results - wrong number of members"
assert len(feature_sets) == count, "bad list results - wrong number of members"

feature_set = db.get_feature_set(name, project)
assert len(feature_set.spec.features) == 4
assert len(feature_set["spec"]["features"]) == 4

# Create a feature-set that has no labels
name = "feature_set_no_labels"
feature_set_without_labels = _create_feature_set(name)
feature_set_without_labels["metadata"].pop("labels")
# Use project name in the feature-set (don't provide it to API)
feature_set_without_labels["metadata"]["project"] = project
db.store_feature_set(feature_set_without_labels)
feature_set_update = {
"metadata": {"labels": {"label1": "value1", "label2": "value2"}}
}
db.update_feature_set(name, feature_set_update, project)
feature_set = db.get_feature_set(name, project)
assert len(feature_set["metadata"]["labels"]) == 2, "Labels didn't get updated"