From ef4656d811f730c87d5fe9dc7ee3f7b72881e4a4 Mon Sep 17 00:00:00 2001 From: Eyal Salomon Date: Wed, 30 Jun 2021 17:45:39 +0300 Subject: [PATCH] [Feature Vector] Fix using feature from another project in `get_offline_features` (#1069) --- mlrun/feature_store/common.py | 17 +++++++++++++++++ mlrun/feature_store/feature_vector.py | 10 ++++++++-- requirements.txt | 2 +- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/mlrun/feature_store/common.py b/mlrun/feature_store/common.py index 0441e39913a..6429d842d81 100644 --- a/mlrun/feature_store/common.py +++ b/mlrun/feature_store/common.py @@ -20,6 +20,7 @@ from ..config import config +project_separator = "/" feature_separator = "." expected_message = f"in the form feature-set{feature_separator}feature[ as alias]" @@ -44,6 +45,22 @@ def parse_feature_string(feature): return feature_set.strip(), feature_name.strip(), None +def parse_project_name_from_feature_string(feature): + """parse feature string into project name and feature""" + # expected format: <project-name>/<feature> + if project_separator not in feature: + return None, feature + + splitted = feature.split(project_separator) + if len(splitted) > 2: + raise mlrun.errors.MLRunInvalidArgumentError( + f"feature {feature} must be {expected_message}, cannot have more than one '/'" + ) + project_name = splitted[0] + feature_name = splitted[1] + return project_name.strip(), feature_name.strip() + + def get_feature_set_by_uri(uri, project=None): """get feature set object from db by uri""" db = mlrun.get_run_db() diff --git a/mlrun/feature_store/feature_vector.py b/mlrun/feature_store/feature_vector.py index 0c8a041b1a7..02dc2322c6f 100644 --- a/mlrun/feature_store/feature_vector.py +++ b/mlrun/feature_store/feature_vector.py @@ -22,7 +22,11 @@ from ..config import config as mlconf from ..datastore import get_store_uri from ..datastore.targets import CSVTarget, ParquetTarget, get_offline_target -from ..feature_store.common import get_feature_set_by_uri, parse_feature_string +from
..feature_store.common import ( + get_feature_set_by_uri, + parse_feature_string, + parse_project_name_from_feature_string, +) from ..features import Feature from ..model import DataSource, DataTarget, ModelObj, ObjectList, VersionedObjMetadata from ..runtimes.function_reference import FunctionReference @@ -266,10 +270,12 @@ def add_feature(name, alias, feature_set_object): feature_set_fields[featureset_name].append((name, alias)) for feature in features: + project_name, feature = parse_project_name_from_feature_string(feature) feature_set, feature_name, alias = parse_feature_string(feature) if feature_set not in feature_set_objects.keys(): feature_set_objects[feature_set] = get_feature_set_by_uri( - feature_set, self.metadata.project + feature_set, + project_name if project_name is not None else self.metadata.project, ) feature_set_object = feature_set_objects[feature_set] diff --git a/requirements.txt b/requirements.txt index 9123305a556..7ccc1f84b2e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -56,4 +56,4 @@ fsspec~=0.9.0 v3iofs~=0.1.7 # 3.4 and above failed builidng in some images - see https://github.com/pyca/cryptography/issues/5771 cryptography~=3.3.2 -storey~=0.6.8; python_version >= '3.7' +storey~=0.6.9; python_version >= '3.7'