Skip to content

Commit

Permalink
[Feature Store] Fix run id for get offline features with target (#1881)
Browse files Browse the repository at this point in the history
  • Loading branch information
benbd86 committed Apr 7, 2022
1 parent 2804ac6 commit bab0beb
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 0 deletions.
5 changes: 5 additions & 0 deletions mlrun/feature_store/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,11 @@ def get_offline_features(
entity_timestamp_column = (
entity_timestamp_column or feature_vector.spec.timestamp_field
)

if target:
if not target.run_id:
target.run_id = "offline-features"

if run_config:
return run_merge_job(
feature_vector,
Expand Down
31 changes: 31 additions & 0 deletions tests/system/feature_store/test_feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,37 @@ def test_get_offline_features_with_or_without_indexes(self):
assert df_with_index.index.name == "ticker"
assert "time" in df_with_index.columns, "'time' column should be present"

@pytest.mark.parametrize(
    "target_path",
    [
        # no explicit path - use the target's default
        None,
        # a single parquet file
        f"v3io:///bigdata/{project_name}/gof_wt.parquet",
        # a single parquet file below a literal "{run_id}" placeholder
        f"v3io:///bigdata/{project_name}/{{run_id}}/gof_wt.parquet",
        # a directory
        f"v3io:///bigdata/{project_name}/gof_wt/",
        # a directory ending with the "{run_id}" placeholder
        f"v3io:///bigdata/{project_name}/gof_wt/{{run_id}}",
        # a directory with the "{run_id}" placeholder in the middle of the path
        f"v3io:///bigdata/{project_name}/gof_wt/{{run_id}}/gof_wt",
    ],
)
def test_different_target_paths_for_get_offline_features(self, target_path):
    """Write offline features to a parquet target for each path layout.

    For every parametrized ``target_path`` a small stocks feature set is
    ingested, a feature vector over it is written through
    ``fs.get_offline_features`` into a ``ParquetTarget``, and the written
    data is read back from ``target.get_target_path()``. The target is
    created without a run id, so the test also checks that it ends up with
    the ``"offline-features"`` run id.
    """
    # Feature set keyed by the "ticker" entity.
    ticker_entity = Entity("ticker", ValueType.STRING)
    feature_set = fs.FeatureSet("stocks_test", entities=[ticker_entity])

    # Three rows of sample data to ingest.
    stock_rows = {
        "ticker": ["MSFT", "GOOG", "AAPL"],
        "name": ["Microsoft Corporation", "Alphabet Inc", "Apple Inc"],
        "booly": [True, False, True],
    }
    fs.ingest(feature_set, pd.DataFrame(stock_rows))

    # Request every feature of the ingested set and write it to the target.
    feature_vector = fs.FeatureVector("SjqevLXR", ["stocks_test.*"])
    parquet_target = ParquetTarget(name="parquet", path=target_path)
    fs.get_offline_features(
        feature_vector, with_indexes=True, target=parquet_target
    )

    # The resolved target path must be readable as parquet.
    written_df = pd.read_parquet(parquet_target.get_target_path())
    assert written_df is not None
    assert parquet_target.run_id == "offline-features"

def test_feature_set_db(self):
name = "stocks_test"
stocks_set = fs.FeatureSet(name, entities=["ticker"])
Expand Down

0 comments on commit bab0beb

Please sign in to comment.