[Docs] Change feature store alias to fstore (from fs) (#1001)
gilad-shaham committed Jun 14, 2021
1 parent 9acf47a commit 3ede55b
Showing 8 changed files with 80 additions and 85 deletions.
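
In short, the commit swaps one import alias for another; both names bind the same module, so this is a naming change only. A sketch of the two spellings:

```python
# Before this commit the docs used:
import mlrun.feature_store as fs

# After this commit the docs use:
import mlrun.feature_store as fstore
```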
22 changes: 11 additions & 11 deletions docs/feature-store/basic-demo.ipynb
@@ -434,7 +434,7 @@
"metadata": {},
"outputs": [],
"source": [
"import mlrun.feature_store as fs\n",
"import mlrun.feature_store as fstore\n",
"from mlrun.feature_store.steps import *\n",
"from mlrun.features import MinMaxValidator"
]
@@ -516,8 +516,8 @@
],
"source": [
"# add feature set without time column (stock ticker metadata) \n",
"stocks_set = fs.FeatureSet(\"stocks\", entities=[fs.Entity(\"ticker\")])\n",
"fs.ingest(stocks_set, stocks, infer_options=fs.InferOptions.default())"
"stocks_set = fstore.FeatureSet(\"stocks\", entities=[fstore.Entity(\"ticker\")])\n",
"fstore.ingest(stocks_set, stocks, infer_options=fstore.InferOptions.default())"
]
},
{
@@ -535,7 +535,7 @@
"outputs": [],
"source": [
"# create a new feature set\n",
"quotes_set = fs.FeatureSet(\"stock-quotes\", entities=[fs.Entity(\"ticker\")])"
"quotes_set = fstore.FeatureSet(\"stock-quotes\", entities=[fstore.Entity(\"ticker\")])"
]
},
{
@@ -704,7 +704,7 @@
"quotes_set.add_aggregation(\"bids\", \"bid\", [\"min\", \"max\"], \"1h\", \"10m\")\n",
"\n",
"# add feature validation policy\n",
"quotes_set[\"bid\"] = fs.Feature(validator=MinMaxValidator(min=52, severity=\"info\"))\n",
"quotes_set[\"bid\"] = fstore.Feature(validator=MinMaxValidator(min=52, severity=\"info\"))\n",
"\n",
"# add default target definitions and plot\n",
"quotes_set.set_targets()\n",
@@ -938,12 +938,12 @@
}
],
"source": [
"fs.infer_metadata(\n",
"fstore.infer_metadata(\n",
" quotes_set,\n",
" quotes,\n",
" entity_columns=[\"ticker\"],\n",
" timestamp_key=\"time\",\n",
" options=fs.InferOptions.default(),\n",
" options=fstore.InferOptions.default(),\n",
")"
]
},
@@ -1699,7 +1699,7 @@
],
"source": [
"# save ingest data and print the FeatureSet spec\n",
"df = fs.ingest(quotes_set, quotes)"
"df = fstore.ingest(quotes_set, quotes)"
]
},
{
@@ -1726,7 +1726,7 @@
" \"stocks.*\",\n",
"]\n",
"\n",
"vector = fs.FeatureVector(\"stocks-vec\", features, description=\"stocks demo feature vector\")\n",
"vector = fstore.FeatureVector(\"stocks-vec\", features, description=\"stocks demo feature vector\")\n",
"vector.save()"
]
},
@@ -1848,7 +1848,7 @@
}
],
"source": [
"resp = fs.get_offline_features(vector, entity_rows=trades, entity_timestamp_column=\"time\")\n",
"resp = fstore.get_offline_features(vector, entity_rows=trades, entity_timestamp_column=\"time\")\n",
"resp.to_dataframe()"
]
},
@@ -1865,7 +1865,7 @@
"metadata": {},
"outputs": [],
"source": [
"service = fs.get_online_feature_service(\"stocks-vec\")"
"service = fstore.get_online_feature_service(\"stocks-vec\")"
]
},
{
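
Taken together, the renamed calls in basic-demo.ipynb follow the pattern below — a minimal sketch under the new alias, where the sample `stocks` frame is a stand-in for the notebook's data:

```python
import pandas as pd
import mlrun.feature_store as fstore

# stand-in for the notebook's stocks dataframe (hypothetical sample data)
stocks = pd.DataFrame({"ticker": ["MSFT", "GOOG"], "exchange": ["NASDAQ", "NASDAQ"]})

# define a feature set keyed on the ticker entity and ingest the frame
stocks_set = fstore.FeatureSet("stocks", entities=[fstore.Entity("ticker")])
fstore.ingest(stocks_set, stocks, infer_options=fstore.InferOptions.default())

# collect stored features into a named vector, then read it back offline
vector = fstore.FeatureVector("stocks-vec", ["stocks.*"], description="stocks demo feature vector")
vector.save()
df = fstore.get_offline_features(vector).to_dataframe()

# or serve the same vector through the online feature service
service = fstore.get_online_feature_service("stocks-vec")
```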
59 changes: 27 additions & 32 deletions docs/feature-store/end-to-end-demo/01-ingest-datasources.ipynb
@@ -37,21 +37,16 @@
"outputs": [],
"source": [
"import mlrun\n",
"from os import getenv\n",
"\n",
"mlrun.set_environment(project='fsdemo', user_project=True)\n",
"project, artifact_path = mlrun.set_environment(project='fsdemo', user_project=True)\n",
"# location of the output data files\n",
"data_path = f\"{getenv('V3IO_HOME_URL')}/demos/feature-store/data/\""
"data_path = f\"{artifact_path}/data/\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"metadata": {},
"outputs": [],
"source": [
"def move_timestamps(df, shift='0s'):\n",
@@ -86,7 +81,7 @@
"outputs": [],
"source": [
"# Import MLRun's Feature Store\n",
"import mlrun.feature_store as fs\n",
"import mlrun.feature_store as fstore\n",
"\n",
"# Import MLRun's Data Sources to set the wanted ingestion pipeline\n",
"from mlrun.datastore.sources import CSVSource, ParquetSource, HttpSource"
@@ -103,8 +98,8 @@
"import storey\n",
"\n",
"# Define the Lab Measurements FeatureSet\n",
"measurements_set = fs.FeatureSet(\"measurements\", \n",
" entities=[fs.Entity(\"patient_id\")], \n",
"measurements_set = fstore.FeatureSet(\"measurements\", \n",
" entities=[fstore.Entity(\"patient_id\")], \n",
" timestamp_key='timestamp', \n",
" description=\"various patient health measurements\")\n",
"\n",
@@ -676,8 +671,8 @@
"source": [
"# User our loaded DF as the datasource and ingest it through\n",
"# the define pipeline\n",
"resp = fs.ingest(measurements_set, measurements_df, \n",
" infer_options=fs.InferOptions.default())\n",
"resp = fstore.ingest(measurements_set, measurements_df, \n",
" infer_options=fstore.InferOptions.default())\n",
"resp.head()"
]
},
@@ -720,7 +715,7 @@
"outputs": [],
"source": [
"# add feature set without time column (stock ticker metadata) \n",
"patients_set = fs.FeatureSet(\"patient_details\", entities=[fs.Entity(\"patient_id\")],\n",
"patients_set = fstore.FeatureSet(\"patient_details\", entities=[fstore.Entity(\"patient_id\")],\n",
" description=\"personal and medical patient details\")\n",
"\n",
"# Get FeatureSet computation graph\n",
Expand Down Expand Up @@ -1018,7 +1013,7 @@
"patients_df = pd.read_parquet('https://s3.wasabisys.com/iguazio/data/patients/patient_details.parquet')\n",
"\n",
"# Run local ingestion test\n",
"fs.infer(patients_set, patients_df.head())"
"fstore.infer(patients_set, patients_df.head())"
]
},
{
@@ -1040,8 +1035,8 @@
"patients_set.save()\n",
"\n",
"# Run Ingestion task\n",
"resp = fs.ingest(patients_set, patients_df, \n",
" infer_options=fs.InferOptions.default())"
"resp = fstore.ingest(patients_set, patients_df, \n",
" infer_options=fstore.InferOptions.default())"
]
},
{
@@ -1303,8 +1298,8 @@
],
"source": [
"source = ParquetSource('pq', 'https://s3.wasabisys.com/iguazio/data/patients/patient_details.parquet')\n",
"config = fs.RunConfig(local=False).apply(mlrun.platforms.auto_mount())\n",
"fs.ingest(patients_set, source, run_config=config)"
"config = fstore.RunConfig(local=False).apply(mlrun.platforms.auto_mount())\n",
"fstore.ingest(patients_set, source, run_config=config)"
]
},
{
@@ -1329,8 +1324,8 @@
"metadata": {},
"outputs": [],
"source": [
"early_sense_set = fs.FeatureSet(\"early_sense\", entities=[fs.Entity(\"patient_id\")], timestamp_key='timestamp',\n",
" description=\"real time patient bed sensor data\")"
"early_sense_set = fstore.FeatureSet(\"early_sense\", entities=[fstore.Entity(\"patient_id\")], timestamp_key='timestamp',\n",
" description=\"real time patient bed sensor data\")"
]
},
{
@@ -1350,8 +1345,8 @@
"source": [
"from mlrun.features import MinMaxValidator\n",
"\n",
"early_sense_set[\"hr\"] = fs.Feature(validator = MinMaxValidator(min=0, max=220, severity=\"info\"))\n",
"early_sense_set[\"rr\"] = fs.Feature(validator = MinMaxValidator(min=0, max=25, severity=\"info\"))"
"early_sense_set[\"hr\"] = fstore.Feature(validator = MinMaxValidator(min=0, max=220, severity=\"info\"))\n",
"early_sense_set[\"rr\"] = fstore.Feature(validator = MinMaxValidator(min=0, max=25, severity=\"info\"))"
]
},
{
@@ -1829,7 +1824,7 @@
"early_sense_df = pd.read_parquet('https://s3.wasabisys.com/iguazio/data/patients/early_sense.parquet')\n",
"early_sense_df['timestamp'] = pd.to_datetime(early_sense_df['timestamp'])\n",
"early_sense_df = move_timestamps(early_sense_df) # update timestamps\n",
"fs.infer(early_sense_set, early_sense_df.head())"
"fstore.infer(early_sense_set, early_sense_df.head())"
]
},
{
@@ -1847,7 +1842,7 @@
],
"source": [
"# Run ingest pipeline\n",
"df=fs.ingest(early_sense_set, early_sense_df)\n",
"df=fstore.ingest(early_sense_set, early_sense_df)\n",
"\n",
"# Save the early-sense Featureset\n",
"early_sense_set.save()\n",
@@ -1909,11 +1904,11 @@
"# an MLRun function from them so we can run the pipeline\n",
"# as a Nuclio function\n",
"func = mlrun.code_to_function(\"ingest\", kind=\"serving\")\n",
"nuclio_config = fs.RunConfig(function=func, local=False).apply(mlrun.platforms.auto_mount())\n",
"nuclio_config = fstore.RunConfig(function=func, local=False).apply(mlrun.platforms.auto_mount())\n",
"\n",
"# Deploy the Online ingestion service using the pipeline definition from before\n",
"# with our new HTTP Source and our define Function\n",
"server = fs.deploy_ingestion_service(early_sense_set, source, run_config=nuclio_config)"
"server = fstore.deploy_ingestion_service(early_sense_set, source, run_config=nuclio_config)"
]
},
{
@@ -2024,10 +2019,10 @@
],
"source": [
"#labels_df = pd.read_parquet('labels.parquet')\n",
"labels_set = fs.FeatureSet(\"labels\", entities=[fs.Entity(\"patient_id\")], timestamp_key='timestamp',\n",
" description=\"training labels\")\n",
"labels_set = fstore.FeatureSet(\"labels\", entities=[fstore.Entity(\"patient_id\")], timestamp_key='timestamp',\n",
" description=\"training labels\")\n",
"labels_set.set_targets()\n",
"df = fs.infer(labels_set, data_path + 'labels.parquet')"
"df = fstore.infer(labels_set, data_path + 'labels.parquet')"
]
},
{
@@ -2178,7 +2173,7 @@
}
],
"source": [
"df = fs.ingest(labels_set, data_path + 'labels.parquet')"
"df = fstore.ingest(labels_set, data_path + 'labels.parquet')"
]
},
{
Expand Down Expand Up @@ -2221,4 +2216,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
-}
+}
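
A side note on the non-alias change in 01-ingest-datasources.ipynb: the output location now derives from the artifact path returned by `mlrun.set_environment` instead of the `V3IO_HOME_URL` environment variable. A minimal sketch of the new pattern:

```python
import mlrun

# set_environment returns a (project name, artifact path) pair
project, artifact_path = mlrun.set_environment(project='fsdemo', user_project=True)

# output data files now live under the project's artifact path
data_path = f"{artifact_path}/data/"
```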
19 changes: 9 additions & 10 deletions docs/feature-store/end-to-end-demo/02-create-training-model.ipynb
@@ -31,11 +31,10 @@
"outputs": [],
"source": [
"import mlrun\n",
"from os import getenv\n",
"\n",
"project, _ = mlrun.set_environment(project='fsdemo', user_project=True)\n",
"project, artifact_path = mlrun.set_environment(project='fsdemo', user_project=True)\n",
"# location of the output data files\n",
"data_path = f\"{getenv('V3IO_HOME_URL')}/demos/feature-store/data/\""
"data_path = f\"{artifact_path}/data/\""
]
},
{
@@ -58,7 +57,7 @@
"outputs": [],
"source": [
"# Import MLRun's Feature Store\n",
"import mlrun.feature_store as fs\n",
"import mlrun.feature_store as fstore\n",
"\n",
"# Define the featuer vector's name for future reference\n",
"feature_vector_name = 'patient-deterioration'\n",
@@ -101,10 +100,10 @@
" ]\n",
"\n",
"# Define the feature vector\n",
"fv = fs.FeatureVector(feature_vector_name, \n",
" features, \n",
" label_feature=\"labels.label\",\n",
" description='Predict patient deterioration')\n",
"fv = fstore.FeatureVector(feature_vector_name, \n",
" features, \n",
" label_feature=\"labels.label\",\n",
" description='Predict patient deterioration')\n",
"\n",
"# Save the feature vector in the Feature Store\n",
"fv.save()"
@@ -350,7 +349,7 @@
"# Get offline feature vector\n",
"# will return a pandas dataframe and save the dataset to parquet so a \n",
"# training job could train on it\n",
"dataset = fs.get_offline_features(feature_vector_name, target=ParquetTarget())\n",
"dataset = fstore.get_offline_features(feature_vector_name, target=ParquetTarget())\n",
"\n",
"# View dataset example\n",
"df = dataset.to_dataframe()\n",
@@ -922,7 +921,7 @@
}
],
"source": [
"fs.get_feature_vector(f'{project}/{feature_vector_name}').status.targets['parquet'].to_dict()"
"fstore.get_feature_vector(f'{project}/{feature_vector_name}').status.targets['parquet'].to_dict()"
]
},
{
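
The renamed calls in 02-create-training-model.ipynb reduce to the retrieval pattern below — a sketch only; the `ParquetTarget` import path is an assumption, since the notebook imports it in an earlier cell not shown in this diff:

```python
import mlrun.feature_store as fstore
from mlrun.datastore.targets import ParquetTarget  # import path assumed

feature_vector_name = 'patient-deterioration'

# materialize the saved vector to a parquet target and load it for training
dataset = fstore.get_offline_features(feature_vector_name, target=ParquetTarget())
df = dataset.to_dataframe()

# the materialized target is recorded on the vector's status
target_info = fstore.get_feature_vector(feature_vector_name).status.targets['parquet'].to_dict()
```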
13 changes: 7 additions & 6 deletions docs/feature-store/feature-sets.md
@@ -57,13 +57,14 @@ in a NoSQL DB, users can use the default targets or add/replace with additional

Graph example (storey engine):
```python
feature_set = FeatureSet("measurements", entities=[Entity(key)], timestamp_key="timestamp")
import mlrun.feature_store as fstore
feature_set = fstore.FeatureSet("measurements", entities=[Entity(key)], timestamp_key="timestamp")
# Define the computational graph including our custom functions
feature_set.graph.to(DropColumns(drop_columns))\
.to(RenameColumns(mapping={'bad': 'bed'}))
feature_set.add_aggregation('hr', 'hr', ['avg'], ["1h"])
feature_set.plot()
-fs.ingest(feature_set, data_df)
+fstore.ingest(feature_set, data_df)
```

Graph example (pandas engine):
@@ -72,9 +73,9 @@ def myfunc1(df, context=None):
df = df.drop(columns=["exchange"])
return df

-stocks_set = fs.FeatureSet("stocks", entities=[Entity("ticker")], engine="pandas")
+stocks_set = fstore.FeatureSet("stocks", entities=[Entity("ticker")], engine="pandas")
stocks_set.graph.to(name="s1", handler="myfunc1")
-df = fs.ingest(stocks_set, stocks_df)
+df = fstore.ingest(stocks_set, stocks_df)
```

The graph steps can use built-in transformation classes, simple python classes or function handlers.
@@ -85,7 +86,7 @@ This allows to get a preview of the results (in the returned dataframe). The sim
The infer operation also learns the feature set schema and does statistical analysis on the result by default.

```python
-df = fs.infer(quotes_set, quotes)
+df = fstore.infer(quotes_set, quotes)

# print the feature statistics
print(quotes_set.get_stats_table())
@@ -146,7 +147,7 @@ There are multiple data source options including http, kafka, kinesis, v3io stre
source = HTTPSource()
func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
config = RunConfig(function=func)
-fs.deploy_ingestion_service(my_set, source, run_config=config)
+fstore.deploy_ingestion_service(my_set, source, run_config=config)
```

To learn more about deploy_ingestion_service, see {py:class}`~mlrun.feature_store.deploy_ingestion_service`
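
For reference, the storey-engine snippet from feature-sets.md, restated as a self-contained sketch under the new alias (the entity and column names here are placeholders, not part of the docs change):

```python
import mlrun.feature_store as fstore
from mlrun.feature_store.steps import DropColumns, RenameColumns  # built-in transformation steps

feature_set = fstore.FeatureSet("measurements",
                                entities=[fstore.Entity("patient_id")],
                                timestamp_key="timestamp")

# chain built-in steps on the feature set's computational graph
feature_set.graph.to(DropColumns(["department"]))\
    .to(RenameColumns(mapping={"bad": "bed"}))

# hourly sliding-window average, as in the page's own example
feature_set.add_aggregation("hr", "hr", ["avg"], ["1h"])
```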
