# Train and Deploy a model using a Feature Store

In this notebook we show how to:

1. Access a feature store registry that has been published to Azure Blob Storage
1. Discover features in the feature store
1. Train a model using the offline store (using the Feast function `get_historical_features()`)
1. Use the Feast `materialize()` function to push features from the offline store to an online store (Redis)
1. Deploy the model to an Azure ML endpoint where the features are consumed from the online store (Feast function `get_online_features()`)

## Set the registry connection string
Feast gets the credentials to the blob location containing the feast registry file using the `FEAST_AZ_STORAGE_CONNECTION_STRING` environment variable. 

__NOTE: You need to store the connection string as a Key Vault secret first (uncomment the `set_secret` line in the next cell and fill in your connection string). This is a one-time operation.__

In [None]:
import os
from azureml.core import Workspace

# Resolve the Azure ML workspace from the local config and get its default Key Vault.
ws = Workspace.from_config()
keyvault = ws.get_default_keyvault()

# One-time setup: uncomment the line below, replace <CONNECTION_STRING> with the blob
# connection string, run the cell once, then comment it out again so the secret value
# is not left in the notebook.
#keyvault.set_secret('FEAST-REGISTRY-CONN-STRING', '<CONNECTION_STRING>')


In [None]:
import os
from azureml.core import Workspace

# Resolve the workspace and its default Key Vault again so this cell runs on its own.
ws = Workspace.from_config()
keyvault = ws.get_default_keyvault()

# Read the blob connection string from the Key Vault secret and expose it to Feast via
# the environment (the blob location contains the registry.db file).
registry_conn_string = keyvault.get_secret('FEAST-REGISTRY-CONN-STRING')
os.environ['FEAST_AZ_STORAGE_CONNECTION_STRING'] = registry_conn_string

## Connect to Feature store

Below we create a Feast repository config, which accesses the registry.db file and also provides the credentials to the offline and online storage.

__NOTE: You will need to provide the registry location on your blob storage__

In [None]:
import os
from feast import FeatureStore, RepoConfig
from feast.repo_config import SqlServerOfflineStoreConfig, RedisOnlineStoreConfig
from azureml.core import Workspace

# Resolve the workspace and the Key Vault that holds the store credentials.
ws = Workspace.from_config()
keyvault = ws.get_default_keyvault()

# update this to your location
FEAST_REGISTRY_BLOB = "az://<CONTAINER_NAME>/<PATH>/registry.db"

# Connection strings for the offline (SQL) and online (Redis) stores are Key Vault secrets.
connection_string = keyvault.get_secret('FEAST-SQL-CONN')
redis_endpoint = keyvault.get_secret('FEAST-REDIS-CONN')

# Table names in the offline store.
orders_table = "orders"
driver_hourly_table = "driver_hourly"
customer_profile_table = "customer_profile"

# Build the two store configs first so the RepoConfig below reads as a plain summary.
offline_store_cfg = SqlServerOfflineStoreConfig(connection_string=connection_string)
online_store_cfg = RedisOnlineStoreConfig(connection_string=redis_endpoint)

repo_cfg = RepoConfig(
    project="production",
    provider="local",
    registry=FEAST_REGISTRY_BLOB,
    offline_store=offline_store_cfg,
    online_store=online_store_cfg,
)

# Handle used by every later cell that talks to the feature store.
store = FeatureStore(config=repo_cfg)


### List the feature views

Below you can see the feature views registered in the feature store.

In [None]:
# Ask the feature store for its feature views; as the last expression, the result is the cell output.
store.list_feature_views()

## Load features into a pandas dataframe

Below we load the features from the feature store into a pandas data frame

In [None]:
# Entity rows to enrich with features: every row of the orders table.
entity_query = "SELECT * FROM orders"

# Features to join onto the entity rows, written as "<feature view>:<feature>".
requested_features = [
    "driver_stats:conv_rate",
    "driver_stats:acc_rate",
    "driver_stats:avg_daily_trips",
    "customer_profile:current_balance",
    "customer_profile:avg_passenger_count",
    "customer_profile:lifetime_trip_count",
]

sql_job = store.get_historical_features(
    entity_df=entity_query,
    feature_refs=requested_features,
)

# Run the retrieval job and pull the result into a pandas DataFrame; preview the first rows.
training_df = sql_job.to_df()
training_df.head()

### Get only required features and drop NANs

In [None]:
# Label column first, followed by the six feature columns used for modelling.
required_columns = [
    'order_is_success',
    'driver_stats__conv_rate',
    'driver_stats__acc_rate',
    'driver_stats__avg_daily_trips',
    'customer_profile__current_balance',
    'customer_profile__avg_passenger_count',
    'customer_profile__lifetime_trip_count',
]

# Keep only those columns and discard any row that has a missing value in one of them.
x = training_df[required_columns].dropna()
x.head()

## Train a model and capture metrics with MLFlow

Below we train a model using the features from the feature store. Note that we also log the feature registry with the model.

In [None]:
import mlflow
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from azureml.core import Workspace

# connect to your workspace
ws = Workspace.from_config()

# create experiment and start logging to a new run in the experiment
experiment_name = "order_model"

# set up MLflow to track the metrics
mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())
mlflow.set_experiment(experiment_name)
mlflow.sklearn.autolog()

# Columns the model consumes, and the label it predicts.
FEATURE_COLUMNS = [
    'driver_stats__conv_rate',
    'driver_stats__acc_rate',
    'driver_stats__avg_daily_trips',
    'customer_profile__current_balance',
    'customer_profile__avg_passenger_count',
    'customer_profile__lifetime_trip_count',
]
TARGET_COLUMN = 'order_is_success'
SEED = 42

# Drop NaNs only on the columns the model actually uses, and into a NEW frame:
# - rows are not thrown away because of missing values in unrelated columns;
# - `training_df` is left untouched, so re-running this cell gives the same result.
model_df = training_df[FEATURE_COLUMNS + [TARGET_COLUMN]].dropna()
assert not model_df.empty, "no complete rows left after dropping NaNs"

# X and y come from the same filtered frame, so their rows are aligned by construction.
X = model_df[FEATURE_COLUMNS]
y = model_df[TARGET_COLUMN]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=SEED)

# Seed the forest as well as the split so repeated runs produce the same model.
clf = RandomForestClassifier(n_estimators=10, random_state=SEED)

# train the model
with mlflow.start_run() as run:
    clf.fit(X_train, y_train)
    # Record accuracy on the held-out split in the same run as the training metrics.
    mlflow.log_metric("test_accuracy", clf.score(X_test, y_test))

## Prepare for deployment

### Register model and the feature registry 

In [None]:
# Register the "model" artifact of the training run under the name "order_model".
run_id = run.info.run_id
model_uri = f"runs:/{run_id}/model"
model = mlflow.register_model(model_uri, "order_model")

### `materialize()` data into the online store (redis)

In [None]:
from datetime import datetime, timedelta, timezone

# How far back from "now" to load feature values into the online store.
MATERIALIZE_WINDOW = timedelta(days=7)

# Use an explicit UTC timestamp. Feast treats timezone-naive datetimes as UTC, so a naive
# local `datetime.now()` would shift the window by the local UTC offset on any machine
# that is not running in UTC.
end_date = datetime.now(timezone.utc)
start_date = end_date - MATERIALIZE_WINDOW
store.materialize(start_date=start_date, end_date=end_date)

In [None]:
from azureml.core.environment import Environment
from azureml.core.webservice import AciWebservice
from azureml.core import Workspace

# Resolve the workspace and its default Key Vault (keyvault is not used again in this cell).
ws = Workspace.from_config()
keyvault = ws.get_default_keyvault()

# create deployment config i.e. compute resources
# (Azure Container Instances: 1 CPU core, 1 GB of memory)
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    description="orders service using feast",
)

# Build the scoring environment, named "feast-env", from the local conda specification
# file environment.yml, then add the azureml-defaults pip package to it.
env = Environment.from_conda_specification("feast-env", "environment.yml")
env.python.conda_dependencies.add_pip_package("azureml-defaults")

# Pass the feature store settings to the scoring environment: the offline (SQL) and
# online (Redis) connection strings, the blob connection string and the registry location.
# Relies on connection_string, redis_endpoint, FEAST_REGISTRY_BLOB and the
# FEAST_AZ_STORAGE_CONNECTION_STRING environment variable set by earlier cells.
# NOTE(review): these values are secrets read from Key Vault; placing them in plain
# environment variables presumably stores them with the environment definition --
# confirm this is acceptable, or have the scoring script read them from Key Vault.
env.environment_variables = {
    "FEAST_HIST_CONN": connection_string,
    "FEAST_REDIS_CONN": redis_endpoint,
    "FEAST_AZ_STORAGE_CONNECTION_STRING": os.environ['FEAST_AZ_STORAGE_CONNECTION_STRING'],
    "FEAST_REGISTRY_BLOB": FEAST_REGISTRY_BLOB
}

In [None]:
import uuid
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core.model import Model

# Look up the registered model by name in the workspace.
model = Model(ws, "order_model")

# Scoring entry point (score.py inside the src directory) and the environment it runs in.
inference_config = InferenceConfig(
    source_directory="src",
    entry_script="score.py",
    environment=env,
)

# A short random suffix gives each deployment its own service name.
service_suffix = uuid.uuid4().hex[:4]
service_name = f"orders-service{service_suffix}"

# Deploy the model to the compute described by aciconfig.
deployment_args = dict(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=aciconfig,
)
service = Model.deploy(**deployment_args)

# Block until the deployment finishes, streaming its progress into the cell output.
service.wait_for_deployment(show_output=True)

In [None]:
# send raw HTTP request to test the web service.
import requests
import numpy as np

# Score one fixed example. Only the entity keys are sent; per the notebook introduction,
# the deployed service consumes the feature values from the online store.
input_data = '{"driver":1423, "customer_id":50999}'

headers = {"Content-Type": "application/json"}

# The timeout stops the cell from hanging indefinitely if the endpoint is unreachable.
resp = requests.post(service.scoring_uri, data=input_data, headers=headers, timeout=60)

print("POST to url", service.scoring_uri)
if resp.ok:
    print("prediction:", resp.text)
else:
    # Report HTTP failures explicitly instead of labelling an error body as a prediction.
    print("request failed with status", resp.status_code)
    print(resp.text)