# List registered models

In [None]:
from mlflow.tracking import MlflowClient
from datetime import datetime
import pandas as pd
import boto3
import json

%load_ext dotenv
%dotenv

In [None]:
# Registry client shared by the cells below. It is created with default
# arguments, so connection settings presumably come from the environment
# loaded via dotenv -- TODO confirm.
client = MlflowClient()

def print_model_info(models):
    """Print a summary of each registered model and its latest versions.

    For every model this prints a ``--`` separator, the model name and its
    tags, followed by one tab-indented line per entry in ``latest_versions``
    (version, registration date and current stage), ordered by ascending
    version number.

    Args:
        models: Iterable of registered-model objects exposing ``name``,
            ``tags`` and ``latest_versions``. Each version must expose
            ``version``, ``creation_timestamp`` (treated as milliseconds
            since the epoch) and ``current_stage``.
    """
    for model in models:
        print("--")
        print(f"name: {model.name}")
        print(f"tags: {model.tags}")

        # Version identifiers may arrive as strings (e.g. "10"); compare them
        # numerically so that "10" is listed after "9" instead of before it.
        for mv in sorted(model.latest_versions, key=lambda v: int(v.version)):
            # The timestamp is divided by 1000 because fromtimestamp expects
            # seconds; the result is rendered in the machine's local time.
            registered_at = datetime.fromtimestamp(mv.creation_timestamp / 1000.0)
            print(
                f"\tversion: {mv.version}, "
                f"registration date: {registered_at}, "
                f"stage: {mv.current_stage}"
            )

# Print every model currently in the registry, with its latest versions.
print_model_info(client.list_registered_models())

# Transitioning an MLflow model's stage

In [None]:
# Target stage for version 1 of the model; the same variable is reused when
# the version description is updated.
new_stage = "Production"

client.transition_model_version_stage(name="wine-quality-predictor", version=1, stage=new_stage)

In [None]:
# Record on the model version which stage it now holds and since which date.
client.update_model_version(
    description=f"{new_stage} model since {datetime.today().date()}",
    name="wine-quality-predictor",
    version=1,
)

In [None]:
# Print the registry again so the new stage of the model version is visible.
print_model_info(client.list_registered_models())

# Serve model locally 

MLflow also has a CLI that supports the following commands:

* `serve` deploys the model as a local REST API server.

* `build_docker` packages a REST API endpoint serving the model as a docker image.

* `predict` uses the model to generate a prediction for a local CSV or JSON file. Note that this method only supports DataFrame input.

We will deploy the latest production model as a local REST API server. To do so, we just need to run these commands in a terminal:

* `source .env`
* `mlflow models serve -m models:/wine-quality-predictor/Production --no-conda`

Then from another terminal, run this to send a prediction request to the server:

`curl http://127.0.0.1:5000/invocations -H 'Content-Type: application/json' -d '{"columns":["fixed acidity","volatile acidity","citric acid","residual sugar","chlorides","free sulfur dioxide","total sulfur dioxide","density","pH","sulphates","alcohol"],"index":[82],"data":[[7.4,0.5,0.47,2.0,0.086,21.0,73.0,0.997,3.36,0.57,9.1]]}'`

## Deploy model in AWS SageMaker

The `mlflow.sagemaker` module can deploy `python_function` models locally in a Docker container with a SageMaker-compatible environment and remotely on SageMaker.

Usage:

* `mlflow sagemaker build-and-push-container`  - build the container (only needs to be called once)
* `mlflow sagemaker run-local -m <path-to-model>`  - test the model locally
* `mlflow sagemaker deploy <parameters>` - deploy the model remotely

Using the following code you can deploy the model to SageMaker:

In [None]:
import os

import mlflow.sagemaker as mfs

# Deployment configuration.
# The region and account id are required to build the image URL, so they are
# read with os.environ[...]: a missing variable fails right here with a
# KeyError naming it, instead of a confusing error further down.
region = os.environ["AWS_DEFAULT_REGION"]
aws_id = os.environ["AWS_ID"]
# The role ARN is passed through unchanged and may be None.
# NOTE(review): MLflow documents a fallback to the currently assumed role when
# execution_role_arn is None -- confirm for the pinned MLflow version.
arn = os.environ.get("AWS_ARN")
app_name = "mlflow-example"
model_uri = "models:/wine-quality-predictor/Production"
# Presumably the image pushed by `mlflow sagemaker build-and-push-container`;
# the tag is pinned to MLflow 1.14.1.
image_url = f"{aws_id}.dkr.ecr.{region}.amazonaws.com/mlflow-pyfunc:1.14.1"

mfs.deploy(
    app_name=app_name,
    model_uri=model_uri,
    region_name=region,
    mode="create",
    execution_role_arn=arn,
    image_url=image_url,
)

### Calling the Inference Endpoint

Let's use the inference endpoint provided by SageMaker to make predictions. Two utility functions are provided to interact with it:

- check_status: checks the status of our endpoint.
- query_endpoint: sends an inference request to the inference endpoint and returns the predicted values.

In [None]:
def check_status(app_name, region):
    """Return the current status of a SageMaker endpoint.

    Args:
        app_name: Name of the endpoint to describe.
        region: AWS region the SageMaker client is created for.

    Returns:
        The ``EndpointStatus`` field of the ``describe_endpoint`` response.
    """
    sagemaker = boto3.client("sagemaker", region_name=region)
    return sagemaker.describe_endpoint(EndpointName=app_name)["EndpointStatus"]

# Look up and print the status of the endpoint named by app_name.
print("Application status is: {}".format(check_status(app_name, region)))

In [None]:
def query_endpoint(app_name, input_json, region_name=None):
    """Send an inference request to a SageMaker endpoint and return the result.

    Args:
        app_name: Name of the endpoint to invoke.
        input_json: Request body as a JSON string. It is sent with the
            content type ``application/json; format=pandas-split``.
        region_name: AWS region of the endpoint. When omitted, the
            notebook-level ``region`` variable is used, as before.

    Returns:
        The response body parsed from JSON. It is also printed.
    """
    if region_name is None:
        region_name = region
    runtime = boto3.session.Session().client("sagemaker-runtime", region_name)

    response = runtime.invoke_endpoint(
        EndpointName=app_name,
        Body=input_json,
        ContentType="application/json; format=pandas-split",
    )
    # Decode as UTF-8, the default JSON encoding; decoding as ASCII would
    # raise UnicodeDecodeError on any non-ASCII character in the response.
    preds = json.loads(response["Body"].read().decode("utf-8"))
    print(f"Received response: {preds}")
    return preds

# Create a one-row test payload and request a prediction from the endpoint.
query_input = '{"columns":["fixed acidity","volatile acidity","citric acid","residual sugar","chlorides","free sulfur dioxide","total sulfur dioxide","density","pH","sulphates","alcohol"],"index":[82],"data":[[7.4,0.5,0.47,2.0,0.086,21.0,73.0,0.997,3.36,0.57,9.1]]}'
prediction1 = query_endpoint(app_name=app_name, input_json=query_input)