# Saving Models with MLflow Models

## Importing dependencies

In [1]:
# Importing dependencies
import mlflow
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import datasets
import pandas as pd
from mlflow.models import infer_signature, ModelSignature
from mlflow.types import Schema, ColSpec

## Fitting a scikit-learn estimator

In [2]:
# Loading the breast cancer dataset bundled with scikit-learn
data = datasets.load_breast_cancer()

# Splitting the data into train and test sets.
# stratify keeps the class proportions the same in both splits; the fixed
# random_state makes the split (and everything derived from it) reproducible
# when the notebook is re-run from a fresh kernel.
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    stratify=data.target,
    random_state=42,
)

# Instantiating and fitting the model.
# NOTE(review): the recorded output shows the solver reaching max_iter without
# converging; scaling the features or raising max_iter would address that.
model = LogisticRegression(max_iter=1000)
model.fit(X=X_train, y=y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression(max_iter=1000)

## Providing input signature and input examples

### Inferring the input signature automatically

In [3]:
# Converting train features into a DataFrame so the signature records column names
X_train_df = pd.DataFrame(data=X_train, columns=data.feature_names)

# Inferring the signature from a matching input/output pair: the output sample
# is the model's prediction for the same training rows used as the input sample.
# The model was fitted on the raw array, so the raw array is passed to predict.
signature = infer_signature(model_input=X_train_df,
                            model_output=model.predict(X_train))

In [4]:
# Printing the text form of the inferred signature to review its inputs and outputs
print(str(signature))

inputs: 
  ['mean radius': double, 'mean texture': double, 'mean perimeter': double, 'mean area': double, 'mean smoothness': double, 'mean compactness': double, 'mean concavity': double, 'mean concave points': double, 'mean symmetry': double, 'mean fractal dimension': double, 'radius error': double, 'texture error': double, 'perimeter error': double, 'area error': double, 'smoothness error': double, 'compactness error': double, 'concavity error': double, 'concave points error': double, 'symmetry error': double, 'fractal dimension error': double, 'worst radius': double, 'worst texture': double, 'worst perimeter': double, 'worst area': double, 'worst smoothness': double, 'worst compactness': double, 'worst concavity': double, 'worst concave points': double, 'worst symmetry': double, 'worst fractal dimension': double]
outputs: 
  [Tensor('int32', (-1,))]



### Specifying the input signature manually

In [5]:
# Example input schema for the Iris dataset: one named double column per feature
iris_feature_names = [
    "sepal length (cm)",
    "sepal width (cm)",
    "petal length (cm)",
    "petal width (cm)",
]
input_schema = Schema(inputs=[ColSpec(type="double", name=feature_name)
                              for feature_name in iris_feature_names])

# Example output schema for the Iris dataset: a single unnamed long column
output_schema = Schema(inputs=[ColSpec(type="long")])

In [6]:
# Building the input schema for the breast cancer dataset:
# one double-typed column per entry in data.feature_names
input_columns = [ColSpec(type="double", name=feature_name)
                 for feature_name in data.feature_names]
input_schema = Schema(inputs=input_columns)

# Building the output schema for the breast cancer dataset:
# a single unnamed long column
output_columns = [ColSpec("long")]
output_schema = Schema(inputs=output_columns)

In [7]:
# Viewing the input schema, then a blank line, then the output schema
# (the output schema line is preceded by a single space)
schema_report = "\n".join([str(input_schema), "", " " + str(output_schema)])
print(schema_report)

['mean radius': double, 'mean texture': double, 'mean perimeter': double, 'mean area': double, 'mean smoothness': double, 'mean compactness': double, 'mean concavity': double, 'mean concave points': double, 'mean symmetry': double, 'mean fractal dimension': double, 'radius error': double, 'texture error': double, 'perimeter error': double, 'area error': double, 'smoothness error': double, 'compactness error': double, 'concavity error': double, 'concave points error': double, 'symmetry error': double, 'fractal dimension error': double, 'worst radius': double, 'worst texture': double, 'worst perimeter': double, 'worst area': double, 'worst smoothness': double, 'worst compactness': double, 'worst concavity': double, 'worst concave points': double, 'worst symmetry': double, 'worst fractal dimension': double]

 [long]


In [8]:
# Creating a signature from the manually defined input and output schemas
signature = ModelSignature(
    inputs=input_schema,
    outputs=output_schema,
)

### Providing input examples

In [9]:
# Using the first two rows of the training feature DataFrame as the input example
input_example = X_train_df.head(2)

## Specifying conda and pip dependencies

In [10]:
# Specifying a conda environment.
# The dictionary follows the layout of a conda environment file:
#   - the standard channel is called "defaults" (plural);
#   - packages installed through pip are listed in a nested {"pip": [...]}
#     entry inside "dependencies", next to the "pip" package itself.
conda_env = {
    "name": "mlflow-env",
    "channels": ["defaults"],
    "dependencies": [
        "pip",
        {"pip": ["mlflow", "cloudpickle==1.6.0"]},
    ],
}

# Specifying pip requirements
pip_requirements = ["mlflow"]

## Saving the model

### Saving a model to a local path

In [11]:
# Saving the model to the local "model" directory together with its
# environment, signature and input example.
# NOTE(review): presumably this fails if "model" already exists from a
# previous run of the notebook - confirm before re-running.
mlflow.sklearn.save_model(
    sk_model=model,
    path="model",
    conda_env=conda_env,
    signature=signature,
    input_example=input_example,
)

### Logging the model as an artifact under an MLflow run

In [12]:
# Logging the model as an artifact of a new MLflow run
with mlflow.start_run() as run:
    # Keeping the run ID so the logged model can be referenced afterwards
    run_id = run.info.run_id

    # Logging the model under the run's "model" artifact path
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="model",
        conda_env=conda_env,
        signature=signature,
        input_example=input_example,
    )

## Loading our saved models

In [13]:
# URI of the model saved with log_model.
# The f-prefix is required: without it the URI would contain the literal text
# "{run_id}" instead of the ID of the run the model was logged under.
model_uri_logged = f"runs:/{run_id}/model"

# Path to the model saved with save_model
model_uri_saved = "model"

In [14]:
# Loading the locally saved model through the generic Python function flavor
pyfunc_model = mlflow.pyfunc.load_model(
    model_uri=model_uri_saved,
)

# Loading the same saved model through the scikit-learn flavor
sklearn_model = mlflow.sklearn.load_model(
    model_uri=model_uri_saved,
)

## Doing inference with the loaded models

In [15]:
# Creating a DataFrame from our test features, labelled with the feature names
X_test_df = pd.DataFrame(data=X_test, columns=data.feature_names)

# Inference with the scikit-learn model on the raw test array
sklearn_predictions = sklearn_model.predict(X_test)

# Inference with the Python function model on the test DataFrame
pyfunc_predictions = pyfunc_model.predict(X_test_df)

In [16]:
import numpy as np

# Checking element-wise whether both loaded models produced the same predictions
predictions_match = np.all(np.equal(pyfunc_predictions, sklearn_predictions))

# Inspecting our predictions, followed by the result of the comparison
print(sklearn_predictions, "\n\n", predictions_match)

[0 0 1 0 0 0 1 0 1 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 0 1 1 0 1
 1 0 1 0 1 1 1 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0 1 1 1 1 0 1 1 1 0 0 1 1 1 1 0
 0 1 1 0 0 1 0 1 1 1 0 0 1 0 0 0 0 0 1 1 1 1 1 1 0 1 1 1 0 1 1 1 0 1 1 1 1
 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 1 1 0 0] 

 True


# Serving models with MLflow Models

## Doing inference with a served model

### Programmatic inference with MLflow Models

In [17]:
# Importing the requests library for handling HTTP requests
import requests

# Declaring the endpoint that requests are posted to
url = "http://127.0.0.1:5000/invocations"


# Defining our query function
def query(url, payload, headers=None):
    """Send ``payload`` to ``url`` with an HTTP POST request.

    Args:
        url: Address the request is posted to.
        payload: Request body, forwarded to ``requests.post`` as ``data``.
        headers: HTTP headers for the request. When omitted (``None``), a
            new ``{"Content-Type": "application/json"}`` dict is used.

    Returns:
        The response object returned by ``requests.post``.
    """
    # The default is built per call so that no mutable dict is shared
    # between invocations of this function.
    if headers is None:
        headers = {"Content-Type": "application/json"}

    return requests.post(url=url,
                         data=payload,
                         headers=headers)

In [18]:
# Converting our test DataFrame to JSON with different data orientations.
# Each orientation gets its own name so that neither is silently overwritten.
# Records
payload_records = X_test_df.to_json(orient="records")

# Split
payload_split = X_test_df.to_json(orient="split")

# The split-oriented JSON is the payload sent in the request below
payload = payload_split

In [19]:
# Posting the JSON payload to the endpoint with the default JSON headers
response = query(url, payload)

# Decoding the JSON body of the response and printing it
decoded_response = response.json()
print(decoded_response)

[0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0]


In [20]:
# Sending the test features as CSV in a POST request and obtaining the results.
# index=False keeps the DataFrame's row index out of the payload; by default
# to_csv() writes it as an extra unnamed leading column, which is not a feature.
response = query(url=url,
                 payload=X_test_df.to_csv(index=False),
                 headers={"Content-Type": "text/csv"})

# Inspecting the response
print(response.json())

[0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0]
