# Query MLFlow to find the latest production model

This is a simple demo as presented in https://databricks.com/session_na20/accelerating-mlflow-hyper-parameter-optimization-pipelines-with-rapids.
It pulls the latest "production" model out of the MLFlow model registry and uses it directly within Python. See also the mlflow_project directory for more detailed MLFlow production examples.

In [2]:
import mlflow
from pprint import pprint

# Name under which the model is registered in the MLFlow model registry.
model_name = "MLFlow_Airline_RAPIDS"
model_id = None
model_uri = None

client = mlflow.tracking.MlflowClient()

# Gather every registered version of the model that is currently in the
# "Production" stage. More than one version can be in that stage at once, and
# the search results are not relied on to arrive in any particular order.
production_versions = [
    mv
    for mv in client.search_model_versions(f"name='{model_name}'")
    if mv.current_stage == "Production"
]

# Fail here with a clear message instead of handing `None` to the loading
# step further down.
if not production_versions:
    raise LookupError(
        f"No 'Production' version of model '{model_name}' found in the registry"
    )

# Compare version numbers numerically (int() also copes with a string-typed
# version field, where "10" would otherwise sort before "9").
latest_version = max(production_versions, key=lambda mv: int(mv.version))
model_uri = latest_version.source
model_id = latest_version.run_id

print(f"Model: {model_uri}")

## Load the model back to memory

In [4]:
import mlflow.sklearn
# Load the model from the artifact location stored in `model_uri`.
model = mlflow.sklearn.load_model(model_uri)

# Print the class of the loaded object as a quick check of what came back.
loaded_class = model.__class__
print(loaded_class)

## Finally, load data and predict

In [6]:
import cudf

# Read the airline dataset into a cuDF DataFrame.
df = cudf.read_parquet("/dbfs/data/airlines/airline_20000000.parquet")

# Every column except "ArrDelayBinary" is passed to the model.
# NOTE(review): Index.difference may return the names sorted rather than in
# the frame's original order -- confirm this matches the column order the
# model was trained with. The order-preserving alternative would be
# df.drop("ArrDelayBinary", axis=1).
excluded_columns = ["ArrDelayBinary"]
input_columns = df.columns.difference(excluded_columns)
X = df[input_columns]

# Last expression of the cell, so the notebook displays the predictions.
model.predict(X)

In [7]:
# Display the columns of X, i.e. the selection made with Index.difference.
X.columns

In [8]:
# Display the columns left after removing "ArrDelayBinary" with drop(), for
# comparison with the columns of X.
df.drop("ArrDelayBinary", axis=1).columns

In [9]:
# Display all columns of the loaded DataFrame, including "ArrDelayBinary".
df.columns