In [0]:
# This notebook is meant to run batch inference on the top of new iris samples

In [0]:
%pip install mlflow --upgrade
dbutils.library.restartPython()

In [0]:
from sklearn import datasets
from mlflow.pyfunc import load_model
import pandas as pd
import mlflow
from datetime import datetime

In [0]:
dbutils.widgets.text("catalog_name", "pedroz_e2edata_dev")
catalog_name = dbutils.widgets.get("catalog_name")

In [0]:
model_name = f'{catalog_name}.default.iris_model'

In [0]:
# Pull the dataset for running the inference
iris_samples = datasets.load_iris(as_frame=True)
df_samples = pd.DataFrame(data = iris_samples['data'], columns = iris_samples['feature_names'])
df_samples.columns = df_samples.columns.str.replace(' ', '_').str.replace('(', '').str.replace(')', '')
df_samples.head()

  df_samples.columns = df_samples.columns.str.replace(' ', '_').str.replace('(', '').str.replace(')', '')
  df_samples.columns = df_samples.columns.str.replace(' ', '_').str.replace('(', '').str.replace(')', '')


Unnamed: 0,sepal_length_cm,sepal_width_cm,petal_length_cm,petal_width_cm
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [0]:
model_uri = f"models:/{model_name}@champion"
model = load_model(model_uri)

In [0]:
predictions = model.predict(df_samples)
df_samples['prediction'] = predictions

df_samples.head()

Unnamed: 0,sepal_length_cm,sepal_width_cm,petal_length_cm,petal_width_cm,prediction
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [0]:
df_samples['actual_label'] = iris_samples['target']
df_samples.head()

Unnamed: 0,sepal_length_cm,sepal_width_cm,petal_length_cm,petal_width_cm,prediction,actual_label
0,5.1,3.5,1.4,0.2,0,0
1,4.9,3.0,1.4,0.2,0,0
2,4.7,3.2,1.3,0.2,0,0
3,4.6,3.1,1.5,0.2,0,0
4,5.0,3.6,1.4,0.2,0,0


In [0]:
mlflow_client = mlflow.tracking.MlflowClient()
model_version = mlflow_client.get_model_version_by_alias(model_name, "champion").version

df_samples['prediction_timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
df_samples['model_id_col'] = model_name + '@' + model_version

display(df_samples)

sepal_length_cm,sepal_width_cm,petal_length_cm,petal_width_cm,prediction,actual_label,prediction_timestamp,model_id_col
5.1,3.5,1.4,0.2,0,0,2025-08-11 13:12:12,pedroz_e2edata_dev.default.iris_model@21
4.9,3.0,1.4,0.2,0,0,2025-08-11 13:12:12,pedroz_e2edata_dev.default.iris_model@21
4.7,3.2,1.3,0.2,0,0,2025-08-11 13:12:12,pedroz_e2edata_dev.default.iris_model@21
4.6,3.1,1.5,0.2,0,0,2025-08-11 13:12:12,pedroz_e2edata_dev.default.iris_model@21
5.0,3.6,1.4,0.2,0,0,2025-08-11 13:12:12,pedroz_e2edata_dev.default.iris_model@21
5.4,3.9,1.7,0.4,0,0,2025-08-11 13:12:12,pedroz_e2edata_dev.default.iris_model@21
4.6,3.4,1.4,0.3,0,0,2025-08-11 13:12:12,pedroz_e2edata_dev.default.iris_model@21
5.0,3.4,1.5,0.2,0,0,2025-08-11 13:12:12,pedroz_e2edata_dev.default.iris_model@21
4.4,2.9,1.4,0.2,0,0,2025-08-11 13:12:12,pedroz_e2edata_dev.default.iris_model@21
4.9,3.1,1.5,0.1,0,0,2025-08-11 13:12:12,pedroz_e2edata_dev.default.iris_model@21


In [0]:
df_spark = spark.createDataFrame(df_samples)

In [0]:
try:
    display(spark.table(f"{catalog_name}.default.iris_data").limit(5))
    table_exists = True
except:
    table_exists = False

sepal_length_cm,sepal_width_cm,petal_length_cm,petal_width_cm,species,id
5.1,3.5,1.4,0.2,0,1
4.9,3.0,1.4,0.2,0,2
4.7,3.2,1.3,0.2,0,3
4.6,3.1,1.5,0.2,0,4
5.0,3.6,1.4,0.2,0,5


In [0]:
if table_exists: # append
    df_spark.write.mode("append").saveAsTable(f"{catalog_name}.default.iris_inferences")
else: # create table from scratch
    df_spark.write.mode("overwrite").saveAsTable(f"{catalog_name}.default.iris_inferences")

In [0]:
display(spark.sql(f"SELECT * FROM {catalog_name}.default.iris_inferences LIMIT 5"))

sepal_length_cm,sepal_width_cm,petal_length_cm,petal_width_cm,prediction,actual_label,prediction_timestamp,model_id_col
5.1,3.5,1.4,0.2,0,0,2025-08-11 13:08:48,pedroz_e2edata_dev.default.iris_model@21
4.9,3.0,1.4,0.2,0,0,2025-08-11 13:08:48,pedroz_e2edata_dev.default.iris_model@21
4.7,3.2,1.3,0.2,0,0,2025-08-11 13:08:48,pedroz_e2edata_dev.default.iris_model@21
4.6,3.1,1.5,0.2,0,0,2025-08-11 13:08:48,pedroz_e2edata_dev.default.iris_model@21
5.0,3.6,1.4,0.2,0,0,2025-08-11 13:08:48,pedroz_e2edata_dev.default.iris_model@21
