In [0]:
import mlflow.pyfunc
import pandas as pd
from pyspark.sql.functions import col
from pyspark.sql.types import StringType


## What does this code do?
| Step | Code | What It Does |
|:--|:--|:--|
| 1 | `MODEL_NAME = "HF_TWEET_SENTIMENT"` | Define the name of the MLflow-registered model you want to use. |
| 2 | `mlflow.pyfunc.spark_udf(...)` | Load the model from MLflow as a **Spark UDF** that runs in the **local** (current) Python environment (`env_manager="local"`), rather than in a recreated copy of the model's environment. |
| 3 | `spark.createDataFrame([...], ["text"])` | Create a Spark DataFrame containing example text inputs (e.g., user messages, tweets). |
| 4 | `df.withColumn("model_output", sentiment_model_udf(col("text")))` | Apply the loaded model UDF to the `"text"` column, creating a new column `"model_output"` with predictions. |
| 5 | `df_with_predictions.show(truncate=False)` | Display the Spark DataFrame showing the original texts alongside their predicted sentiments. |


In [0]:
MODEL_NAME = "HF_TWEET_SENTIMENT"

# Registry URI of the model version to score with.
model_uri = f"models:/{MODEL_NAME}/production"

# Wrap the registered model as a Spark UDF.
# env_manager="local" runs the model in the current Python environment;
# it does NOT recreate the environment the model was logged with.
# NOTE(review): result_type is not passed here, yet a later cell reads
# `model_output.label` -- confirm the UDF really returns a struct with a
# `label` field for this model.
sentiment_model_udf = mlflow.pyfunc.spark_udf(
    spark,
    model_uri=model_uri,
    env_manager="local",
)

# Hand-written example texts: one row each, in a single "text" column.
sample_rows = [
    ("This is amazing!",),
    ("I hated this movie.",),
    ("Absolutely fantastic product!",),
    ("Worst experience ever.",),
]
df = spark.createDataFrame(sample_rows, ["text"])

# Score every row by applying the UDF to "text"; the result is stored in "model_output".
df_with_predictions = df.withColumn("model_output", sentiment_model_udf(col("text")))

# Print the input texts next to the model output, without truncating long values.
df_with_predictions.show(truncate=False)

In [0]:
# Save predictions as a Delta table partitioned by sentiment.
#
# The location uses the dbfs:/ scheme because Spark, dbutils.fs and SQL all
# resolve paths against DBFS. The "/dbfs/..." prefix is the local-file (FUSE)
# form of the same location; handed to Spark it is read as
# dbfs:/dbfs/mnt/..., which is not the /mnt/tmp mount.
delta_path = "dbfs:/mnt/tmp/delta/sentiment"

# Promote the predicted label to a top-level column so it can be used as the
# partition key.
# Assumes `model_output` is a struct with a `label` field -- TODO confirm.
df_with_predictions = df_with_predictions.withColumn(
    "predicted_sentiment",
    col("model_output.label"),
)

# mode("overwrite") replaces whatever already exists at delta_path.
(
    df_with_predictions.write.format("delta")
    .mode("overwrite")
    .partitionBy("predicted_sentiment")
    .save(delta_path)
)

In [0]:
# List what is stored directly under the Delta location.
delta_listing = dbutils.fs.ls(delta_path)
display(delta_listing)

In [0]:
# Compact small files by running Delta's OPTIMIZE command against the table
# addressed by its path, then show the result returned by the command.
optimize_sql = f"OPTIMIZE delta.`{delta_path}`"
optimize_result = spark.sql(optimize_sql)
display(optimize_result)