In [0]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import mlflow
import mlflow.sklearn

In [0]:
# Unity Catalog coordinates of the Iris train/test tables
catalog, schema = "workspace", "schema_iris"
train_table, test_table = "iris_train", "iris_test"

# Fully qualified three-level names: <catalog>.<schema>.<table>
train_table_full = ".".join((catalog, schema, train_table))
test_table_full = ".".join((catalog, schema, test_table))

# Read both tables (training first, then test) and convert each to a pandas DataFrame.
# NOTE(review): `spark` is not defined in this notebook — presumably the session
# object injected by the notebook runtime; confirm.
train_df, test_df = (
    spark.table(full_name).toPandas()
    for full_name in (train_table_full, test_table_full)
)

In [0]:
# Columns used as model inputs and the column holding the class label
feature_cols = ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]
label_col = "Species"

# Split each frame into a feature matrix (x) and a label vector (y)
train_x, train_y = train_df[feature_cols], train_df[label_col]
test_x, test_y = test_df[feature_cols], test_df[label_col]

In [0]:
# Hyperparameters gathered in one mapping; the fixed random_state makes runs repeatable
rf_params = {"n_estimators": 100, "random_state": 42}
rf = RandomForestClassifier(**rf_params)

# Train on the training split (kept as the last expression so the cell still shows its result)
rf.fit(X=train_x, y=train_y)

In [0]:
# Predict labels for the test features and compare them with the true labels
y_pred = rf.predict(test_x)
accuracy = accuracy_score(y_true=test_y, y_pred=y_pred)

# Report the accuracy rounded to four decimal places
print(f"Random Forest model accuracy: {accuracy:.4f}")

In [0]:
# Three-level Unity Catalog name (<catalog>.<schema>.<model>) for the registered model
model_name = "iris_random_forest"
registered_model_name = f"{catalog}.{schema}.{model_name}"

# A single training row, kept as a one-row DataFrame, is passed as the input example
# so that MLflow can infer the model signature from it
input_example = train_x.iloc[[0]]

# Use Unity Catalog as the MLflow model registry
mlflow.set_registry_uri("databricks-uc")

with mlflow.start_run():
    # Record the hyperparameters and the test accuracy in the same run as the model,
    # so the registered version can be traced back to how it was trained and how it scored
    mlflow.log_params(rf.get_params())
    mlflow.log_metric("accuracy", accuracy)

    # Log the fitted estimator and register it under the Unity Catalog name in one call
    mlflow.sklearn.log_model(
        sk_model=rf,
        artifact_path="model",
        input_example=input_example,
        registered_model_name=registered_model_name
    )

# Reached only if logging and registration above raised no exception
print(f"Model registered in Unity Catalog as {registered_model_name}")