In [0]:
from databricks.feature_engineering import FeatureEngineeringClient, FeatureLookup
from sklearn.ensemble import RandomForestClassifier
import mlflow

# Feature Engineering client: used below to assemble the training set.
fe = FeatureEngineeringClient()

# Make the Unity Catalog catalog and schema the session defaults.
for _stmt in ("USE CATALOG oms_analytics", "USE SCHEMA ml"):
    spark.sql(_stmt)

# Source of the historical labels. A CSV file is used here; in real-world
# cases this could be a table instead.
CUSTOMERS_CSV_PATH = "file:/Workspace/Users/yasodhashree91@gmail.com/oms-databricks/04_AI_ML/feature_eng/data_files/customers.csv"
KEY_COLUMN = "customer_id"
LABEL_COLUMN = "is_churn"

# Keep only the lookup key and the label; all other model inputs come from
# the feature table lookup below.
labels_df = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv(CUSTOMERS_CSV_PATH)
    .select(KEY_COLUMN, LABEL_COLUMN)
)

# Single lookup against the customer feature table, keyed on the customer id.
customers_lookup = FeatureLookup(
    table_name="oms_analytics.feature_store.customers_features",
    lookup_key=KEY_COLUMN,
)
feature_lookups = [customers_lookup]

# Enrich the labels with the looked-up features. The key column is listed in
# exclude_columns, so it should not end up among the model inputs.
training_set = fe.create_training_set(
    df=labels_df,
    feature_lookups=feature_lookups,
    label=LABEL_COLUMN,
    exclude_columns=[KEY_COLUMN],
)

# Materialise the enriched Spark DataFrame as a pandas DataFrame.
# NOTE(review): toPandas() collects everything onto the driver; presumably the
# data set is small enough for that -- confirm.
train_spark_df = training_set.load_df()
train_pdf = train_spark_df.toPandas()

# Separate the feature matrix (X) from the target vector (y).
y = train_pdf[LABEL_COLUMN]
X = train_pdf.drop(columns=[LABEL_COLUMN])


In [0]:
# Train the churn classifier and log it through the Feature Engineering client.
# Autologging is enabled with log_models=False, so autolog itself does not log
# the model; the model is logged explicitly via fe.log_model below, which is
# given the training set.
mlflow.sklearn.autolog(log_models=False)

# Hyperparameters for the random forest (fixed seed for repeatable fits).
rf_params = {
    "n_estimators": 100,
    "max_depth": 5,
    "random_state": 42,
}

with mlflow.start_run():
    model = RandomForestClassifier(**rf_params)
    model.fit(X, y)

    # NOTE(review): if the MLflow registry is Unity Catalog, the registered
    # model name may need the three-level catalog.schema.model form -- confirm
    # against the workspace configuration.
    fe.log_model(
        model=model,
        artifact_path="churn_prediction",
        flavor=mlflow.sklearn,
        training_set=training_set,
        registered_model_name="churn_prediction_model",
    )