In [0]:
!pip install xgboost
!pip install pandas
!pip install scikit-learn
!pip install mlflow


In [0]:
import pandas as pd
import xgboost as xgb
import mlflow
import mlflow.xgboost
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from mlflow.models.signature import infer_signature
import numpy as np

# --- Configuration ------------------------------------------------------------
# Everything a reader is likely to tune is collected here.
# NOTE(review): the experiment path points into a personal workspace folder;
# confirm this is the intended location before sharing the notebook.
EXPERIMENT_PATH = "/Users/sumalatha.suresh.nayak@gmail.com/xgboost/XGBOOST"
DATA_PATH = "data/train.csv"  # adjust path
TARGET_COLUMN = "target"
TEST_SIZE = 0.2
SEED = 42
REGISTERED_MODEL_NAME = "xgboost_sales_model"

mlflow.set_experiment(EXPERIMENT_PATH)

# --- Load data and build the train/test split ---------------------------------
df = pd.read_csv(DATA_PATH)

# Features are every column except the target; the target is the label vector.
X = df.drop(columns=TARGET_COLUMN)
y = df[TARGET_COLUMN]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Hyperparameters handed to the XGBRegressor constructor and logged to MLflow.
params = {
    "objective": "reg:squarederror",
    "max_depth": 5,
    "eta": 0.1,
    "n_estimators": 200
}

# --- Train, evaluate and register inside a single MLflow run ------------------
with mlflow.start_run():
    # Log the configuration first so it is recorded even if fitting fails.
    mlflow.log_params(params)

    model = xgb.XGBRegressor(**params)
    model.fit(X_train, y_train)

    # Hold-out evaluation: RMSE is the square root of the test-set MSE.
    preds = model.predict(X_test)
    mse = mean_squared_error(y_test, preds)
    rmse = np.sqrt(mse)
    mlflow.log_metric("rmse", rmse)

    # Infer signature for Unity Catalog.
    # The signature only captures the input/output schema, so the hold-out
    # features and the predictions already computed above are reused rather
    # than running a second prediction pass over the whole training set.
    signature = infer_signature(X_test, preds)

    # Log model with signature and register it under REGISTERED_MODEL_NAME.
    mlflow.xgboost.log_model(
        xgb_model=model,
        artifact_path="model",
        registered_model_name=REGISTERED_MODEL_NAME,
        signature=signature
    )

    print(f"RMSE: {rmse}")


In [0]:
import os

# Verify that the Databricks token is configured without displaying it.
# A bare `os.environ["DATABRICKS_TOKEN"]` as the last expression of a cell
# renders the secret into the cell output, which is then saved with the
# notebook. This check keeps the fail-fast KeyError for a missing variable
# but produces no output when the token is present.
if "DATABRICKS_TOKEN" not in os.environ:
    raise KeyError("DATABRICKS_TOKEN")