In [8]:
import mlflow

# Run the login function to authenticate with Databricks CE
mlflow.login()


2024/08/29 02:00:25 INFO mlflow.utils.credentials: Successfully connected to MLflow hosted tracking server! Host: https://community.cloud.databricks.com.


In [9]:

# dummy experiment from tutorial
mlflow.set_tracking_uri("databricks")

mlflow.set_experiment("/check-databricks-connection")

with mlflow.start_run():
    mlflow.log_metric("foo", 1)
    mlflow.log_metric("bar", 2)



2024/08/29 02:00:34 INFO mlflow.tracking._tracking_service.client: 🏃 View run awesome-seal-612 at: https://community.cloud.databricks.com/ml/experiments/2597702965538188/runs/a4d786667a4f4e408d7a75d0062d959b.
2024/08/29 02:00:34 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://community.cloud.databricks.com/ml/experiments/2597702965538188.


## Simple Model and MLflow Experiment with Iris Dataset
Uses classic Iris dataset and RandomForest algo to train an ML model for classifying species of irises.

### Key Steps
1. Load dataset
2. Split data set into training and testing subsets (0.7, 0.3)
3. Train a Random Forest Classifier model
4. Make predictions on the test data
5. Calculate and log model accuracy to MLflow 

In [13]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import mlflow
import mlflow.sklearn

#kill old processes
if mlflow.active_run():
    mlflow.end_run()
    
#set url and experiment in databricks
mlflow.set_tracking_uri('databricks')
mlflow.set_experiment('/iris-random-forest')
# start an mlflow experiment
with mlflow.start_run():
    # load iris dataset
    data = load_iris()
    # split data into train and test subsets
    X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.3, random_state=23)

# train RandomForest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# predict
predictions = model.predict(X_test)

# calaculate accuracy
accuracy = accuracy_score(y_test, predictions)

# track model and metrix in mlflow
mlflow.sklearn.log_model(model, 'model')
mlflow.log_metric('accuracy', accuracy)

print('Run ID:', mlflow.active_run().info.run_id)
print('Model Accuracy:', accuracy)



2024/08/29 02:09:57 INFO mlflow.tracking._tracking_service.client: 🏃 View run rambunctious-asp-686 at: https://community.cloud.databricks.com/ml/experiments/2597702965538188/runs/c656feba1e3c4106a842bedcd931d475.
2024/08/29 02:09:57 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://community.cloud.databricks.com/ml/experiments/2597702965538188.
2024/08/29 02:09:59 INFO mlflow.tracking._tracking_service.client: 🏃 View run adaptable-dove-810 at: https://community.cloud.databricks.com/ml/experiments/3146434618114174/runs/b0cd591459c1414a9bf9ba9968fb11da.
2024/08/29 02:09:59 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://community.cloud.databricks.com/ml/experiments/3146434618114174.


Run ID: 2b75755ca9e641fcbb2c683a870a92d8
Model Accuracy: 0.9777777777777777
