In [3]:
# Import necessary libraries
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [4]:
# Set up MLflow tracking: choose the tracking server, then select the experiment
TRACKING_URI = "http://localhost:5000"
EXPERIMENT_NAME = "Iris Classification 2"

mlflow.set_tracking_uri(TRACKING_URI)
# Left as the last expression so the cell still displays the Experiment object
mlflow.set_experiment(EXPERIMENT_NAME)

2024/09/20 13:55:04 INFO mlflow.tracking.fluent: Experiment with name 'Iris Classification 2' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/947996101379484187', creation_time=1726854904817, experiment_id='947996101379484187', last_update_time=1726854904817, lifecycle_stage='active', name='Iris Classification 2', tags={}>

In [5]:
# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target
# Show only the array shapes; echoing the full arrays floods the notebook output
X.shape, y.shape

((150, 4), (150,))

In [6]:
# Split the data into training and testing sets:
# 20% of the samples are held out, and the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Define a function to train and log a model
def train_and_log_model(model, model_name, params):
    """Fit ``model``, score it on the held-out split, and record the run in MLflow.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``
    variables, so the train/test split cell must be executed before calling this.

    Args:
        model: Unfitted estimator exposing ``fit`` and ``predict``.
        model_name: Used as the MLflow run name and as the label in the
            printed summary line.
        params: Dict of hyperparameters to record on the run. It is logged
            exactly as passed; it is not read back from ``model``.

    Returns:
        None. The parameters, the four metrics (accuracy plus weighted
        precision, recall and F1) and the fitted model are logged to the
        MLflow run, and a one-line summary is printed.
    """
    # Start an MLflow run
    with mlflow.start_run(run_name=model_name):
        # Log parameters first so a run that fails during training still
        # records the configuration that was attempted
        mlflow.log_params(params)

        # Train the model
        model.fit(X_train, y_train)

        # Make predictions
        y_pred = model.predict(X_test)

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')

        # Log all metrics in a single call instead of one call per metric
        mlflow.log_metrics(
            {
                "accuracy": accuracy,
                "precision": precision,
                "recall": recall,
                "f1_score": f1,
            }
        )

        # Log the model
        mlflow.sklearn.log_model(model, "model")

        print(f"{model_name} - Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}")

In [8]:
# Train and log Logistic Regression:
# the same dict configures the estimator and is recorded on the MLflow run
lr_params = dict(C=1.0, max_iter=100)
lr_model = LogisticRegression(**lr_params)
train_and_log_model(lr_model, model_name="Logistic Regression", params=lr_params)

2024/09/20 13:55:15 INFO mlflow.tracking._tracking_service.client: 🏃 View run Logistic Regression at: http://localhost:5000/#/experiments/947996101379484187/runs/68760eb7142042febe3c23d343ec4b32.
2024/09/20 13:55:15 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/947996101379484187.


Logistic Regression - Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1-score: 1.0000


In [9]:
# Train and log Decision Tree
# random_state is fixed (same seed as the train/test split) so the fitted tree
# and its metrics are repeatable; keeping it in the dict also logs it on the run
dt_params = {"max_depth": 5, "min_samples_split": 2, "random_state": 42}
dt_model = DecisionTreeClassifier(**dt_params)
train_and_log_model(dt_model, "Decision Tree", dt_params)

2024/09/20 13:55:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run Decision Tree at: http://localhost:5000/#/experiments/947996101379484187/runs/68a56157bb7542768120032cc0cde658.
2024/09/20 13:55:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/947996101379484187.


Decision Tree - Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1-score: 1.0000


In [10]:
# Train and log Random Forest
# random_state is fixed (same seed as the train/test split) so the forest's
# bootstrap and feature sampling are repeatable; keeping it in the dict also
# logs it on the run
rf_params = {"n_estimators": 100, "max_depth": 5, "min_samples_split": 2, "random_state": 42}
rf_model = RandomForestClassifier(**rf_params)
train_and_log_model(rf_model, "Random Forest", rf_params)

2024/09/20 13:55:27 INFO mlflow.tracking._tracking_service.client: 🏃 View run Random Forest at: http://localhost:5000/#/experiments/947996101379484187/runs/6b3c9d501ae04827bfe5414a8a5cb4d2.
2024/09/20 13:55:27 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/947996101379484187.


Random Forest - Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1-score: 1.0000
