In [1]:
import mlflow 
from sklearn.datasets import load_iris 
from sklearn.model_selection import train_test_split 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.metrics import accuracy_score 
import os 

In [2]:
# Point the MLflow client at the tracking server reachable at http://localhost:8090.
mlflow.set_tracking_uri(uri="http://localhost:8090")

In [3]:
# Select the experiment that subsequent runs are recorded under; MLflow creates
# it when no experiment with this name exists yet. Left as the last expression
# so the notebook displays the returned Experiment object.
mlflow.set_experiment(experiment_name="Iris-Tracking")

2024/05/03 17:19:45 INFO mlflow.tracking.fluent: Experiment with name 'Iris-Tracking' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/451129951168216863', creation_time=1714736985543, experiment_id='451129951168216863', last_update_time=1714736985543, lifecycle_stage='active', name='Iris-Tracking', tags={}>

In [4]:
# Fetch the Iris dataset and pull out the feature matrix (X) and the class
# labels (y). `iris` stays bound so the rest of the loaded object remains available.
iris = load_iris()
X = iris.data
y = iris.target

In [7]:
# Experiment name kept in a variable so it can be attached to the run as a tag.
experiment_name: str = "Iris-Tracking"

In [6]:
# Hold out 20% of the samples for evaluation; the fixed random_state makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [8]:
# Train a random forest inside a single MLflow run and record its
# hyperparameters, its accuracy on the held-out test split, and the fitted model.

# Single source of truth for the hyperparameters: the same dict configures the
# estimator and is logged, so the recorded values cannot drift from the ones
# actually used for training.
rf_params = {
    "n_estimators": 100,
    "random_state": 42,
}

with mlflow.start_run(run_name="Random Forest Experiment"):

    # Tag the run with the experiment name as extra metadata
    mlflow.set_tag("experiment_name", experiment_name)

    # Log model parameters
    mlflow.log_params(rf_params)

    # Define and train the model
    model = RandomForestClassifier(**rf_params)
    model.fit(X_train, y_train)

    # Make predictions on the held-out split
    y_pred = model.predict(X_test)

    # Calculate and log accuracy
    accuracy = accuracy_score(y_test, y_pred)
    mlflow.log_metric("accuracy", accuracy)

    # Log the fitted model under the run's "models" artifact path.
    # log_model serializes the estimator and uploads it in one step, so nothing
    # is written to the working directory. The previous save_model-to-a-fixed-
    # local-path approach failed when the cell was re-run, because save_model
    # refuses to write into an existing non-empty directory.
    mlflow.sklearn.log_model(model, artifact_path="models")
