#### **Cell 1: Imports**
*Purpose: Import all necessary libraries for data handling, modeling, and experiment tracking.*

In [3]:
# Standard ML libraries for data loading, splitting, and modeling.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# MLflow is the key library for experiment tracking.
# We import the main library and its 'sklearn' flavor, which provides log_model for saving scikit-learn models.
import mlflow
import mlflow.sklearn

#### **Cell 2: Load and Prepare Data**
*Purpose: Load a sample dataset and split it into training and testing sets, a standard practice in any ML workflow.*

In [4]:
# Load the Iris sample dataset used for this demonstration.
iris = load_iris()

# Hold out a portion of the samples so the model is scored on data it was not
# fitted on. The split settings are named here so they are easy to find and tune;
# the fixed seed makes the split repeatable across re-runs.
TEST_FRACTION = 0.2
SPLIT_SEED = 42

X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

# Report the resulting partition sizes as a quick sanity check.
print("Data loaded and split successfully.")
print(f"Training set size: {X_train.shape[0]} samples")
print(f"Test set size: {X_test.shape[0]} samples")

Data loaded and split successfully.
Training set size: 120 samples
Test set size: 30 samples


#### **Cell 3: Train Model and Log Experiment with MLflow**
*Purpose: This is the core of the MLOps workflow. We train a model and log its hyperparameters, its evaluation metric, and the trained model artifact to a single MLflow run so the experiment can be compared with later runs and reproduced.*

In [5]:
# Wrapping the training run in 'with mlflow.start_run() as run' groups all
# parameters, metrics, and the model artifact under a single MLflow run.
# The context manager ends the run when the block exits, and binding the run
# object gives us its ID without querying MLflow's global "active run" state.

with mlflow.start_run() as run:
    # --- 1. Log Parameters ---
    # Hyperparameters are logged explicitly so that later runs with different
    # values can be compared against this one. Both are sent in one call.
    # NOTE(review): 'liblinear' on a multiclass target may raise a deprecation
    # warning in recent scikit-learn releases — confirm against the installed version.
    solver = 'liblinear'
    random_state = 42
    mlflow.log_params({"solver": solver, "random_state": random_state})

    # --- 2. Train the Model ---
    # Standard scikit-learn model training on the training split.
    lr = LogisticRegression(solver=solver, random_state=random_state)
    lr.fit(X_train, y_train)

    # --- 3. Evaluate and Log Metrics ---
    # Score the model on the held-out test split and record the result.
    y_pred = lr.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    mlflow.log_metric("accuracy", accuracy)

    # --- 4. Log the Model Artifact ---
    # Store the fitted estimator as a run artifact under the path "model" so it
    # can be reloaded or deployed later from this run.
    mlflow.sklearn.log_model(lr, "model")

    # Capture the run ID from the run object while it is in scope.
    run_id = run.info.run_id

# --- 5. Print Results ---
# Reported only after the 'with' block has exited, i.e. once the run has been
# closed; if anything above raised, these lines are never reached.
print(f"Model Accuracy: {accuracy}")
print("Run successfully logged to MLflow.")
print(f"MLflow Run ID: {run_id}")



Model Accuracy: 1.0
Run successfully logged to MLflow.
MLflow Run ID: 18a4185da0104d5f97fa36b92d445d8e
