Set up logging and load the Iris dataset

In [1]:
import logging
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Configure logging
logging.basicConfig(
    level=logging.INFO,  # Set the logging level to INFO
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',  # Add timestamp, logger name, level and message
    handlers=[logging.StreamHandler(), logging.FileHandler('ml_pipeline_iris.log')]  # Log to both console and file
)

# Create a custom logger
logger = logging.getLogger('ml_pipeline_iris')

# Load the Iris dataset
data = load_iris()
X = data.data
y = data.target
logger.info('Iris dataset loaded successfully.')


Split the dataset into training and testing sets and log their shapes

In [2]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Log the shapes of training and test sets
logger.info(f"Training data shape: {X_train.shape}")
logger.info(f"Test data shape: {X_test.shape}")


Train the RandomForestClassifier and log the process

In [3]:
# Create a RandomForestClassifier model
model = RandomForestClassifier()

# Log the start of training
logger.info("Training started...")

# Train the model
model.fit(X_train, y_train)

# Log the end of training
logger.info("Training ended.")


Make predictions and log the accuracy

In [4]:
# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Log the accuracy
logger.info(f"Model Accuracy: {accuracy * 100:.2f}%")


Demonstrate logging exceptions

In [5]:
try:
    # Intentional division by zero for exception logging
    result = 10 / 0
except ZeroDivisionError as e:
    logger.error(f"ZeroDivisionError: {e}", exc_info=True)

try:
    # Intentional invalid integer conversion for exception logging
    number = int("invalid_string")
except ValueError as e:
    logger.error(f"ValueError: {e}", exc_info=True)


ERROR:ml_pipeline_iris:ZeroDivisionError: division by zero
Traceback (most recent call last):
  File "/tmp/ipython-input-1719223514.py", line 3, in <cell line: 0>
    result = 10 / 0
             ~~~^~~
ZeroDivisionError: division by zero
ERROR:ml_pipeline_iris:ValueError: invalid literal for int() with base 10: 'invalid_string'
Traceback (most recent call last):
  File "/tmp/ipython-input-1719223514.py", line 9, in <cell line: 0>
    number = int("invalid_string")
             ^^^^^^^^^^^^^^^^^^^^^
ValueError: invalid literal for int() with base 10: 'invalid_string'


Run and verify logging output

In [6]:
import logging
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(), logging.FileHandler('ml_pipeline_iris.log')]
)

# Create a custom logger
logger = logging.getLogger('ml_pipeline_iris')

# Load the Iris dataset
data = load_iris()
X = data.data
y = data.target
logger.info('Iris dataset loaded successfully.')

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
logger.info(f"Training data shape: {X_train.shape}")
logger.info(f"Test data shape: {X_test.shape}")

# Create and train the RandomForestClassifier
model = RandomForestClassifier()
logger.info("Training started...")
model.fit(X_train, y_train)
logger.info("Training ended.")

# Make predictions and calculate accuracy
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
logger.info(f"Model Accuracy: {accuracy * 100:.2f}%")

# Log exceptions with traceback
try:
    result = 10 / 0
except ZeroDivisionError as e:
    logger.error(f"ZeroDivisionError: {e}", exc_info=True)

try:
    number = int("invalid_string")
except ValueError as e:
    logger.error(f"ValueError: {e}", exc_info=True)


ERROR:ml_pipeline_iris:ZeroDivisionError: division by zero
Traceback (most recent call last):
  File "/tmp/ipython-input-4089047115.py", line 41, in <cell line: 0>
    result = 10 / 0
             ~~~^~~
ZeroDivisionError: division by zero
ERROR:ml_pipeline_iris:ValueError: invalid literal for int() with base 10: 'invalid_string'
Traceback (most recent call last):
  File "/tmp/ipython-input-4089047115.py", line 46, in <cell line: 0>
    number = int("invalid_string")
             ^^^^^^^^^^^^^^^^^^^^^
ValueError: invalid literal for int() with base 10: 'invalid_string'
