# In [1]:
# Import necessary modules from Scikit-learn for loading data, splitting data,
# scaling features, building a classifier, and evaluating performance.
from sklearn import datasets                     # Provides access to several machine learning datasets including Iris.
from sklearn.model_selection import train_test_split  # Splits the data into training and testing sets.
from sklearn.preprocessing import StandardScaler   # Standardizes features by removing the mean and scaling to unit variance.
from sklearn.linear_model import LogisticRegression  # Implements logistic regression for classification.
from sklearn.metrics import accuracy_score         # Computes the accuracy of the classifier’s predictions.

# --- Data -------------------------------------------------------------------
# load_iris() hands back a dictionary-like bundle; its `data` attribute holds
# the four measurements per flower (sepal length/width, petal length/width)
# and `target` holds the species label encoded as 0, 1, or 2.
iris = datasets.load_iris()
X, y = iris.data, iris.target  # X: (150, 4) feature matrix, y: (150,) label vector.

# --- Train/test partition ---------------------------------------------------
# Hold out 30% of the samples for evaluation and train on the remaining 70%.
# The fixed random_state makes the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# --- Feature scaling --------------------------------------------------------
# Learn each feature's mean and standard deviation from the training rows
# only, then apply those same statistics to both partitions. Fitting on the
# training set alone keeps information about the test set out of the model.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# --- Model ------------------------------------------------------------------
# Logistic regression is a linear classifier; max_iter=200 lets the solver run
# for up to 200 iterations. fit() returns the estimator itself, so the model
# can be constructed and trained on the standardized data in one expression.
clf = LogisticRegression(max_iter=200).fit(X_train, y_train)

# Predict a class label for every held-out sample.
y_pred = clf.predict(X_test)

# --- Evaluation -------------------------------------------------------------
# Accuracy is the fraction of test samples whose predicted label matches the
# true label.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Output:
# Accuracy: 1.0
