In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Fetch the Iris data bundled with scikit-learn: 150 flowers, each described by
# 4 measurements (sepal length, sepal width, petal length, petal width) and
# labelled with one of 3 species (Setosa, Versicolor, Virginica).
iris = load_iris()
X, y = iris.data, iris.target
target_names = iris.target_names

# Summarise what was loaded before any modelling happens. A single print call
# with a newline separator emits the same four lines (plus the trailing blank
# line) as four separate calls would.
print(
    "--- Dataset Information ---",
    f"Features (X) shape: {X.shape}",
    f"Target (y) shape: {y.shape}",
    f"Target names: {target_names}\n",
    sep="\n",
)

# Hold out 20% of the samples for evaluation; the remaining 80% train the model.
# random_state pins the shuffle so the same split is produced on every run.
# NOTE(review): the split is not stratified, so the per-class counts in the
# test set are not guaranteed to be equal - consider stratify=y if that matters.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Linear Discriminant Analysis is a supervised classifier (and dimensionality
# reducer) that looks for linear combinations of the features which best
# separate the classes. fit() hands back the estimator itself, so construction
# and training can be chained and `lda` ends up bound to the trained model.
lda = LinearDiscriminantAnalysis().fit(X_train, y_train)

# Predict a species label for every flower in the held-out test set.
y_pred = lda.predict(X_test)

# Evaluate the model's performance on the held-out test data.
print("--- Model Performance on Test Data ---")
# accuracy_score returns the fraction (0.0-1.0) of correctly classified samples,
# not a percentage.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}\n")

# Pin the label set to every class index that has a name (the targets are used
# as indices into target_names elsewhere in this script). Without this,
# scikit-learn infers the labels from y_test/y_pred: a split in which some
# species never shows up would make classification_report fail because the
# label count no longer matches target_names, and would silently drop that
# class's row and column from the confusion matrix.
class_labels = np.arange(len(target_names))

# Per-class precision, recall and F1-score, with rows named after the species.
print("Classification Report:")
print(
    classification_report(
        y_test,
        y_pred,
        labels=class_labels,
        target_names=target_names,
    )
)

# Cell (i, j) counts the samples whose true class is i and predicted class is j;
# rows and columns follow the order of class_labels.
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=class_labels))

# --- Classifying a New Flower ---
print("\n--- Classifying a New, Unseen Flower ---")

# Two hypothetical flowers the model has never seen. Each is a single-row 2-D
# array of [sepal length, sepal width, petal length, petal width], which is the
# (n_samples, n_features) layout predict() is given throughout this script.
new_flower_features_1 = np.array([[5.1, 3.5, 1.4, 0.2]])  # resembles Iris Setosa
new_flower_features_2 = np.array([[6.0, 2.9, 4.5, 1.5]])  # resembles Iris Versicolor

# Ask the trained LDA model for the class index of each flower ...
predicted_species_index_1 = lda.predict(new_flower_features_1)
predicted_species_index_2 = lda.predict(new_flower_features_2)

# ... and translate each index into the matching species name.
predicted_species_name_1 = target_names[predicted_species_index_1[0]]
predicted_species_name_2 = target_names[predicted_species_index_2[0]]

# Report both results, flower by flower.
print(f"New flower features 1: {new_flower_features_1[0]}")
print(f"Predicted species 1: {predicted_species_name_1}\n")
print(f"New flower features 2: {new_flower_features_2[0]}")
print(f"Predicted species 2: {predicted_species_name_2}")

--- Dataset Information ---
Features (X) shape: (150, 4)
Target (y) shape: (150,)
Target names: ['setosa' 'versicolor' 'virginica']

--- Model Performance on Test Data ---
Accuracy: 1.00

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]

--- Classifying a New, Unseen Flower ---
New flower features 1: [5.1 3.5 1.4 0.2]
Predicted species 1: setosa

New flower features 2: [6.  2.9 4.5 1.5]
Predicted species 2: versicolor
