## Data and Preprocessing Visualizations

Understanding your data is the first step. These visualizations help you see raw images, augmented versions, and the distribution of your classes.

In [None]:
# --- 1. Data and Preprocessing Visualizations ---

print("\n--- Displaying Sample Original Images ---")
# Pull one batch of images and labels from the training generator.
images, labels = next(train_generator)
class_names = list(train_generator.class_indices.keys())

# A batch can hold fewer than 9 images (small batch_size or a short final
# batch), so only plot what is actually there instead of indexing blindly.
num_samples_to_show = min(9, len(images))

plt.figure(figsize=(12, 12))
for i in range(num_samples_to_show):
    ax = plt.subplot(3, 3, i + 1)
    # Bring the image back to [0, 1] so imshow renders float data correctly.
    display_image = images[i]
    if display_image.min() < 0:  # presumably scaled to [-1, 1]
        display_image = display_image * 0.5 + 0.5
    elif display_image.max() > 1:  # presumably still in [0, 255]
        display_image = display_image / 255.0

    plt.imshow(display_image)
    # Labels are treated as one-hot rows, so argmax recovers the class index.
    true_label_idx = np.argmax(labels[i])
    plt.title(f"Class: {class_names[true_label_idx]}")
    plt.axis("off")
plt.suptitle("Sample Original Images from Training Set", fontsize=16)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()


print("\n--- Displaying Sample Augmented Images ---")
# To show augmented images, build a temporary generator that applies the
# augmentations in memory (nothing is written to disk).
temp_aug_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.8, 1.2],
    # NOTE(review): Keras documents `rescale` as applied after the other
    # transformations, so this shift presumably acts on [0, 255] pixel values
    # and is barely visible -- confirm this matches the training pipeline.
    channel_shift_range=0.1,
    fill_mode='nearest',
    # The model-specific preprocessing function is deliberately left out so the
    # plot shows the raw augmentation only.
    # preprocessing_function=combined_preprocessing_function
)

temp_aug_generator = temp_aug_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='image_path',
    y_col='label',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=9, # Get 9 images for display
    class_mode='categorical',
    shuffle=True
)

images_aug, labels_aug = next(temp_aug_generator)

# Decode labels with this generator's own index mapping, so the titles stay
# correct even if the training generator was built with a different class order.
aug_class_names = list(temp_aug_generator.class_indices.keys())

# The frame may contain fewer than 9 valid images; plot only what was returned.
num_aug_to_show = min(9, len(images_aug))

plt.figure(figsize=(12, 12))
for i in range(num_aug_to_show):
    ax = plt.subplot(3, 3, i + 1)
    # De-normalize for display if the data ended up in [-1, 1].
    display_image_aug = images_aug[i]
    if display_image_aug.min() < 0:
        display_image_aug = display_image_aug * 0.5 + 0.5
    plt.imshow(display_image_aug)
    true_label_idx_aug = np.argmax(labels_aug[i])
    plt.title(f"Augmented: {aug_class_names[true_label_idx_aug]}")
    plt.axis("off")
plt.suptitle("Sample Augmented Images from Training Set", fontsize=16)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()


print("\n--- Class Distribution Plots ---")
# One horizontal count plot per split, drawn side by side on a shared figure.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
fig.suptitle('Class Distribution Across Splits', fontsize=16)

# (title prefix, dataframe, y-axis label); only the left panel names the y-axis
# so the other two stay uncluttered.
split_panels = [
    ('Training', train_df, 'Class'),
    ('Validation', val_df, ''),
    ('Test', test_df, ''),
]

for panel_ax, (split_title, split_df, y_axis_label) in zip(axes, split_panels):
    # Classes are listed from most to least frequent within each split.
    classes_by_count = split_df['label'].value_counts().index
    sns.countplot(
        y='label',
        data=split_df,
        order=classes_by_count,
        ax=panel_ax,
        palette='viridis',
    )
    panel_ax.set_title(f'{split_title} Set ({len(split_df)} images)')
    panel_ax.set_xlabel('Number of Images')
    panel_ax.set_ylabel(y_axis_label)

plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

# Actual on-disk image sizes are not analysed here: that would require loading
# every file. Only the resize target used by the generators is reported.
print("\nNote: Image dimensions are handled by target_size in ImageDataGenerator.")
print(f"All images will be resized to ({IMG_HEIGHT}, {IMG_WIDTH}) for model input.")

## Model Architecture Visualizations

These help you understand the layers and connections within your CNN.

In [None]:
# --- 2. Model Architecture Visualizations ---

print("\n--- Model Summary ---")
# Text overview: layers, output shapes and parameter counts.
model.summary()

print("\n--- Plotting Model Graph ---")
# plot_model needs the pydot package and a system-level graphviz install:
#   pip install graphviz pydot
architecture_png = 'model_architecture.png'
try:
    plot_model(
        model,
        to_file=architecture_png,
        show_shapes=True,
        show_layer_names=True,
    )
    print(f"Model architecture saved to '{architecture_png}'.")
    # Render the saved diagram inline in the notebook.
    from IPython.display import Image
    display(Image(filename=architecture_png))
except ImportError:
    # Raised when the plotting dependencies are missing.
    for hint in (
        "Warning: pydot and/or graphviz not installed. Cannot plot model architecture.",
        "Please install them: pip install pydot graphviz",
        "Also ensure graphviz is installed on your system and added to PATH.",
    ):
        print(hint)
except Exception as e:
    # Any other failure is reported without aborting the notebook run.
    print(f"An error occurred while plotting the model: {e}")

## Feature and Interpretability Visualizations

These visualizations help you understand what features your CNN is learning and which parts of an image are important for its predictions.

In [None]:
# --- 3. Feature and Interpretability Visualizations ---

print("\n--- Visualizing Activation Maps (Feature Maps) ---")
# Shows the output of intermediate layers for one image, to illustrate which
# features (edges, textures, patterns) are detected at different depths.

# Take the first image of the next test batch and re-add the batch dimension.
sample_image, sample_label = next(test_generator)
sample_image = sample_image[0]
sample_image_expanded = np.expand_dims(sample_image, axis=0)

# Select layers by name; adjust the substring if your model names differ.
conv_layers = [layer for layer in model.layers if 'conv2d' in layer.name]

if not conv_layers:
    print("No layers with 'conv2d' in their name were found; skipping activation maps.")
else:
    # Keep the names next to the outputs so every row is labelled with the layer
    # that produced it (indexing model.layers by row number picks the wrong layer).
    conv_layer_names = [layer.name for layer in conv_layers]
    layer_outputs = [layer.output for layer in conv_layers]
    activation_model = Model(inputs=model.input, outputs=layer_outputs)
    activations = activation_model.predict(sample_image_expanded)
    # With a single output, predict may return a bare array instead of a list
    # (depends on the Keras version); normalise to a list either way.
    if not isinstance(activations, (list, tuple)):
        activations = [activations]

    # De-normalize the image for display if it is in [-1, 1].
    display_image_orig = sample_image
    if display_image_orig.min() < 0:
        display_image_orig = display_image_orig * 0.5 + 0.5

    num_rows = len(activations) + 1  # one row for the image, one per layer

    plt.figure(figsize=(15, 8))
    plt.subplot(num_rows, 1, 1)
    plt.imshow(display_image_orig)
    plt.title(f"Original Image (True Class: {class_names[np.argmax(sample_label[0])]})")
    plt.axis('off')

    for i, (layer_name, activation) in enumerate(zip(conv_layer_names, activations)):
        # Last axis is indexed as the channel axis below.
        n_features = activation.shape[-1]

        # Display up to 8 feature maps per layer (fewer if the layer has fewer).
        num_to_display = min(n_features, 8)
        for j in range(num_to_display):
            # Row i + 1 of the grid (row 0 holds the original image).
            ax = plt.subplot(num_rows, num_to_display, num_to_display * (i + 1) + j + 1)
            plt.imshow(activation[0, :, :, j], cmap='viridis')
            plt.title(f"{layer_name}\nChannel {j}")
            plt.axis('off')
    plt.suptitle("Activation Maps for Sample Image", fontsize=16)
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()


print("\n--- Visualizing Filters (Weights) of First Conv Layer ---")
# The weights of the first convolutional layer hint at the basic patterns
# (edges, colors, textures) the network responds to.

# First Conv2D layer in model order, or None when the model has none.
first_conv_layer = next(
    (layer for layer in model.layers if isinstance(layer, Conv2D)),
    None,
)

if first_conv_layer is not None:
    # get_weights() yields [kernel] or [kernel, bias] depending on use_bias, so
    # take the kernel by position instead of unpacking two values.
    filters = first_conv_layer.get_weights()[0]
    print(f"Shape of filters in '{first_conv_layer.name}': {filters.shape}") # (kernel_size, kernel_size, input_channels, output_channels)

    # Min-max normalise to [0, 1] for display; a constant kernel has zero range,
    # so avoid dividing by zero and show it as all zeros.
    f_min, f_max = filters.min(), filters.max()
    value_range = f_max - f_min
    if value_range > 0:
        filters = (filters - f_min) / value_range
    else:
        filters = np.zeros_like(filters)

    # Show up to 16 filters on a near-square grid.
    n_filters = filters.shape[-1]
    num_to_display = min(n_filters, 16)
    rows = int(np.ceil(np.sqrt(num_to_display)))
    cols = int(np.ceil(num_to_display / rows))

    plt.figure(figsize=(cols * 2, rows * 2))
    for i in range(num_to_display):
        ax = plt.subplot(rows, cols, i + 1)
        if filters.shape[2] == 3:
            # Three input channels: render the kernel as an RGB patch.
            plt.imshow(filters[:, :, :, i])
        else:
            # Any other channel count: show only the first input channel.
            plt.imshow(filters[:, :, 0, i], cmap='gray')
        plt.title(f'Filter {i+1}')
        plt.axis('off')
    plt.suptitle(f"Filters of Layer: {first_conv_layer.name}", fontsize=16)
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()
else:
    print("No convolutional layer found in the model to visualize filters.")


print("\n--- Class Activation Maps (Grad-CAM) ---")
# Grad-CAM highlights the regions of the input image that matter for a
# prediction. It needs a target layer (usually the last convolutional layer
# before classification) and a target class.

def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """
    Generates a Grad-CAM heatmap for a given image and model.

    Args:
        img_array (np.array): Preprocessed image batch; only the first sample's
            feature map is used for the heatmap.
        model (tf.keras.Model): The trained Keras model.
        last_conv_layer_name (str): Name of the layer whose activations are
            weighted (normally the last convolutional layer).
        pred_index (int, optional): Class index to explain. If None, the class
            with the highest score for the first sample is used.

    Returns:
        np.array: Heatmap at the spatial resolution of the chosen layer, with
        values in [0, 1]. All zeros when the layer contributes no positive
        evidence for the class (instead of NaN from a 0/0 division).
    """
    # Model mapping the input to (target-layer activations, predictions).
    grad_model = Model(
        inputs=model.inputs,
        outputs=[model.get_layer(last_conv_layer_name).output, model.output]
    )

    # Record the forward pass so the class score can be differentiated with
    # respect to the target-layer activations.
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # Gradient of the chosen class score w.r.t. the feature map.
    grads = tape.gradient(class_channel, last_conv_layer_output)

    # Average over batch and spatial axes: one importance weight per channel.
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight each channel by its importance and sum over channels.
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep positive evidence only, then scale to [0, 1]. divide_no_nan returns 0
    # where the denominator is 0, so an all-zero map stays all-zero.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.reduce_max(heatmap))
    return heatmap.numpy()

def display_gradcam(img, heatmap, alpha=0.4):
    """
    Overlays a Grad-CAM heatmap on an image.

    Args:
        img (np.array): Image of shape (H, W, 3). Either uint8 in [0, 255] or a
            float array; floats whose maximum is <= 1 are treated as [0, 1] and
            scaled up, other floats are treated as already being in [0, 255].
        heatmap (np.array): 2-D Grad-CAM heatmap in [0, 1]. It may be smaller
            than the image (it usually has the conv layer's resolution) and is
            resized to the image size.
        alpha (float): Blend weight of the image; the heatmap gets 1 - alpha.

    Returns:
        np.array: uint8 RGB overlay with the same height and width as `img`.
    """
    # Bring the image to uint8 [0, 255]; a plain astype on [0, 1] floats would
    # truncate everything to 0 and give a black image.
    img = np.asarray(img)
    if img.dtype != np.uint8:
        if img.size and img.max() <= 1.0:
            img = img * 255.0
        img = np.clip(img, 0, 255).astype(np.uint8)

    # Sanitise the heatmap (NaN/inf -> finite, clamp to [0, 1]) and upsample it
    # to the image size: cv2.addWeighted needs both inputs to have equal shape.
    heatmap = np.nan_to_num(np.asarray(heatmap, dtype=np.float32))
    heatmap = np.clip(heatmap, 0.0, 1.0)
    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))  # dsize is (width, height)

    # Rescale heatmap to integer colormap indices 0-255.
    heatmap = np.uint8(255 * heatmap)

    # Colorize with viridis, drop the alpha channel, convert back to uint8.
    colormap = plt.cm.viridis
    heatmap = colormap(heatmap)[:, :, :3]
    heatmap = np.uint8(255 * heatmap)

    # Weighted blend of image and colored heatmap.
    heatmap_overlay = cv2.addWeighted(img, alpha, heatmap, 1 - alpha, 0)
    return heatmap_overlay

# Find the last convolutional layer in the model (searching from the output side).
last_conv_layer = None
for layer in reversed(model.layers):
    if isinstance(layer, Conv2D):
        last_conv_layer = layer
        break

if last_conv_layer is not None:
    last_conv_layer_name = last_conv_layer.name
    print(f"Using last convolutional layer: '{last_conv_layer_name}' for Grad-CAM.")

    # Take the first image of the next test batch together with its true label.
    sample_images_batch, sample_labels_batch = next(test_generator)
    sample_image_for_cam = sample_images_batch[0]
    true_label_for_cam_idx = np.argmax(sample_labels_batch[0])
    true_label_for_cam_name = class_names[true_label_for_cam_idx]

    # Predict the class for the sample image.
    preds = model.predict(np.expand_dims(sample_image_for_cam, axis=0))
    predicted_class_idx = np.argmax(preds[0])
    predicted_class_name = class_names[predicted_class_idx]
    predicted_confidence = preds[0][predicted_class_idx]

    # Heatmap for the predicted class (not the true class).
    heatmap = make_gradcam_heatmap(
        np.expand_dims(sample_image_for_cam, axis=0),
        model,
        last_conv_layer_name,
        pred_index=predicted_class_idx
    )

    # De-normalize to [0, 1] with the same heuristics used for the sample-image
    # grid, then convert to uint8. Clipping first prevents uint8 wrap-around
    # when a value falls slightly outside the expected range.
    display_img_cam = sample_image_for_cam
    if display_img_cam.min() < 0:  # presumably scaled to [-1, 1]
        display_img_cam = display_img_cam * 0.5 + 0.5
    elif display_img_cam.max() > 1:  # presumably still in [0, 255]
        display_img_cam = display_img_cam / 255.0
    display_img_cam = np.uint8(255 * np.clip(display_img_cam, 0.0, 1.0))

    # Create and display the Grad-CAM overlay next to the original image.
    gradcam_overlay = display_gradcam(display_img_cam, heatmap)

    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.imshow(display_img_cam)
    plt.title(f"Original Image\nTrue: {true_label_for_cam_name}\nPred: {predicted_class_name} ({predicted_confidence:.2f})")
    plt.axis('off')

    plt.subplot(1, 2, 2)
    plt.imshow(gradcam_overlay)
    plt.title("Grad-CAM Heatmap Overlay")
    plt.axis('off')
    plt.suptitle("Grad-CAM Visualization", fontsize=16)
    plt.tight_layout()
    plt.show()

else:
    print("Could not find a convolutional layer for Grad-CAM visualization.")
    print("Ensure your model has at least one Conv2D layer.")

## Model Performance Visualizations

These plots help you assess how well your model learned and how it performs on unseen data.

In [None]:
# --- 4. Model Performance Visualizations ---

print("\n--- Training History Plots (Loss & Accuracy) ---")
# `history` may be a plain dict of metric lists or the History object returned
# by model.fit (which keeps that dict in its `.history` attribute). Accept both;
# a dict has no `.history` attribute, so it is used as-is.
history_dict = getattr(history, 'history', history)

plt.figure(figsize=(12, 5))
# One panel per metric: training curve and validation curve on the same axes.
for panel_index, (metric_key, metric_label) in enumerate(
        [('loss', 'Loss'), ('accuracy', 'Accuracy')], start=1):
    plt.subplot(1, 2, panel_index)
    plt.plot(history_dict[metric_key], label=f'Training {metric_label}')
    plt.plot(history_dict[f'val_{metric_key}'], label=f'Validation {metric_label}')
    plt.title(f'{metric_label} over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel(metric_label)
    plt.legend()
    plt.grid(True)

plt.suptitle("Model Training History", fontsize=16)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()


print("\n--- Confusion Matrix ---")
# A confusion matrix counts correct and incorrect predictions per
# (true class, predicted class) pair.

print("Generating predictions for Confusion Matrix...")
# Earlier cells pulled batches with next(test_generator); rewind so prediction
# starts from the first batch.
test_generator.reset()
# `.classes` holds the labels in file order, so they only line up with the
# predictions when the generator does not shuffle.
if getattr(test_generator, 'shuffle', False):
    print("Warning: test_generator has shuffle=True; true labels and predictions "
          "will not be aligned. Recreate it with shuffle=False.")
test_labels = test_generator.classes # Integer class indices
y_pred_probs = model.predict(test_generator)
y_pred_classes = np.argmax(y_pred_probs, axis=1)

# Map integer indices back to class names for readable axes and report rows.
true_class_names = [class_names[idx] for idx in test_labels]
predicted_class_names = [class_names[idx] for idx in y_pred_classes]

# Rows are true classes, columns are predicted classes, both in class_names order.
cm = confusion_matrix(true_class_names, predicted_class_names, labels=class_names)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()


print("\n--- Classification Report ---")
# Precision, recall, F1-score and support per class. Passing `labels` fixes the
# row set and order to class_names, so the report still works (and the
# target_names still match) when a class is absent from the test split and
# from the predictions.
print(classification_report(
    true_class_names,
    predicted_class_names,
    labels=class_names,
    target_names=class_names,
))


print("\n--- ROC Curve and AUC (One-vs-Rest for Multi-class) ---")
# ROC/AUC is a binary-classifier tool; for the multi-class case each class is
# scored against all the others (one-vs-rest).

# One-hot encode the integer test labels: one binary column per class.
y_true_one_hot = tf.keras.utils.to_categorical(test_labels, num_classes=len(class_names))

roc_fig = plt.figure(figsize=(10, 8))
roc_ax = roc_fig.add_subplot(1, 1, 1)

for i, class_name in enumerate(class_names):
    # Column i of the targets vs. column i of the predicted probabilities.
    fpr, tpr, _ = roc_curve(y_true_one_hot[:, i], y_pred_probs[:, i])
    roc_auc = auc(fpr, tpr)
    roc_ax.plot(fpr, tpr, label=f'ROC curve of {class_name} (area = {roc_auc:.2f})')

# Diagonal reference line: the expected curve of a random classifier.
roc_ax.plot([0, 1], [0, 1], 'k--', label='Random Classifier')
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic (ROC) Curve (One-vs-Rest)')
roc_ax.legend(loc="lower right")
roc_ax.grid(True)
plt.show()


print("\n--- Sample Predictions with Confidence ---")
# Show a handful of test images titled with true label, predicted label and
# the predicted class's score.

# Next batch from the test generator, plus the model's scores for it.
sample_images_batch_pred, sample_labels_batch_pred = next(test_generator)
sample_predictions = model.predict(sample_images_batch_pred)

num_predictions_to_show = min(9, len(sample_images_batch_pred))  # at most a 3x3 grid

predictions_fig = plt.figure(figsize=(15, 15))
for i in range(num_predictions_to_show):
    ax = predictions_fig.add_subplot(3, 3, i + 1)

    # De-normalize for display if the image is in [-1, 1].
    display_image_pred = sample_images_batch_pred[i]
    if display_image_pred.min() < 0:
        display_image_pred = (display_image_pred * 0.5 + 0.5)
    ax.imshow(display_image_pred)

    true_label_idx = np.argmax(sample_labels_batch_pred[i])
    predicted_label_idx = np.argmax(sample_predictions[i])
    confidence = sample_predictions[i][predicted_label_idx]

    true_label_name = class_names[true_label_idx]
    predicted_label_name = class_names[predicted_label_idx]

    # Green title for a correct prediction, red for a wrong one.
    if true_label_idx == predicted_label_idx:
        color = "green"
    else:
        color = "red"
    ax.set_title(f"True: {true_label_name}\nPred: {predicted_label_name} ({confidence:.2f})", color=color)
    ax.axis("off")
predictions_fig.suptitle("Sample Predictions with Confidence", fontsize=16)
predictions_fig.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

## Model Comparison Visualizations

If you train multiple CNN models (e.g., with different architectures, hyperparameters, or datasets), these visualizations help you compare their performance.

In [None]:
# --- 5. Model Comparison Visualizations ---

print("\n--- Comparing Training Histories of Multiple Models ---")
# Comparing models requires each model's training history. Only one real
# history is available here, so a synthetic second one is generated purely for
# demonstration. Replace it with your actual second model's history.

# Accept either a plain dict or a Keras History object (dict is in `.history`).
history_model1 = getattr(history, 'history', history)

# Seeded generator so the demo curves are identical on every run.
_demo_rng = np.random.default_rng(42)

# Synthetic curves: losses sorted descending and scaled by 0.9, accuracies
# sorted ascending and scaled by 1.05, each as long as the matching real curve.
history_model2 = {
    'loss': np.sort(_demo_rng.random(len(history_model1['loss'])) * 1.8 + 0.6)[::-1] * 0.9,
    'accuracy': np.sort(_demo_rng.random(len(history_model1['accuracy'])) * 0.15 + 0.65) * 1.05,
    'val_loss': np.sort(_demo_rng.random(len(history_model1['val_loss'])) * 1.8 + 0.6)[::-1] * 0.9,
    'val_accuracy': np.sort(_demo_rng.random(len(history_model1['val_accuracy'])) * 0.15 + 0.65) * 1.05,
}


model_histories = {
    'Model A': history_model1,
    'Model B': history_model2
}

plt.figure(figsize=(12, 5))
# One panel per metric; solid lines are training curves, dashed are validation.
for panel_index, (metric_key, metric_label) in enumerate(
        [('loss', 'Loss'), ('accuracy', 'Accuracy')], start=1):
    plt.subplot(1, 2, panel_index)
    for model_name, hist in model_histories.items():
        plt.plot(hist[metric_key], label=f'{model_name} Training {metric_label}')
        plt.plot(hist[f'val_{metric_key}'], linestyle='--', label=f'{model_name} Validation {metric_label}')
    plt.title(f'{metric_label} Comparison over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel(metric_label)
    plt.legend()
    plt.grid(True)

plt.suptitle("Comparison of Model Training Histories", fontsize=16)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()


print("\n--- Comparing Final Performance Metrics ---")
# Uses the last-epoch validation loss/accuracy of each history as the "final"
# metrics. Replace with real evaluation results (e.g. from model.evaluate).
final_metrics = {
    'Model A': {'loss': history_model1['val_loss'][-1], 'accuracy': history_model1['val_accuracy'][-1]},
    'Model B': {'loss': history_model2['val_loss'][-1], 'accuracy': history_model2['val_accuracy'][-1]},
}

models = list(final_metrics.keys())
losses = [final_metrics[m]['loss'] for m in models]
accuracies = [final_metrics[m]['accuracy'] for m in models]

x = np.arange(len(models)) # Label locations
width = 0.35 # Width of the bars

# Grouped bars: loss on the left and accuracy on the right of each model tick.
fig, ax = plt.subplots(figsize=(8, 6))
rects1 = ax.bar(x - width/2, losses, width, label='Validation Loss', color='skyblue')
rects2 = ax.bar(x + width/2, accuracies, width, label='Validation Accuracy', color='lightcoral')

ax.set_ylabel('Value')
ax.set_title('Final Validation Performance Comparison')
ax.set_xticks(x)
ax.set_xticklabels(models)
ax.legend()
ax.set_ylim(0, max(max(losses), max(accuracies)) * 1.1) # 10% headroom for the labels

def autolabel(rects, ax=None):
    """Attach a text label above each bar in *rects*, displaying its height.

    Args:
        rects: Iterable of bar patches, e.g. the container returned by ax.bar.
        ax: Axes to annotate. If None, each bar's own axes (`rect.axes`) is
            used, so the function does not depend on whatever the notebook-wide
            `ax` variable currently points to.
    """
    for rect in rects:
        target_ax = ax if ax is not None else rect.axes
        height = rect.get_height()
        target_ax.annotate(f'{height:.2f}',
                           xy=(rect.get_x() + rect.get_width() / 2, height),
                           xytext=(0, 3), # 3 points vertical offset
                           textcoords="offset points",
                           ha='center', va='bottom')

autolabel(rects1, ax)
autolabel(rects2, ax)

plt.tight_layout()
plt.show()