In [None]:
# Step 1: Install Required Packages
!pip install kagglehub

In [None]:
# Step 2: Import Required Libraries
import kagglehub
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import os
import shutil
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
from google.colab import files
import random

In [None]:
# Step 3: Download Dataset from Kaggle
print("Downloading waste classification dataset...")
path = kagglehub.dataset_download("aashidutt3/waste-segregation-image-dataset")
print("Path to dataset files:", path)

# The download location doubles as the dataset base; both source splits are
# addressed beneath its "Dataset" folder.
dataset_base_path = path
train_data_path, test_data_path = (
    os.path.join(dataset_base_path, "Dataset", split_dir)
    for split_dir in ("train", "val")
)

print(f"Training data path: {train_data_path}")
print(f"Test data path: {test_data_path}")

In [None]:
# Step 4: Create Dataset Verification Function
def verify_dataset_structure(base_path):
    """Print an indented tree of ``base_path`` with a short file preview per folder.

    Args:
        base_path: Directory to walk.

    Returns:
        False when ``base_path`` does not exist, True once the tree has been printed.
    """
    print(f"Checking dataset structure at: {base_path}")

    if not os.path.exists(base_path):
        print(f"❌ Path does not exist: {base_path}")
        return False

    print("📁 Dataset structure:")
    preview_limit = 3  # number of file names listed per directory
    for current_dir, _subdirs, filenames in os.walk(base_path):
        # Depth is derived from how many separators remain after stripping the base.
        depth = current_dir.replace(base_path, '').count(os.sep)
        dir_pad = ' ' * (2 * depth)
        file_pad = ' ' * (2 * (depth + 1))

        print(f"{dir_pad}{os.path.basename(current_dir)}/")
        for shown_name in filenames[:preview_limit]:
            print(f"{file_pad}{shown_name}")

        hidden_count = len(filenames) - preview_limit
        if hidden_count > 0:
            print(f"{file_pad}... and {hidden_count} more files")
    return True

# Verify the dataset structure.
# NOTE: the boolean result is not checked here, so a missing path only prints
# a message and does not stop the notebook.
verify_dataset_structure(dataset_base_path)

In [None]:
# Step 5: Organize Dataset for Binary Classification
def organize_binary_dataset_simple(split_ratio=0.8, seed=42):
    """
    Flatten the nested source dataset into a standard binary-classification layout.

    The source has Dataset/{train,val}/{biodegradable,non_biodegradable}/[subcategories].
    Images from both source splits are pooled per class, shuffled, and re-split into
    /content/organized_dataset/{train,validation}/{Biodegradable,Non-Biodegradable}.

    The output folder is deleted and rebuilt on every call. Without that, a second
    run would add differently-numbered copies next to the old ones, and the same
    image could end up in both the train and the validation folder.

    Args:
        split_ratio: Fraction of each class copied to the train folder (rest goes
            to validation). Must be between 0 and 1.
        seed: Seed for the shuffle so the split is reproducible. Pass None for a
            different split on every call.

    Returns:
        The path of the organized dataset root.

    Raises:
        ValueError: If ``split_ratio`` is outside [0, 1].
    """
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio}")

    organized_path = "/content/organized_dataset"
    image_extensions = ('.jpg', '.jpeg', '.png')
    # (folder name in the source dataset, class folder name in the organized tree)
    class_folders = [
        ("biodegradable", "Biodegradable"),
        ("non_biodegradable", "Non-Biodegradable"),
    ]

    # Start from an empty tree so repeated runs stay idempotent.
    if os.path.isdir(organized_path):
        shutil.rmtree(organized_path)

    for split in ['train', 'validation']:
        for _, category in class_folders:
            os.makedirs(f"{organized_path}/{split}/{category}", exist_ok=True)

    print("Organizing dataset...")

    train_source_path = os.path.join(dataset_base_path, "Dataset", "train")
    val_source_path = os.path.join(dataset_base_path, "Dataset", "val")

    print(f"Train source path: {train_source_path}")
    print(f"Val source path: {val_source_path}")

    source_splits = [
        (train_source_path, "train_source"),
        (val_source_path, "val_source")
    ]

    def collect_images(source_path, source_class):
        """Return image paths found in every subcategory folder of one source class."""
        class_path = os.path.join(source_path, source_class)
        found = []
        if not os.path.exists(class_path):
            return found
        # Sorted listings make the pre-shuffle order independent of the filesystem.
        for subcat in sorted(os.listdir(class_path)):
            subcat_path = os.path.join(class_path, subcat)
            if os.path.isdir(subcat_path):
                images = [os.path.join(subcat_path, f)
                          for f in sorted(os.listdir(subcat_path))
                          if f.lower().endswith(image_extensions)]
                found.extend(images)
                print(f"  Found {len(images)} images in {source_class}/{subcat}")
        return found

    # Pool images from both source splits, per class.
    collected = {category: [] for _, category in class_folders}
    for source_path, split_name in source_splits:
        if not os.path.exists(source_path):
            continue
        print(f"\nProcessing {split_name}...")
        for source_class, category in class_folders:
            collected[category].extend(collect_images(source_path, source_class))

    print(f"\nTotal collected:")
    print(f"  Biodegradable: {len(collected['Biodegradable'])} images")
    print(f"  Non-Biodegradable: {len(collected['Non-Biodegradable'])} images")

    # A private RNG keeps the split reproducible without touching global random state.
    rng = random.Random(seed)
    for _, category in class_folders:
        rng.shuffle(collected[category])

    def copy_subset(images, split, target_class, name_prefix):
        """Copy images into one split/class folder and return how many were copied."""
        copied = 0
        for i, src_path in enumerate(images):
            if os.path.exists(src_path):
                # The running index keeps names unique when basenames repeat
                # across subcategories.
                filename = f"{name_prefix}{i:06d}_{os.path.basename(src_path)}"
                dst_path = os.path.join(organized_path, split, target_class, filename)
                shutil.copy2(src_path, dst_path)
                copied += 1
        return copied

    total_moved = 0
    for _, category in class_folders:
        images = collected[category]
        split_idx = int(split_ratio * len(images))
        train_images = images[:split_idx]
        val_images = images[split_idx:]

        prefix = category.lower()
        copied = copy_subset(train_images, 'train', category, f"{prefix}_")
        copied += copy_subset(val_images, 'validation', category, f"{prefix}_val_")

        print(f"✅ {category}: {len(train_images)} train, {len(val_images)} validation ({copied} copied)")
        total_moved += copied

    print(f"\nTotal images organized: {total_moved}")
    return organized_path

# Execute the organization. `organized_path` is the dataset root that the
# verification, generator, cleaning and sample-image cells below read from.
organized_path = organize_binary_dataset_simple()

In [None]:
# Step 6: Verify Organized Dataset
def verify_organized_dataset(organized_path):
    """Print per-class and per-split image counts for the organized dataset.

    Args:
        organized_path: Root folder containing ``train`` and ``validation``.

    Returns:
        Always True; missing class folders are only reported in the printout.
    """
    print(f"Verifying organized dataset at: {organized_path}")

    image_suffixes = ('.jpg', '.jpeg', '.png')
    for split in ('train', 'validation'):
        print(f"\n📊 {split.upper()} SET:")
        split_dir = os.path.join(organized_path, split)
        split_total = 0

        for category in ('Biodegradable', 'Non-Biodegradable'):
            class_dir = os.path.join(split_dir, category)
            if not os.path.exists(class_dir):
                print(f"   ❌ {category}: Directory not found")
                continue

            n_images = sum(
                1 for name in os.listdir(class_dir)
                if name.lower().endswith(image_suffixes)
            )
            print(f"   {category}: {n_images:,} images")
            split_total += n_images

        print(f"   Total {split}: {split_total:,} images")

    return True

# Verify the organized dataset (prints counts only; the return value is always
# True and is not used).
verify_organized_dataset(organized_path)

In [None]:
# Step 7: Set Up Data Preprocessing and Augmentation
# Define image parameters
IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 32

# Create data generators with augmentation for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    fill_mode='nearest'
)

# Validation data should only be rescaled (no augmentation)
validation_datagen = ImageDataGenerator(rescale=1./255)

# Create data generators
train_generator = train_datagen.flow_from_directory(
    f'{organized_path}/train',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

# shuffle=False is required for evaluation: Step 13 compares model.predict()
# output with validation_generator.classes, which is in directory order. With
# the default shuffle=True the two sequences do not line up and the confusion
# matrix / classification report are meaningless.
validation_generator = validation_datagen.flow_from_directory(
    f'{organized_path}/validation',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

print("Data generators created successfully!")
print(f"Training samples: {train_generator.samples}")
print(f"Validation samples: {validation_generator.samples}")
print(f"Class indices: {train_generator.class_indices}")

In [None]:
# Step 6.5: Configure PIL for Better Error Handling
import warnings
from PIL import ImageFile

# Let PIL load images whose data is truncated instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Silence UserWarnings raised from the PIL module (e.g. palette transparency).
warnings.filterwarnings('ignore', category=UserWarning, module='PIL')

print(
    "✅ PIL configured for robust image loading\n"
    "   - Truncated images will be loaded when possible\n"
    "   - Palette transparency warnings suppressed"
)

In [None]:
# Step 5.5: Validate and Clean Dataset
def validate_and_clean_images(organized_path):
    """
    Validate every image in the organized dataset and delete the unusable ones.

    An image is rejected when PIL cannot open/load/convert it to RGB, or when
    either side is shorter than 10 pixels. Rejected files are deleted from disk.

    Args:
        organized_path: Root folder containing {train,validation}/{class} folders.

    Returns:
        The number of files that were actually deleted across all folders.
    """
    from PIL import Image
    import gc

    min_side = 10  # images with a side below this are treated as unusable
    image_extensions = ('.jpg', '.jpeg', '.png')

    print("Validating and cleaning dataset...")
    total_removed = 0

    for split in ['train', 'validation']:
        for category in ['Biodegradable', 'Non-Biodegradable']:
            category_path = os.path.join(organized_path, split, category)
            if not os.path.exists(category_path):
                continue

            print(f"\nValidating {split}/{category}...")
            image_files = [f for f in os.listdir(category_path)
                           if f.lower().endswith(image_extensions)]

            corrupted_files = []

            for i, filename in enumerate(image_files):
                if i % 500 == 0:  # Progress update every 500 files
                    print(f"  Processed {i}/{len(image_files)} images...")

                filepath = os.path.join(category_path, filename)

                try:
                    with Image.open(filepath) as img:
                        # Force a full decode; open() alone only reads the header.
                        img.load()
                        width, height = img.size
                        if width < min_side or height < min_side:
                            # Log the reason so the deletion is not silent.
                            print(f"    Image too small: {filename} - {width}x{height}")
                            corrupted_files.append(filepath)
                            continue
                        # Conversion to RGB catches some remaining corruption.
                        img.convert('RGB')

                except Exception as e:
                    print(f"    Corrupted image found: {filename} - {str(e)}")
                    corrupted_files.append(filepath)

            # Remove rejected files, counting only deletions that succeeded.
            removed_here = 0
            for filepath in corrupted_files:
                try:
                    os.remove(filepath)
                    removed_here += 1
                except Exception as e:
                    print(f"    Error removing {filepath}: {str(e)}")
            total_removed += removed_here

            valid_count = len(image_files) - len(corrupted_files)
            print(f"  ✅ {category}: {valid_count} valid images (removed {removed_here} corrupted)")
            not_removed = len(corrupted_files) - removed_here
            if not_removed:
                print(f"  ⚠️ {not_removed} corrupted file(s) could not be removed from {split}/{category}")

            # Force garbage collection
            gc.collect()

    print(f"\n🧹 Dataset cleaning completed!")
    print(f"   Total corrupted images removed: {total_removed}")
    return total_removed

# Clean the dataset
removed_count = validate_and_clean_images(organized_path)

# flow_from_directory indexes file names when a generator is created. If the
# Step 7 generators already exist and files were just deleted, rebuild them so
# training never tries to open a file that is no longer on disk.
if removed_count > 0 and 'train_datagen' in globals():
    train_generator = train_datagen.flow_from_directory(
        f'{organized_path}/train',
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
        class_mode='binary'
    )
    # shuffle=False keeps validation batches in the same order as
    # validation_generator.classes, which the confusion-matrix cell relies on.
    validation_generator = validation_datagen.flow_from_directory(
        f'{organized_path}/validation',
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
        class_mode='binary',
        shuffle=False
    )
    print(f"Data generators rebuilt after removing {removed_count} image(s)")

In [None]:
# Step 8: Create CNN Model Architecture
def create_cnn_model():
    """Build the (uncompiled) binary-classification CNN.

    Four Conv2D -> BatchNormalization -> MaxPooling2D blocks with 32, 64, 128
    and 256 filters, followed by a dropout-regularised dense head that ends in
    a single sigmoid unit.
    """
    stack = []

    # Convolutional feature extractor; only the first conv declares the input shape.
    for block_idx, n_filters in enumerate((32, 64, 128, 256)):
        conv_kwargs = {'activation': 'relu'}
        if block_idx == 0:
            conv_kwargs['input_shape'] = (IMG_HEIGHT, IMG_WIDTH, 3)
        stack.append(layers.Conv2D(n_filters, (3, 3), **conv_kwargs))
        stack.append(layers.BatchNormalization())
        stack.append(layers.MaxPooling2D(2, 2))

    # Classifier head
    stack.extend([
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(512, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(1, activation='sigmoid'),  # Binary classification
    ])

    model = keras.Sequential(stack)
    return model

# Build the network, then attach optimizer, loss and metric
model = create_cnn_model()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

print("Model created and compiled successfully!")
model.summary()

In [None]:
# Step 9: Set Up Training Callbacks
# Both callbacks watch val_loss: the learning rate is cut after 5 epochs
# without improvement, and training stops after 10.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001, verbose=1),
]

In [None]:
# Step 10: Train the Model
print("Starting model training...")
# Pick EPOCHS according to the time you have; the figures below are the
# notebook author's rough estimates:
# - 5 epochs: Quick test (~3-5 min, 65-75% accuracy)
# - 10 epochs: Fast training (~5-10 min, 75-80% accuracy)
# - 20 epochs: Balanced approach (~15-20 min, 80-85% accuracy)
# - 30 epochs: Good results (~25-30 min, 85-90% accuracy)
# - 50 epochs: Better results (~40-50 min, 90-92% accuracy)
# - 100 epochs: Maximum results (~80-100 min, 92-95% accuracy)
EPOCHS = 30  # Increased for better accuracy - adjust as needed

# Whole batches per epoch, but never fewer than one step
steps_per_epoch = max(1, train_generator.samples // BATCH_SIZE)
validation_steps = max(1, validation_generator.samples // BATCH_SIZE)

print(f"Training samples: {train_generator.samples}")
print(f"Validation samples: {validation_generator.samples}")
print(f"Steps per epoch: {steps_per_epoch}")
print(f"Validation steps: {validation_steps}")
# The estimate multiplies total steps by 2 and divides by 60 to get minutes.
print(f"Estimated training time: ~{EPOCHS * steps_per_epoch * 2 / 60:.1f} minutes")
print(f"Note: EarlyStopping may stop training before {EPOCHS} epochs if model stops improving")
print("EarlyStopping settings: patience=10 (stops if no improvement for 10 epochs)")
print(f"💡 Tip: You can change EPOCHS to any value (10, 20, 30, 50, 100+) based on your needs")

try:
    history = model.fit(
        train_generator,
        epochs=EPOCHS,
        steps_per_epoch=steps_per_epoch,
        validation_data=validation_generator,
        validation_steps=validation_steps,
        callbacks=callbacks,
        verbose=1,
    )
except Exception as e:
    # Print the hints, then re-raise so the failure still stops the notebook.
    print(f"\n❌ Training failed with error: {str(e)}")
    print("\n🔧 Troubleshooting suggestions:")
    print("1. Run the dataset cleaning cell again")
    print("2. Check if there are any remaining corrupted images")
    print("3. Try reducing batch size or image resolution")
    print("4. Restart the runtime and try again")
    raise e
else:
    print("\n✅ Training completed successfully!")

In [None]:
# Step 11: Evaluate the Model
print("\nEvaluating model on validation data...")
# val_loss / val_accuracy are reused by the summary and analysis cells below.
val_loss, val_accuracy = model.evaluate(validation_generator, verbose=0)
print(f"Validation Loss: {val_loss:.4f}\nValidation Accuracy: {val_accuracy:.4f}")

In [None]:
# Step 12: Plot Training History
def plot_training_history(history):
    """Draw side-by-side accuracy and loss curves for training and validation.

    Args:
        history: Object whose ``history`` dict holds the 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' series.
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    # (key in history.history, label used in the title, axis and legend)
    panels = (('accuracy', 'Accuracy'), ('loss', 'Loss'))
    for ax, (metric_key, metric_label) in zip(axes, panels):
        ax.plot(history.history[metric_key], label=f'Training {metric_label}')
        ax.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_label}')
        ax.set_title(f'Model {metric_label}')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(metric_label)
        ax.legend()
        ax.grid(True)

    plt.tight_layout()
    plt.show()

# `history` is the object assigned from model.fit in Step 10.
plot_training_history(history)

In [None]:
# Step 13: Generate Predictions and Confusion Matrix
print("\nGenerating predictions for confusion matrix...")
validation_generator.reset()

# Predict batch by batch and keep the labels delivered with each batch.
# Comparing model.predict() output against validation_generator.classes is only
# valid when the generator does not shuffle; pairing scores with the labels of
# the same batch is correct either way.
batch_scores = []
batch_labels = []
for batch_index in range(len(validation_generator)):
    batch_images, labels = validation_generator[batch_index]
    batch_scores.append(model.predict_on_batch(batch_images))
    batch_labels.append(labels)

predictions = np.concatenate(batch_scores)
predicted_classes = (predictions > 0.5).astype(int).flatten()
true_classes = np.concatenate(batch_labels).astype(int)

# class_indices maps class name -> integer label; its keys are in label order.
class_names = list(validation_generator.class_indices.keys())
class_ids = list(range(len(class_names)))

# Passing the label set explicitly keeps the matrix square and the report
# aligned with class_names even if one class is absent from the data.
cm = confusion_matrix(true_classes, predicted_classes, labels=class_ids)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Print classification report
print("\nClassification Report:")
print(classification_report(true_classes, predicted_classes,
                            labels=class_ids, target_names=class_names))

In [None]:
# Step 14: Save the Model
# model_save_path is reused by the download, analysis and benchmarking cells below.
model_save_path = "/content/waste_model_bio.h5"
model.save(model_save_path)
print(f"\nModel saved as: {model_save_path}")

In [None]:
# Step 15: Create a Test Function
def test_single_prediction(model, image_path):
    """Classify one image file and print the predicted class with its confidence.

    Args:
        model: Model whose ``predict`` returns one sigmoid score per input.
        image_path: Path of the image to classify.

    Returns:
        A ``(predicted_class, confidence)`` tuple.
    """
    from tensorflow.keras.preprocessing import image as keras_image

    # Load at the training resolution and scale pixels to [0, 1], as in training.
    pil_img = keras_image.load_img(image_path, target_size=(IMG_HEIGHT, IMG_WIDTH))
    batch = np.expand_dims(keras_image.img_to_array(pil_img), axis=0)
    batch /= 255.0

    score = model.predict(batch)[0][0]

    # Scores above 0.5 map to index 1 ('Non-Biodegradable'), otherwise index 0.
    if score > 0.5:
        predicted_class, confidence = 'Non-Biodegradable', score
    else:
        predicted_class, confidence = 'Biodegradable', 1 - score

    print(f"Prediction: {predicted_class}")
    print(f"Confidence: {confidence:.2%}")

    return predicted_class, confidence

In [None]:
# Step 16: Download Model and Create Sample Images
# Import under an alias so the name `files` cannot clash with other uses
from google.colab import files as colab_files

# Download the model to local system
print("\nDownloading model to local system...")
colab_files.download(model_save_path)

# Create sample predictions for demo
print("\nCreating sample images for demo...")
sample_images_path = "/content/sample_images"
os.makedirs(sample_images_path, exist_ok=True)

# Copy one random sample image per class from the validation set
for class_name in ['Biodegradable', 'Non-Biodegradable']:
    class_path = f"{organized_path}/validation/{class_name}"
    if not os.path.exists(class_path):
        continue

    # Only consider image files, and skip the class when none are left
    # (random.choice raises IndexError on an empty list).
    image_files = [f for f in os.listdir(class_path)
                   if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
    if not image_files:
        print(f"\n⚠️ No validation images found for {class_name}; skipping sample")
        continue

    sample_file = random.choice(image_files)
    src = os.path.join(class_path, sample_file)
    dst = os.path.join(sample_images_path, f"sample_{class_name.lower().replace('-', '_')}.jpg")
    shutil.copy2(src, dst)

    # Test prediction on sample
    print(f"\nTesting on sample {class_name} image:")
    test_single_prediction(model, dst)

# Create a zip file with sample images
shutil.make_archive("/content/sample_images", 'zip', sample_images_path)
colab_files.download("/content/sample_images.zip")

print("\n" + "="*60)
print("🎉 TRAINING COMPLETED SUCCESSFULLY! 🎉")
print("="*60)

# Final performance summary
print(f"📊 FINAL MODEL PERFORMANCE:")
print(f"   • Validation Accuracy: {val_accuracy:.1%}")
print(f"   • Validation Loss: {val_loss:.4f}")
print(f"   • Training Epochs: {len(history.history['accuracy'])}")
print(f"   • Dataset Size: {train_generator.samples + validation_generator.samples:,} images")
print(f"   • Model Size: ~{os.path.getsize(model_save_path) / (1024*1024):.1f}MB")

print(f"\n📦 FILES DOWNLOADED:")
print(f"   1. 🤖 waste_model_bio.h5 - The trained CNN model")
print(f"   2. 🖼️ sample_images.zip - Demo images for testing")

print(f"\n🚀 NEXT STEPS FOR DEPLOYMENT:")
print(f"   1. Convert model to TensorFlow.js: tensorflowjs_converter")
print(f"   2. Integrate into your portfolio website")
print(f"   3. Use sample images for live demo")
print(f"   4. Deploy to web hosting platform")

print(f"\n💼 PORTFOLIO HIGHLIGHTS:")
print(f"   • End-to-end ML pipeline development")
print(f"   • Computer vision and CNN architecture")
print(f"   • Data preprocessing and augmentation")
print(f"   • Model optimization and deployment")
print("="*60)

In [None]:
# Step 17: Comprehensive Model Analysis and Review
def analyze_trained_model(model_path):
    """
    Load a saved model file and print a structured report about it.

    The report covers basic shape information, the Keras summary, file size and
    parameter counts, a per-type layer tally, compilation settings and a smoke
    test on random input. If the notebook-level ``val_accuracy`` / ``val_loss``
    from the evaluation cell exist, they are printed as well.

    Args:
        model_path: Path of the saved model file.

    Returns:
        The loaded model, or None if any step raised (the error is printed).
    """
    print("🔍 COMPREHENSIVE MODEL ANALYSIS")
    print("="*60)

    try:
        # Load the saved model
        loaded_model = keras.models.load_model(model_path)
        print("✅ Model loaded successfully!")

        # 1. Basic Model Information
        print(f"\n📊 BASIC MODEL INFORMATION:")
        print(f"   • Model Type: {type(loaded_model).__name__}")
        print(f"   • Input Shape: {loaded_model.input_shape}")
        print(f"   • Output Shape: {loaded_model.output_shape}")
        print(f"   • Total Layers: {len(loaded_model.layers)}")

        # 2. Model Architecture Summary
        print(f"\n🏗️ MODEL ARCHITECTURE:")
        loaded_model.summary()

        # 3. Model Size Analysis
        model_size_mb = os.path.getsize(model_path) / (1024 * 1024)
        print(f"\n💾 MODEL SIZE ANALYSIS:")
        print(f"   • File Size: {model_size_mb:.2f} MB")

        # Count parameters
        total_params = loaded_model.count_params()
        trainable_params = sum([tf.keras.backend.count_params(w) for w in loaded_model.trainable_weights])
        non_trainable_params = total_params - trainable_params

        print(f"   • Total Parameters: {total_params:,}")
        print(f"   • Trainable Parameters: {trainable_params:,}")
        print(f"   • Non-trainable Parameters: {non_trainable_params:,}")
        # The estimate assumes 4 bytes per parameter.
        print(f"   • Memory Usage (approx): {total_params * 4 / (1024*1024):.2f} MB")

        # 4. Layer Analysis
        print(f"\n🔬 DETAILED LAYER ANALYSIS:")
        conv_layers = 0
        dense_layers = 0
        dropout_layers = 0
        batch_norm_layers = 0

        for i, layer in enumerate(loaded_model.layers):
            layer_type = type(layer).__name__
            if 'Conv' in layer_type:
                conv_layers += 1
            elif 'Dense' in layer_type:
                dense_layers += 1
            elif 'Dropout' in layer_type:
                dropout_layers += 1
            elif 'BatchNormalization' in layer_type:
                batch_norm_layers += 1

            # Print first few and last few layers
            if i < 5 or i >= len(loaded_model.layers) - 3:
                output_shape = str(layer.output_shape) if hasattr(layer, 'output_shape') else 'N/A'
                print(f"   Layer {i+1}: {layer_type} - {output_shape}")
            elif i == 5:
                print("   ... (middle layers omitted)")

        print(f"\n📈 LAYER STATISTICS:")
        print(f"   • Convolutional Layers: {conv_layers}")
        print(f"   • Dense Layers: {dense_layers}")
        print(f"   • Dropout Layers: {dropout_layers}")
        print(f"   • Batch Normalization Layers: {batch_norm_layers}")

        # 5. Model Compilation Information
        print(f"\n⚙️ COMPILATION SETTINGS:")
        print(f"   • Optimizer: {loaded_model.optimizer.__class__.__name__}")
        print(f"   • Loss Function: {loaded_model.loss}")
        print(f"   • Metrics: {loaded_model.metrics_names}")

        # 6. Test Model Prediction Function
        print(f"\n🧪 MODEL TESTING:")

        def test_model_prediction():
            # Random input only checks that inference runs; the class printed
            # below carries no meaning.
            dummy_input = np.random.rand(1, IMG_HEIGHT, IMG_WIDTH, 3)
            prediction = loaded_model.predict(dummy_input, verbose=0)
            return prediction[0][0]

        test_pred = test_model_prediction()
        predicted_class = "Non-Biodegradable" if test_pred > 0.5 else "Biodegradable"
        confidence = test_pred if test_pred > 0.5 else 1 - test_pred

        print(f"   ✅ Model prediction test successful!")
        print(f"   📊 Test prediction: {predicted_class} ({confidence:.1%} confidence)")

        # 7. Model Validation
        print(f"\n✅ MODEL VALIDATION SUMMARY:")
        print(f"   • Model loads correctly: ✅")
        print(f"   • Architecture is intact: ✅")
        print(f"   • Can make predictions: ✅")
        print(f"   • Binary classification setup: ✅")
        print(f"   • Ready for deployment: ✅")

        # 8. Performance Summary from Training
        # The metrics live in the notebook's global namespace. Inside a function
        # locals() only holds this function's own variables, so the lookup has
        # to go through globals().
        recorded_accuracy = globals().get('val_accuracy')
        recorded_loss = globals().get('val_loss')
        if recorded_accuracy is not None and recorded_loss is not None:
            print(f"\n🏆 FINAL PERFORMANCE:")
            print(f"   • Validation Accuracy: {recorded_accuracy:.1%}")
            print(f"   • Validation Loss: {recorded_loss:.4f}")
            print(f"   • Model Quality: {'🌟 EXCELLENT' if recorded_accuracy > 0.9 else '🥇 GOOD' if recorded_accuracy > 0.8 else '🥈 FAIR'}")

        return loaded_model

    except Exception as e:
        # Best effort: report the failure and let the caller check for None.
        print(f"❌ Error analyzing model: {str(e)}")
        return None

# Analyze the trained model.
# analyze_trained_model returns None when it fails; Step 18 checks for that
# before using `analyzed_model`.
print("Starting comprehensive model analysis...")
analyzed_model = analyze_trained_model(model_save_path)

In [None]:
# Step 18: Visual Model Inspection and Performance Visualization
def create_model_visualization(model, history=None):
    """
    Create a visual and numeric inspection of a model and its training run.

    Saves an architecture diagram to 'model_architecture.png' (skipped with a
    warning if plotting fails), prints per-layer weight statistics and, when
    ``history`` is given, shows a 2x2 figure with accuracy, loss, learning-rate
    and final-metrics panels.

    Args:
        model: Keras model to inspect.
        history: Optional object whose ``history`` dict holds 'accuracy',
            'val_accuracy', 'loss', 'val_loss' and optionally the learning rate
            under 'learning_rate' or 'lr'.

    Returns:
        A list with one dict of weight statistics per layer that has weights.
    """
    print("📊 CREATING MODEL VISUALIZATIONS")
    print("="*50)

    # 1. Model Architecture Visualization
    try:
        print("🎨 Generating model architecture plot...")
        tf.keras.utils.plot_model(
            model,
            to_file='model_architecture.png',
            show_shapes=True,
            show_layer_names=True,
            rankdir='TB',
            expand_nested=False,
            dpi=96
        )
        print("✅ Model architecture saved as 'model_architecture.png'")
    except Exception as e:
        # The diagram is optional; report the problem and carry on.
        print(f"⚠️ Could not create architecture plot: {e}")

    # 2. Model Weights Analysis
    print(f"\n⚖️ WEIGHTS ANALYSIS:")
    layer_weights_info = []

    for i, layer in enumerate(model.layers):
        if layer.weights:
            weights = layer.get_weights()
            if weights:
                weight_stats = {
                    'layer_name': layer.name,
                    'layer_type': type(layer).__name__,
                    'weight_shapes': [w.shape for w in weights],
                    'weight_means': [np.mean(w) for w in weights],
                    'weight_stds': [np.std(w) for w in weights],
                    'weight_mins': [np.min(w) for w in weights],
                    'weight_maxs': [np.max(w) for w in weights]
                }
                layer_weights_info.append(weight_stats)

                print(f"   Layer {i+1} ({layer.name}):")
                print(f"      Type: {weight_stats['layer_type']}")
                print(f"      Shapes: {weight_stats['weight_shapes']}")
                print(f"      Mean weights: {[f'{m:.4f}' for m in weight_stats['weight_means']]}")
                print(f"      Std weights: {[f'{s:.4f}' for s in weight_stats['weight_stds']]}")

    # 3. Performance Metrics Visualization
    if history is not None:
        print(f"\n📈 CREATING PERFORMANCE PLOTS...")

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))

        # Accuracy plot
        axes[0,0].plot(history.history['accuracy'], label='Training Accuracy', linewidth=2)
        axes[0,0].plot(history.history['val_accuracy'], label='Validation Accuracy', linewidth=2)
        axes[0,0].set_title('Model Accuracy Over Time', fontsize=14, fontweight='bold')
        axes[0,0].set_xlabel('Epoch')
        axes[0,0].set_ylabel('Accuracy')
        axes[0,0].legend()
        axes[0,0].grid(True, alpha=0.3)

        # Loss plot
        axes[0,1].plot(history.history['loss'], label='Training Loss', linewidth=2)
        axes[0,1].plot(history.history['val_loss'], label='Validation Loss', linewidth=2)
        axes[0,1].set_title('Model Loss Over Time', fontsize=14, fontweight='bold')
        axes[0,1].set_xlabel('Epoch')
        axes[0,1].set_ylabel('Loss')
        axes[0,1].legend()
        axes[0,1].grid(True, alpha=0.3)

        # Learning rate plot (if available). The history key is 'learning_rate'
        # in newer Keras releases and 'lr' in older ones, so accept both.
        lr_history = history.history.get('learning_rate', history.history.get('lr'))
        if lr_history is not None:
            axes[1,0].plot(lr_history, linewidth=2, color='orange')
            axes[1,0].set_title('Learning Rate Schedule', fontsize=14, fontweight='bold')
            axes[1,0].set_xlabel('Epoch')
            axes[1,0].set_ylabel('Learning Rate')
            axes[1,0].grid(True, alpha=0.3)
        else:
            axes[1,0].text(0.5, 0.5, 'Learning Rate\nData Not Available',
                          ha='center', va='center', transform=axes[1,0].transAxes)

        # Final metrics summary (values of the last recorded epoch)
        final_train_acc = history.history['accuracy'][-1]
        final_val_acc = history.history['val_accuracy'][-1]
        final_train_loss = history.history['loss'][-1]
        final_val_loss = history.history['val_loss'][-1]

        metrics_text = f"""FINAL METRICS
        
Training Accuracy: {final_train_acc:.1%}
Validation Accuracy: {final_val_acc:.1%}
Training Loss: {final_train_loss:.4f}
Validation Loss: {final_val_loss:.4f}

Epochs Trained: {len(history.history['accuracy'])}
Overfitting Gap: {abs(final_train_acc - final_val_acc):.1%}"""

        axes[1,1].text(0.1, 0.5, metrics_text, transform=axes[1,1].transAxes,
                      fontsize=12, verticalalignment='center',
                      bbox=dict(boxstyle="round,pad=0.5", facecolor="lightblue", alpha=0.8))
        axes[1,1].set_title('Training Summary', fontsize=14, fontweight='bold')
        axes[1,1].axis('off')

        plt.tight_layout()
        plt.show()

        print("✅ Performance visualization complete!")
    else:
        print("⚠️ No training history available for performance plots")

    return layer_weights_info

# 4. Model Comparison and Benchmarking
def benchmark_model_performance(model):
    """
    Print a rule-of-thumb benchmark of the model and return the raw numbers.

    Rates the saved file size (read from the notebook-level ``model_save_path``),
    the parameter count and the layer make-up, then derives an overall rating
    from the notebook-level ``val_accuracy`` when it exists.

    Args:
        model: Keras model to benchmark.

    Returns:
        Dict with 'size_mb', 'total_params', 'conv_layers', 'dense_layers'
        and 'dropout_layers'.
    """
    print(f"\n🏁 MODEL BENCHMARKING:")
    print("="*40)

    # Size benchmarking
    model_size_mb = os.path.getsize(model_save_path) / (1024 * 1024)
    print(f"📏 SIZE ANALYSIS:")
    if model_size_mb < 10:
        print(f"   • Size: {model_size_mb:.1f}MB - 🟢 Excellent (Mobile-friendly)")
    elif model_size_mb < 50:
        print(f"   • Size: {model_size_mb:.1f}MB - 🟡 Good (Web-friendly)")
    else:
        print(f"   • Size: {model_size_mb:.1f}MB - 🟠 Large (Consider optimization)")

    # Parameter benchmarking
    total_params = model.count_params()
    print(f"\n🔢 PARAMETER ANALYSIS:")
    if total_params < 1_000_000:
        print(f"   • Parameters: {total_params:,} - 🟢 Lightweight")
    elif total_params < 10_000_000:
        print(f"   • Parameters: {total_params:,} - 🟡 Medium")
    else:
        print(f"   • Parameters: {total_params:,} - 🟠 Heavy")

    # Architecture benchmarking (layers are matched by substring of their name)
    print(f"\n🏗️ ARCHITECTURE ASSESSMENT:")
    conv_layers = sum(1 for layer in model.layers if 'conv' in layer.name.lower())
    dense_layers = sum(1 for layer in model.layers if 'dense' in layer.name.lower())
    dropout_layers = sum(1 for layer in model.layers if 'dropout' in layer.name.lower())

    print(f"   • Conv Layers: {conv_layers} - {'🟢 Good depth' if conv_layers >= 4 else '🟡 Light'}")
    print(f"   • Dense Layers: {dense_layers} - {'🟢 Good' if dense_layers >= 2 else '🟡 Simple'}")
    print(f"   • Regularization: {'🟢 Well regularized' if dropout_layers >= 2 else '🟡 Basic'}")

    # Overall rating. val_accuracy is a notebook global; inside a function
    # locals() never contains it, so it has to be read through globals().
    measured_accuracy = globals().get('val_accuracy')
    if measured_accuracy is not None and measured_accuracy > 0.95:
        rating = "🌟 OUTSTANDING"
    elif measured_accuracy is not None and measured_accuracy > 0.90:
        rating = "🥇 EXCELLENT"
    elif measured_accuracy is not None and measured_accuracy > 0.85:
        rating = "🥈 VERY GOOD"
    else:
        # Also the fallback when no validation accuracy has been recorded.
        rating = "🥉 GOOD"

    print(f"\n🏆 OVERALL MODEL RATING: {rating}")

    return {
        'size_mb': model_size_mb,
        'total_params': total_params,
        'conv_layers': conv_layers,
        'dense_layers': dense_layers,
        'dropout_layers': dropout_layers
    }

# Run the visual inspection and benchmark, but only when Step 17 produced a model
if globals().get('analyzed_model') is None:
    print("⚠️ Please run the model analysis cell first!")
else:
    print("\n" + "="*70)
    # Training history is optional; None makes the visualisation skip its plots.
    weights_info = create_model_visualization(analyzed_model, globals().get('history'))
    benchmark_results = benchmark_model_performance(analyzed_model)
    print("="*70)