# 🌍 AI Waste Classification System
## Exploratory Data Analysis & Model Training

**Workshop**: Green Skilling & AI for Sustainability

**Objective**: Build a CNN model to classify waste into Organic and Recyclable categories

---

## 📚 1. Import Libraries

In [None]:
# Deep Learning & Computer Vision
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications import MobileNetV2

# Data Processing
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import cv2

# Utilities
from pathlib import Path
import os
from sklearn.metrics import classification_report, confusion_matrix

# Settings
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*'.
# Try the new name first and fall back to the pre-3.6 name so the notebook
# also runs on older installs; if neither exists the default style is kept.
for _style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set_palette('husl')

# Seed NumPy and TensorFlow so repeated runs are comparable.
np.random.seed(42)
tf.random.set_seed(42)

# Report the environment (check marks repaired from mis-encoded text).
print(f"✓ TensorFlow Version: {tf.__version__}")
print(f"✓ GPU Available: {tf.config.list_physical_devices('GPU')}")

## 📊 2. Exploratory Data Analysis

In [None]:
# Dataset path
DATA_DIR = 'data/DATASET'

# Folder name -> human-readable class label. Unknown folders keep their own
# name instead of being silently reported as 'Recyclable'.
CLASS_LABELS = {'O': 'Organic', 'R': 'Recyclable'}

# Suffixes are compared case-insensitively so '.JPG' / '.jpeg' files count too.
IMAGE_SUFFIXES = {'.jpg', '.jpeg', '.png'}


def count_images_by_class(data_dir):
    """Count image files in each class sub-directory of *data_dir*.

    Args:
        data_dir: Path (str or Path) whose immediate sub-directories are the
            class folders. Plain files directly inside it are ignored.

    Returns:
        dict mapping class label -> number of image files. Sub-directories
        are visited in sorted order, so the key order is deterministic.

    Raises:
        FileNotFoundError: if *data_dir* does not exist.
    """
    counts = {}
    for class_dir in sorted(Path(data_dir).iterdir()):
        if not class_dir.is_dir():
            continue
        label = CLASS_LABELS.get(class_dir.name, class_dir.name)
        n_images = sum(
            1
            for entry in class_dir.iterdir()
            if entry.is_file() and entry.suffix.lower() in IMAGE_SUFFIXES
        )
        # Accumulate rather than assign so two folders that map to the same
        # label can never overwrite each other's count.
        counts[label] = counts.get(label, 0) + n_images
    return counts


# Count images in each class
data_path = Path(DATA_DIR)
if data_path.is_dir():
    class_counts = count_images_by_class(data_path)
else:
    # Give a readable hint instead of a raw traceback from iterdir().
    class_counts = {}
    print(f"Warning: dataset directory not found: {data_path}")

print("Dataset Statistics:")
print("=" * 50)
for class_name, count in class_counts.items():
    print(f"{class_name:15s}: {count:5d} images")
print(f"{'Total':15s}: {sum(class_counts.values()):5d} images")
print("=" * 50)

In [None]:
# Visualize class distribution
# Colours are looked up by class name so each class keeps its colour no matter
# in which order the class folders were discovered; unknown classes get grey.
class_palette = {'Organic': '#2E7D32', 'Recyclable': '#FF6F00'}
class_labels = list(class_counts)
class_totals = [class_counts[label] for label in class_labels]
colors = [class_palette.get(label, '#757575') for label in class_labels]

if sum(class_totals) > 0:
    fig, ax = plt.subplots(1, 2, figsize=(15, 5))

    # Bar plot
    ax[0].bar(class_labels, class_totals, color=colors, alpha=0.8)
    ax[0].set_title('Class Distribution', fontsize=16, fontweight='bold')
    ax[0].set_ylabel('Number of Images', fontsize=12)
    ax[0].grid(axis='y', alpha=0.3)

    # Pie chart
    ax[1].pie(class_totals, labels=class_labels, autopct='%1.1f%%',
              colors=colors, startangle=90, textprops={'fontsize': 12})
    ax[1].set_title('Class Proportion', fontsize=16, fontweight='bold')

    plt.tight_layout()
    plt.show()
else:
    # Nothing to draw; a pie chart of all zeros is meaningless.
    print("No images found - skipping class distribution plots.")

# The ratio is undefined when there are no classes or a class is empty, so
# report that explicitly instead of raising ValueError / ZeroDivisionError.
if class_totals and min(class_totals) > 0:
    print(f"\nClass Balance Ratio: {max(class_totals) / min(class_totals):.2f}:1")
else:
    print("\nClass Balance Ratio: n/a (at least one class has no images)")

## 🖼️ 3. Sample Image Visualization

In [None]:
# Display sample images from each class
SAMPLES_PER_CLASS = 5
sample_suffixes = {'.jpg', '.jpeg', '.png'}  # matched case-insensitively

fig, axes = plt.subplots(2, SAMPLES_PER_CLASS, figsize=(20, 8))
fig.suptitle('Sample Waste Images', fontsize=20, fontweight='bold')

# Hide every axis up front so grid cells without an image stay blank.
for cell_ax in axes.flat:
    cell_ax.axis('off')

# Filter to directories *before* assigning grid rows: a stray file in the
# dataset folder (e.g. .DS_Store) must not consume a row index. Sorting makes
# the row order deterministic.
class_dirs = sorted(entry for entry in data_path.iterdir() if entry.is_dir())

# zip() stops at the shorter sequence, so extra folders beyond the two grid
# rows are skipped rather than indexing past the grid.
for row_axes, class_dir in zip(axes, class_dirs):
    class_name = 'Organic' if class_dir.name == 'O' else 'Recyclable'
    images = sorted(
        path for path in class_dir.iterdir()
        if path.is_file() and path.suffix.lower() in sample_suffixes
    )[:SAMPLES_PER_CLASS]

    for cell_ax, img_path in zip(row_axes, images):
        img = load_img(img_path, target_size=(224, 224))
        cell_ax.imshow(img)
        cell_ax.set_title(f"{class_name}\n{img_path.name}",
                          fontsize=10, fontweight='bold')

plt.tight_layout()
plt.show()

## 🔧 4. Data Preparation

In [None]:
# Configuration
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2

# Data augmentation for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=VALIDATION_SPLIT
)

# Validation images are only rescaled, never augmented: random rotations,
# shifts and flips on the validation subset would make val_loss / val_accuracy
# noisy and different on every pass. The same validation_split value is used
# so that this generator's 'validation' subset is the complement of the
# training generator's 'training' subset.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT
)

# Load training data
train_generator = train_datagen.flow_from_directory(
    DATA_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training',
    shuffle=True,
    seed=42
)

# Load validation data (shuffle=False keeps the sample order aligned with
# val_generator.classes, which later cells rely on)
val_generator = val_datagen.flow_from_directory(
    DATA_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation',
    shuffle=False,
    seed=42
)

print(f"\nTraining samples: {train_generator.samples}")
print(f"Validation samples: {val_generator.samples}")
print(f"\nClass indices: {train_generator.class_indices}")

## 🏗️ 5. Model Architecture

In [None]:
# Transfer-learning classifier: frozen MobileNetV2 backbone + dense head
def build_model():
    """Assemble the waste classifier.

    A MobileNetV2 backbone with ImageNet weights (no top) is frozen and
    followed by global average pooling, batch normalisation, two
    ReLU dense layers with dropout, and a single sigmoid output unit.

    Returns:
        An uncompiled Sequential model named 'Waste_Classifier'.
    """
    # Pre-trained feature extractor; its weights are not updated.
    backbone = MobileNetV2(
        weights='imagenet',
        include_top=False,
        input_shape=tuple(IMG_SIZE) + (3,),
    )
    backbone.trainable = False

    # Stack the backbone and the classification head one layer at a time.
    classifier = models.Sequential(name='Waste_Classifier')
    classifier.add(backbone)
    classifier.add(layers.GlobalAveragePooling2D())
    classifier.add(layers.BatchNormalization())
    classifier.add(layers.Dense(256, activation='relu'))
    classifier.add(layers.Dropout(0.5))
    classifier.add(layers.Dense(128, activation='relu'))
    classifier.add(layers.Dropout(0.3))
    classifier.add(layers.Dense(1, activation='sigmoid'))

    return classifier


# Instantiate the network and print its layer summary
model = build_model()
model.summary()

In [None]:
# Visualize model architecture
from IPython.display import Image, display

architecture_png = Path('model_architecture.png')

# Drop any diagram left over from an earlier run so a stale picture is never
# shown for the current model.
if architecture_png.exists():
    architecture_png.unlink()

# plot_model needs the optional pydot + graphviz packages. Depending on the
# Keras version it either raises ImportError or just prints a hint and returns
# without writing the file, so both outcomes are handled here.
try:
    tf.keras.utils.plot_model(
        model,
        to_file=str(architecture_png),
        show_shapes=True,
        show_layer_names=True,
        dpi=150
    )
except ImportError as err:
    print(f"Skipping architecture diagram: {err}")

if architecture_png.is_file():
    # display() is required because this is no longer the cell's last expression.
    display(Image(str(architecture_png)))
else:
    print("Architecture diagram not created (install pydot and graphviz to enable it).")

## 🎯 6. Model Compilation

In [None]:
# Compile model
# Single source of truth for the learning rate so the printed summary below
# cannot drift from the value actually handed to the optimizer.
LEARNING_RATE = 0.001

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss='binary_crossentropy',
    # The order of this list fixes the order of the values returned by
    # model.evaluate() after the loss.
    metrics=[
        'accuracy',
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall'),
        keras.metrics.AUC(name='auc')
    ]
)

# Check mark repaired from mis-encoded text.
print("✓ Model compiled successfully!")
print(f"\nOptimizer: Adam (lr={LEARNING_RATE})")
print("Loss: Binary Cross-Entropy")
print("Metrics: Accuracy, Precision, Recall, AUC")

## 🏋️ 7. Model Training

In [None]:
# Define callbacks
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# NOTE: EarlyStopping and ReduceLROnPlateau watch val_loss while the
# checkpoint watches val_accuracy, so the weights restored in memory and the
# file written to disk may come from different epochs.
callbacks = [
    # Stop after 5 epochs without val_loss improvement and roll back to the
    # best weights seen.
    EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True,
        verbose=1
    ),
    # Keep only the checkpoint with the highest val_accuracy on disk.
    ModelCheckpoint(
        'best_model.h5',
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    ),
    # Halve the learning rate after 3 stagnant epochs, never below 1e-7.
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        min_lr=1e-7,
        verbose=1
    )
]

# Check mark repaired from mis-encoded text.
print("✓ Callbacks configured")

In [None]:
# Train model
# Upper bound on epochs; the EarlyStopping callback may end training sooner.
EPOCHS = 25

print(f"Starting training for {EPOCHS} epochs...\n")

# `history` keeps the per-epoch metric values used by the plotting and
# summary cells further down.
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=val_generator,
    callbacks=callbacks,
    verbose=1
)

# Check mark repaired from mis-encoded text.
print("\n✓ Training completed!")

## 📈 8. Training Visualization

In [None]:
# Plot training history: one panel per metric, training vs. validation
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Training History', fontsize=20, fontweight='bold')

# (training key, validation key, panel title) in row-major panel order:
# top-left, top-right, bottom-left, bottom-right.
metric_panels = (
    ('accuracy', 'val_accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Loss'),
    ('precision', 'val_precision', 'Precision'),
    ('recall', 'val_recall', 'Recall'),
)

for panel, (train_key, val_key, title) in zip(axes.flat, metric_panels):
    panel.plot(history.history[train_key], label='Training', linewidth=2)
    panel.plot(history.history[val_key], label='Validation', linewidth=2)
    panel.set_title(title, fontsize=14, fontweight='bold')
    panel.set_xlabel('Epoch')
    panel.set_ylabel(title)
    panel.legend()
    panel.grid(True, alpha=0.3)

# Save the figure to disk before showing it
plt.tight_layout()
plt.savefig('training_history.png', dpi=300, bbox_inches='tight')
plt.show()

## 🎯 9. Model Evaluation

In [None]:
# Evaluate on validation set
print("Evaluating model on validation set...\n")

# `results` is kept as the raw list because the summary cell indexes it.
# Values are read positionally: loss, accuracy, precision, recall, AUC.
results = model.evaluate(val_generator, verbose=1)
val_loss, val_accuracy, val_precision, val_recall, val_auc = results[:5]

print("\n" + "=" * 60)
print("VALIDATION RESULTS")
print("=" * 60)
print(f"Loss:      {val_loss:.4f}")
print(f"Accuracy:  {val_accuracy:.4f} ({val_accuracy*100:.2f}%)")
print(f"Precision: {val_precision:.4f}")
print(f"Recall:    {val_recall:.4f}")
print(f"AUC:       {val_auc:.4f}")

# Calculate F1-Score
# Precision and recall are both 0 when the model never predicts the positive
# class; report F1 = 0 in that case instead of dividing by zero.
precision_recall_sum = val_precision + val_recall
if precision_recall_sum > 0:
    f1 = 2 * (val_precision * val_recall) / precision_recall_sum
else:
    f1 = 0.0
print(f"F1-Score:  {f1:.4f}")
print("=" * 60)

In [None]:
# Generate predictions
# reset() restarts the generator at its first batch so the predictions line
# up with val_generator.classes.
val_generator.reset()
y_pred_probs = model.predict(val_generator, verbose=1)
y_pred = (y_pred_probs > 0.5).astype(int).flatten()
y_true = val_generator.classes

# Pin the label set so the matrix is always 2x2 and the report always has two
# rows, even if one class happens to be absent from y_true / y_pred.
binary_labels = [0, 1]

# Confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=binary_labels)

# Classification report
# Derive display names from the generator's own folder -> index mapping
# rather than assuming which folder received index 0.
folder_display_names = {'O': 'Organic', 'R': 'Recyclable'}
class_names = [
    folder_display_names.get(folder, folder)
    for folder, _ in sorted(val_generator.class_indices.items(),
                            key=lambda item: item[1])
]
report = classification_report(y_true, y_pred, labels=binary_labels,
                               target_names=class_names, digits=4)

print("\n" + "=" * 60)
print("CLASSIFICATION REPORT")
print("=" * 60)
print(report)
print("=" * 60)

In [None]:
# Draw the confusion matrix as an annotated heatmap and save it
cm_fig, cm_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    cbar_kws={'label': 'Count'},
    ax=cm_ax,
)
cm_ax.set_title('Confusion Matrix', fontsize=16, fontweight='bold')
cm_ax.set_ylabel('True Label', fontsize=12)
cm_ax.set_xlabel('Predicted Label', fontsize=12)
plt.tight_layout()
plt.savefig('confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

# Print the four cells (rows = true label, columns = predicted label)
print("\nConfusion Matrix Values:")
cell_captions = (
    ("True Negatives (Organic):  ", (0, 0)),
    ("False Positives:           ", (0, 1)),
    ("False Negatives:           ", (1, 0)),
    ("True Positives (Recycl.):  ", (1, 1)),
)
for caption, position in cell_captions:
    print(f"{caption}{cm[position]}")

## 🔮 10. Sample Predictions

In [None]:
# Get sample images
# The validation generator is unshuffled, so its first batch holds images of
# a single class only. Draw a fixed random sample across the whole validation
# set instead, loading the files directly so the displayed images are the
# plain resized pictures scaled to [0, 1].
N_SAMPLES = 9
sample_rng = np.random.default_rng(42)  # fixed seed -> same samples each run
n_available = min(N_SAMPLES, val_generator.samples)
sample_indices = np.sort(
    sample_rng.choice(val_generator.samples, size=n_available, replace=False)
)

sample_images = np.stack([
    img_to_array(load_img(val_generator.filepaths[i], target_size=IMG_SIZE)) / 255.0
    for i in sample_indices
])
sample_labels = val_generator.classes[sample_indices]

# Make predictions
predictions = model.predict(sample_images)

# Plot predictions
fig, axes = plt.subplots(3, 3, figsize=(15, 15))
fig.suptitle('Sample Predictions', fontsize=20, fontweight='bold')

for idx, ax in enumerate(axes.flat):
    ax.axis('off')
    if idx >= n_available:
        # Fewer validation images than grid cells: leave the cell blank.
        continue

    # Display image
    ax.imshow(sample_images[idx])

    # Get prediction; the sigmoid output is read as the probability of label 1
    prob_recyclable = float(predictions[idx][0])
    predicted_recyclable = prob_recyclable > 0.5
    pred_class = 'Recyclable' if predicted_recyclable else 'Organic'
    true_class = 'Recyclable' if sample_labels[idx] == 1 else 'Organic'
    # Confidence is the probability assigned to whichever class was predicted.
    confidence = prob_recyclable if predicted_recyclable else 1 - prob_recyclable

    # Set title color based on correctness
    color = 'green' if pred_class == true_class else 'red'

    ax.set_title(
        f"True: {true_class}\nPred: {pred_class}\nConf: {confidence*100:.1f}%",
        fontsize=12,
        color=color,
        fontweight='bold'
    )

plt.tight_layout()
plt.savefig('sample_predictions.png', dpi=300, bbox_inches='tight')
plt.show()

## 💾 11. Save Model

In [None]:
# Save model in multiple formats
# (check marks in the messages below are repaired from mis-encoded text)

# Format 1: Keras H5
model.save('waste_classifier_final.h5')
print("✓ Saved: waste_classifier_final.h5")

# Format 2: TensorFlow SavedModel
savedmodel_dir = 'waste_classifier_savedmodel'
try:
    # Keras 2 writes a SavedModel directory for a path without an extension.
    model.save(savedmodel_dir)
except ValueError:
    # Keras 3 rejects extension-less paths in save() and provides
    # model.export() for writing a SavedModel instead.
    model.export(savedmodel_dir)
print("✓ Saved: waste_classifier_savedmodel/")

# Format 3: TensorFlow Lite (for mobile/edge)
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open('waste_classifier.tflite', 'wb') as f:
    f.write(tflite_model)
print("✓ Saved: waste_classifier.tflite")

print("\n" + "=" * 60)
print("ALL MODELS SAVED SUCCESSFULLY!")
print("=" * 60)

## 🎉 12. Summary & Next Steps

In [None]:
# Final recap of the run. Mis-encoded check marks and emoji in the printed
# text are repaired; f-prefixes are dropped from strings with no placeholders.
print("\n" + "=" * 70)
print("PROJECT SUMMARY")
print("=" * 70)
print(f"\n✓ Dataset: {train_generator.samples + val_generator.samples} images")
print("✓ Classes: Organic, Recyclable")
print("✓ Model: MobileNetV2 with Transfer Learning")
# Training accuracy is the value recorded for the last epoch that ran.
print(f"✓ Training Accuracy: {history.history['accuracy'][-1]*100:.2f}%")
print(f"✓ Validation Accuracy: {results[1]*100:.2f}%")
print(f"✓ F1-Score: {f1:.4f}")
print("\n" + "=" * 70)
print("NEXT STEPS")
print("=" * 70)
print("\n1. Test predictions: python predict.py --mode image --image test.jpg")
print("2. Real-time webcam: python predict.py --mode webcam")
print("3. Launch web app: streamlit run app.py")
print("4. Deploy to cloud or edge device")
print("\n" + "=" * 70)
print("♻️ READY TO MAKE A DIFFERENCE! 🌍")
print("=" * 70)