In [1]:
# Cell 1: Import and Setup
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

from PIL import Image
import matplotlib.pyplot as plt
import cv2

DATA_PATH  = r"D:\data\data original"
OUTPUT_DIR = r"D:\data mining\appendix"
MODEL_DIR  = r"D:\data mining\models"
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)

WEIGHT_PATH = os.path.join(MODEL_DIR, "trashnet_customcnn.pth")

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
NUM_CLASSES = 6
IMG_SIZE = 224
BATCH_SIZE = 32
EPOCHS = 15
NUM_WORKERS = 0  

class_names = ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']

print("Device:", DEVICE)
print("GradCAM setup complete")

Device: cpu
GradCAM setup complete


In [2]:
# Cell 2: Dataset + Loader (NO split)

transform_train = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5]),
])

transform_eval = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5]),
])

dataset = datasets.ImageFolder(root=DATA_PATH, transform=transform_train)
print("ImageFolder classes:", dataset.classes)
assert len(dataset.classes) == NUM_CLASSES, dataset.classes

loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)

print("Total images:", len(dataset))

ImageFolder classes: ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']
Total images: 2527


In [3]:
# Cell 3: Custom CNN + Train + Save

class CustomCNN(nn.Module):
    def __init__(self, num_classes=6):
        super().__init__()
        self.features = nn.Sequential(
            # 224 -> 112
            nn.Conv2d(3, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            # 112 -> 56
            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            # 56 -> 28
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            # 28 -> 14
            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.features(x)              # [B,256,14,14]
        x = self.pool(x).flatten(1)       # [B,256]
        x = self.classifier(x)            # [B,6]
        return x

model = CustomCNN(NUM_CLASSES).to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(1, EPOCHS + 1):
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for imgs, labels in loader:
        imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad(set_to_none=True)
        logits = model(imgs)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * imgs.size(0)
        preds = logits.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    print(f"Epoch {epoch:02d}/{EPOCHS} | Loss {epoch_loss:.4f} | Train Acc {epoch_acc*100:.2f}%")

torch.save(model.state_dict(), WEIGHT_PATH)
print(" Saved:", WEIGHT_PATH)

Epoch 01/15 | Loss 1.2532 | Train Acc 52.87%
Epoch 02/15 | Loss 1.1136 | Train Acc 57.78%
Epoch 03/15 | Loss 1.0277 | Train Acc 60.78%
Epoch 04/15 | Loss 0.9680 | Train Acc 63.67%
Epoch 05/15 | Loss 0.9501 | Train Acc 64.54%
Epoch 06/15 | Loss 0.8941 | Train Acc 66.32%
Epoch 07/15 | Loss 0.8420 | Train Acc 69.41%
Epoch 08/15 | Loss 0.8325 | Train Acc 69.96%
Epoch 09/15 | Loss 0.7940 | Train Acc 70.12%
Epoch 10/15 | Loss 0.7676 | Train Acc 72.85%
Epoch 11/15 | Loss 0.7373 | Train Acc 73.49%
Epoch 12/15 | Loss 0.7392 | Train Acc 72.30%
Epoch 13/15 | Loss 0.7007 | Train Acc 75.98%
Epoch 14/15 | Loss 0.6800 | Train Acc 75.23%
Epoch 15/15 | Loss 0.6844 | Train Acc 75.03%
 Saved: D:\data mining\models\trashnet_customcnn.pth


In [4]:
# Cell 4: Load model for inference/GradCAM (NO pretrained)

infer_model = CustomCNN(NUM_CLASSES).to(DEVICE)
infer_model.load_state_dict(torch.load(WEIGHT_PATH, map_location=DEVICE))
infer_model.eval()


preprocess = transform_eval

print(" Loaded CustomCNN for inference/GradCAM")


 Loaded CustomCNN for inference/GradCAM


  infer_model.load_state_dict(torch.load(WEIGHT_PATH, map_location=DEVICE))


In [5]:
# Cell 5: GradCAM Class Implementation (stable)

class GradCAM:
    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None

        self.target_layer.register_forward_hook(self._forward_hook)
        self.target_layer.register_full_backward_hook(self._backward_hook)

    def _forward_hook(self, module, inp, out):
        self.activations = out  # keep graph for backward

    def _backward_hook(self, module, grad_input, grad_output):
        self.gradients = grad_output[0]  # [B,C,H,W]

    def generate_cam(self, input_tensor, target_class: int):
        self.model.zero_grad(set_to_none=True)
        output = self.model(input_tensor)          # [1,6]
        score = output[0, int(target_class)]
        score.backward(retain_graph=True)

        grads = self.gradients[0]                  # [C,H,W]
        acts  = self.activations[0]                # [C,H,W]

        weights = grads.mean(dim=(1,2), keepdim=True)  # [C,1,1]
        cam = (weights * acts).sum(dim=0)              # [H,W]
        cam = F.relu(cam)

        cam = cam - cam.min()
        cam = cam / (cam.max() + 1e-8)

        cam_np = cam.detach().cpu().numpy()
        cam_np = np.squeeze(cam_np)
        if cam_np.ndim != 2:
            cam_np = cam_np[0]
        return cam_np

def overlay_cam_on_image(img_rgb_224, cam_224):
    cam_224 = np.squeeze(cam_224)
    if cam_224.ndim != 2:
        cam_224 = cam_224[0]
    heatmap = cv2.applyColorMap(np.uint8(255 * cam_224), cv2.COLORMAP_JET)
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    overlay = np.uint8(heatmap * 0.4 + img_rgb_224 * 0.6)
    return heatmap, overlay


target_layer = None
for m in infer_model.modules():
    if isinstance(m, nn.Conv2d):
        target_layer = m
assert target_layer is not None

gradcam = GradCAM(infer_model, target_layer)
print(" GradCAM target layer:", target_layer)


 GradCAM target layer: Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [6]:
# Cell 6: GradCAM True vs Pred for one image

def predict_id(input_tensor):
    with torch.no_grad():
        logits = infer_model(input_tensor)
        return int(logits.argmax(dim=1).item())

def gradcam_true_vs_pred(img_path, true_class_id):
    original = Image.open(img_path).convert("RGB")
    img_224 = np.array(original.resize((224, 224)))

    x = preprocess(original).unsqueeze(0).to(DEVICE)

    pred_id = predict_id(x)

    cam_true = gradcam.generate_cam(x, target_class=int(true_class_id))
    cam_pred = gradcam.generate_cam(x, target_class=int(pred_id))

    cam_true_224 = cv2.resize(cam_true, (224, 224))
    cam_pred_224 = cv2.resize(cam_pred, (224, 224))

    _, overlay_true = overlay_cam_on_image(img_224, cam_true_224)
    _, overlay_pred = overlay_cam_on_image(img_224, cam_pred_224)

    fig, axes = plt.subplots(1, 3, figsize=(14, 5))
    axes[0].imshow(img_224);       axes[0].set_title("Original"); axes[0].axis("off")
    axes[1].imshow(overlay_true);  axes[1].set_title(f"GradCAM TRUE: {class_names[true_class_id]}"); axes[1].axis("off")
    axes[2].imshow(overlay_pred);  axes[2].set_title(f"GradCAM PRED: {class_names[pred_id]}"); axes[2].axis("off")
    plt.tight_layout()
    plt.show()

    return pred_id

# Example:
# img_path = r"D:\data\data original\plastic\xxx.jpg"
# true_id = class_names.index("plastic")
# pred = gradcam_true_vs_pred(img_path, true_id)
# print("Pred:", class_names[pred])

In [7]:
# Cell 7: Generate GradCAM for All Classes (save)
import os
DATA_PATH = r"D:\data\data original"
class_names = ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']
gradcam_dir = os.path.join(OUTPUT_DIR, "gradcam_customcnn")
os.makedirs(gradcam_dir, exist_ok=True)
from PIL import Image
print("Generating GradCAM for all classes...")

for class_idx, class_name in enumerate(class_names):
    class_path = os.path.join(DATA_PATH, class_name)
    files = [f for f in os.listdir(class_path) if f.lower().endswith((".jpg",".jpeg",".png",".bmp",".webp"))]
    files = files[:5]

    if len(files) == 0:
        print(" No images:", class_name)
        continue

    fig, axes = plt.subplots(len(files), 3, figsize=(12, 4 * len(files)))
    if len(files) == 1:
        axes = np.expand_dims(axes, axis=0)

    for r, fname in enumerate(files):
        img_path = os.path.join(class_path, fname)
        original = Image.open(img_path).convert("RGB")
        img_224 = np.array(original.resize((224, 224)))

        x = preprocess(original).unsqueeze(0).to(DEVICE)
        pred_id = predict_id(x)

        cam = gradcam.generate_cam(x, target_class=int(class_idx))  
        cam_224 = cv2.resize(cam, (224, 224))
        _, overlay = overlay_cam_on_image(img_224, cam_224)

        axes[r, 0].imshow(img_224);   axes[r, 0].set_title("Original"); axes[r, 0].axis("off")
        axes[r, 1].imshow(cam_224, cmap="jet"); axes[r, 1].set_title(f"GradCAM TRUE={class_name}"); axes[r, 1].axis("off")
        axes[r, 2].imshow(overlay);  axes[r, 2].set_title(f"Overlay PRED={class_names[pred_id]}"); axes[r, 2].axis("off")

    plt.suptitle(f"GradCAM - CustomCNN - TRUE CLASS: {class_name}", fontsize=14, fontweight="bold")
    plt.tight_layout()

    out_path = os.path.join(gradcam_dir, f"gradcam_{class_name}.png")
    plt.savefig(out_path, dpi=300, bbox_inches="tight")
    plt.close()

    print("  Completed:", class_name, "->", out_path)

print("\nAll GradCAM visualizations saved to:", gradcam_dir)

Generating GradCAM for all classes...
  Completed: cardboard -> D:\data mining\appendix\gradcam_customcnn\gradcam_cardboard.png
  Completed: glass -> D:\data mining\appendix\gradcam_customcnn\gradcam_glass.png
  Completed: metal -> D:\data mining\appendix\gradcam_customcnn\gradcam_metal.png
  Completed: paper -> D:\data mining\appendix\gradcam_customcnn\gradcam_paper.png
  Completed: plastic -> D:\data mining\appendix\gradcam_customcnn\gradcam_plastic.png
  Completed: trash -> D:\data mining\appendix\gradcam_customcnn\gradcam_trash.png

All GradCAM visualizations saved to: D:\data mining\appendix\gradcam_customcnn
