<a href="https://colab.research.google.com/github/lahirunie-dulsara/EN3150-Assignment-3-CNN/blob/Sakith/RealWaste.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
# Mount Google Drive once so the dataset archive is reachable under
# /content/drive. A second forced remount right after a successful mount only
# repeats the work, so a single call is enough.
drive.mount('/content/drive')

Mounted at /content/drive
Mounted at /content/drive


In [2]:
import zipfile, os, re, shutil
from PIL import Image

# Location of the dataset archive on the mounted Drive and the local folder
# it is unpacked into.
zip_path = "/content/drive/MyDrive/Real Waste.zip"
extract_path = "/content/extracted_zip"

# Make sure the target folder exists, then unpack the whole archive into it.
os.makedirs(extract_path, exist_ok=True)
with zipfile.ZipFile(zip_path, mode='r') as zip_ref:
    zip_ref.extractall(path=extract_path)

print("✅ Unzipped to:", extract_path)


✅ Unzipped to: /content/extracted_zip


In [3]:
base_dir = "/content/extracted_zip/realwaste-main/RealWaste"

# Print one line per entry of base_dir with the number of items it contains.
for folder in os.listdir(base_dir):
    class_dir = os.path.join(base_dir, folder)
    count = len(os.listdir(class_dir))
    print(f"{folder}: {count} images")

Textile Trash: 318 images
Paper: 500 images
Food Organics: 411 images
Glass: 420 images
Miscellaneous Trash: 495 images
Vegetation: 436 images
Plastic: 921 images
Cardboard: 461 images
Metal: 790 images


In [4]:
pip install split-folders

Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl.metadata (6.2 kB)
Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


In [5]:
import splitfolders  # install with: pip install split-folders

input_folder = "/content/extracted_zip/realwaste-main/RealWaste"
output_folder = "/content/extracted_zip/realwaste-main/realwaste_split"

# Split every class folder 70 % / 15 % / 15 % into output_folder, using a
# fixed seed.
splitfolders.ratio(
    input_folder,
    output=output_folder,
    seed=42,
    ratio=(0.7, 0.15, 0.15),
)

Copying files: 4752 files [00:04, 1151.28 files/s]


In [6]:
base_dir = "/content/extracted_zip/realwaste-main/realwaste_split"

# For each split, list every class folder with its item count and finish with
# the split's overall total.
for split in ('train', 'val', 'test'):
    split_path = os.path.join(base_dir, split)
    print(f"\n📁 {split.upper()} SET")

    total = 0
    for cls in os.listdir(split_path):
        count = len(os.listdir(os.path.join(split_path, cls)))
        print(f"  {cls}: {count} images")
        total += count

    print(f"  ➜ Total {split}: {total} images")


📁 TRAIN SET
  Textile Trash: 222 images
  Paper: 350 images
  Food Organics: 287 images
  Glass: 294 images
  Miscellaneous Trash: 346 images
  Vegetation: 305 images
  Plastic: 644 images
  Cardboard: 322 images
  Metal: 553 images
  ➜ Total train: 3323 images

📁 VAL SET
  Textile Trash: 47 images
  Paper: 75 images
  Food Organics: 61 images
  Glass: 63 images
  Miscellaneous Trash: 74 images
  Vegetation: 65 images
  Plastic: 138 images
  Cardboard: 69 images
  Metal: 118 images
  ➜ Total val: 710 images

📁 TEST SET
  Textile Trash: 49 images
  Paper: 75 images
  Food Organics: 63 images
  Glass: 63 images
  Miscellaneous Trash: 75 images
  Vegetation: 66 images
  Plastic: 139 images
  Cardboard: 70 images
  Metal: 119 images
  ➜ Total test: 719 images


In [7]:
import os
import torch
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision import datasets, transforms

# Training pipeline: random crop/flip/rotation/colour augmentation, then
# conversion to a tensor.
train_augmentations = [
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
    transforms.ToTensor(),
]
train_transforms = transforms.Compose(train_augmentations)

# Validation/test pipeline: plain resize to 224x224, no augmentation.
val_test_transforms = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)

# One ImageFolder per split; class labels come from the sub-folder names.
split_root = "/content/extracted_zip/realwaste-main/realwaste_split"
train_dataset = datasets.ImageFolder(f"{split_root}/train", transform=train_transforms)
val_dataset = datasets.ImageFolder(f"{split_root}/val", transform=val_test_transforms)
test_dataset = datasets.ImageFolder(f"{split_root}/test", transform=val_test_transforms)


In [8]:
import numpy as np

# Number of training images per class index. ``minlength`` guarantees one slot
# per class even when a trailing class has no training images, so the counts
# always line up with ``train_dataset.classes``.
class_counts = np.bincount(train_dataset.targets, minlength=len(train_dataset.classes))
print("Class counts:", class_counts)

# Compute class weights (inverse of frequency)
class_weights = 1. / torch.tensor(class_counts, dtype=torch.float)

# Per-sample weight = weight of that sample's class. A single tensor lookup
# replaces the Python loop, and ``tolist()`` yields plain floats instead of a
# list of 0-dim tensors.
target_indices = torch.as_tensor(train_dataset.targets, dtype=torch.long)
sample_weights = class_weights[target_indices].tolist()

print("📊 Class counts per category:")
for cls, count in zip(train_dataset.classes, class_counts):
    print(f"  {cls:15s}: {count}")

print("\n⚖️ Class weights (inverse of frequency):")
for cls, w in zip(train_dataset.classes, class_weights):
    print(f"  {cls:15s}: {w:.6f}")

Class counts: [322 287 294 553 346 350 644 222 305]
📊 Class counts per category:
  Cardboard      : 322
  Food Organics  : 287
  Glass          : 294
  Metal          : 553
  Miscellaneous Trash: 346
  Paper          : 350
  Plastic        : 644
  Textile Trash  : 222
  Vegetation     : 305

⚖️ Class weights (inverse of frequency):
  Cardboard      : 0.003106
  Food Organics  : 0.003484
  Glass          : 0.003401
  Metal          : 0.001808
  Miscellaneous Trash: 0.002890
  Paper          : 0.002857
  Plastic        : 0.001553
  Textile Trash  : 0.004505
  Vegetation     : 0.003279


In [9]:
from torch.utils.data import WeightedRandomSampler, DataLoader

# Draw training samples with probability proportional to their weight so that
# rare classes are seen about as often as frequent ones. The dedicated, seeded
# generator makes the sequence of drawn indices repeatable whenever this cell
# is re-run (same seed as the dataset split).
sampler = WeightedRandomSampler(
    weights=sample_weights,      # weight per sample
    num_samples=len(sample_weights),  # total samples to draw per epoch
    replacement=True,            # allow repeated samples
    generator=torch.Generator().manual_seed(42),
)

# A sampler is mutually exclusive with shuffle=True, so only the sampler is set.
train_loader = DataLoader(train_dataset, batch_size=32, sampler=sampler)

print(f"\n✅ WeightedRandomSampler created successfully!")
print(f"➡️ Total samples in epoch: {len(sample_weights)}")
print(f"➡️ Batch size: {train_loader.batch_size}")
print(f"➡️ Total batches per epoch: {len(train_loader)}")


✅ WeightedRandomSampler created successfully!
➡️ Total samples in epoch: 3323
➡️ Batch size: 32
➡️ Total batches per epoch: 104


In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class WasteCNN(nn.Module):
    """Small three-stage CNN classifier for 3-channel 224x224 images.

    Each stage is a 3x3 convolution, ReLU and 2x2 max-pooling, which halves
    the spatial size (224 -> 112 -> 56 -> 28). The resulting 128x28x28
    feature map is flattened and passed through a 256-unit hidden layer with
    dropout, followed by the output layer.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=9):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)  # shared by all three stages
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        # 128 channels * 28 * 28 positions: fixes the expected input to 224x224.
        self.fc1 = nn.Linear(128 * 28 * 28, 256)
        self.fc2 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes).

        Args:
            x: Image batch of shape (batch, 3, 224, 224).

        Raises:
            RuntimeError: If the spatial size is not 224x224, because the
                flattened features then no longer match ``fc1``.
        """
        x = self.pool(F.relu(self.conv1(x)))  # 32x112x112
        x = self.pool(F.relu(self.conv2(x)))  # 64x56x56
        x = self.pool(F.relu(self.conv3(x)))  # 128x28x28
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 128 * 28 * 28)``, this can never merge several samples
        # into one row when the input has an unexpected size.
        x = torch.flatten(x, 1)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return x


In [11]:
import torch
# Report whether a CUDA device is visible and, if so, the name of device 0.
cuda_ok = torch.cuda.is_available()
print("✅ CUDA available:", cuda_ok)
if cuda_ok:
    print("GPU name:", torch.cuda.get_device_name(0))


✅ CUDA available: True
GPU name: Tesla T4


In [12]:
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the 9-class network and move its parameters to the chosen device.
model = WasteCNN(num_classes=9)
model = model.to(device)



In [None]:
# Loss and optimiser: cross-entropy on the raw logits, Adam over all model
# parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100

for epoch in range(num_epochs):
    model.train()  # training mode, so dropout is active
    running_loss = 0.0

    for batch_idx, batch in enumerate(train_loader):
        # Move the image batch and its labels to the model's device.
        images = batch[0].to(device)
        labels = batch[1].to(device)

        # Forward pass and loss for this batch.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Clear the previous step's gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss = running_loss + loss.item()

        # Progress line on every 100th batch.
        is_report_step = (batch_idx + 1) % 100 == 0
        if is_report_step:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}], "
                  f"Loss: {loss.item():.4f}")

    # Mean of the per-batch losses over the whole epoch.
    epoch_loss = running_loss / len(train_loader)
    print(f"✅ Epoch [{epoch+1}/{num_epochs}] completed — Average Loss: {epoch_loss:.4f}")



Epoch [1/100], Step [100/104], Loss: 0.9287
✅ Epoch [1/100] completed — Average Loss: 1.2577
Epoch [2/100], Step [100/104], Loss: 1.1599
✅ Epoch [2/100] completed — Average Loss: 1.2508
Epoch [3/100], Step [100/104], Loss: 1.3003
✅ Epoch [3/100] completed — Average Loss: 1.1670
Epoch [4/100], Step [100/104], Loss: 1.1286
✅ Epoch [4/100] completed — Average Loss: 1.1810
Epoch [5/100], Step [100/104], Loss: 1.4662
✅ Epoch [5/100] completed — Average Loss: 1.1289
Epoch [6/100], Step [100/104], Loss: 1.2124
✅ Epoch [6/100] completed — Average Loss: 1.1328
Epoch [7/100], Step [100/104], Loss: 0.9224
✅ Epoch [7/100] completed — Average Loss: 1.1003
Epoch [8/100], Step [100/104], Loss: 1.0077
✅ Epoch [8/100] completed — Average Loss: 1.1133
Epoch [9/100], Step [100/104], Loss: 0.9640
✅ Epoch [9/100] completed — Average Loss: 1.0670
Epoch [10/100], Step [100/104], Loss: 1.4295
✅ Epoch [10/100] completed — Average Loss: 1.0404
