<a href="https://colab.research.google.com/github/lahirunie-dulsara/EN3150-Assignment-3-CNN/blob/Sakith/RealWaste.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
from google.colab import drive
drive.mount('/content/drive')
drive.mount("/content/drive", force_remount=True)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
import zipfile, os, re, shutil
from PIL import Image

zip_path = "/content/drive/MyDrive/Real Waste.zip"

extract_path = "/content/extracted_zip"
os.makedirs(extract_path, exist_ok=True)

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)
print("✅ Unzipped to:", extract_path)


✅ Unzipped to: /content/extracted_zip


In [7]:
base_dir = "/content/extracted_zip/realwaste-main/RealWaste"
for folder in os.listdir(base_dir):
    count = len(os.listdir(os.path.join(base_dir, folder)))
    print(f"{folder}: {count} images")

Miscellaneous Trash: 495 images
Paper: 500 images
Textile Trash: 318 images
Metal: 790 images
Vegetation: 436 images
Cardboard: 461 images
Food Organics: 411 images
Plastic: 921 images
Glass: 420 images


In [10]:
import splitfolders  # install with: pip install split-folders

input_folder = "/content/extracted_zip/realwaste-main/RealWaste"
output_folder = "/content/extracted_zip/realwaste-main/realwaste_split"

splitfolders.ratio(input_folder, output=output_folder, seed=42, ratio=(.7, .15, .15))

Copying files: 4752 files [00:08, 560.34 files/s]


In [12]:
base_dir = "/content/extracted_zip/realwaste-main/realwaste_split"

# Count images in each subfolder
for split in ['train', 'val', 'test']:
    split_path = os.path.join(base_dir, split)
    print(f"\n📁 {split.upper()} SET")
    total = 0
    for cls in os.listdir(split_path):
        cls_path = os.path.join(split_path, cls)
        count = len(os.listdir(cls_path))

        total += count
        print(f"  {cls}: {count} images")
    print(f"  ➜ Total {split}: {total} images")


📁 TRAIN SET
  Miscellaneous Trash: 346 images
  Paper: 350 images
  Textile Trash: 222 images
  Metal: 553 images
  Vegetation: 305 images
  Cardboard: 322 images
  Food Organics: 287 images
  Plastic: 644 images
  Glass: 294 images
  ➜ Total train: 3323 images

📁 VAL SET
  Miscellaneous Trash: 74 images
  Paper: 75 images
  Textile Trash: 47 images
  Metal: 118 images
  Vegetation: 65 images
  Cardboard: 69 images
  Food Organics: 61 images
  Plastic: 138 images
  Glass: 63 images
  ➜ Total val: 710 images

📁 TEST SET
  Miscellaneous Trash: 75 images
  Paper: 75 images
  Textile Trash: 49 images
  Metal: 119 images
  Vegetation: 66 images
  Cardboard: 70 images
  Food Organics: 63 images
  Plastic: 139 images
  Glass: 63 images
  ➜ Total test: 719 images


In [13]:
import os
import torch
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision import datasets, transforms

train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
    transforms.ToTensor(),
])

val_test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

train_dataset = datasets.ImageFolder("/content/extracted_zip/realwaste-main/realwaste_split/train", transform=train_transforms)
val_dataset   = datasets.ImageFolder("/content/extracted_zip/realwaste-main/realwaste_split/val", transform=val_test_transforms)
test_dataset  = datasets.ImageFolder("/content/extracted_zip/realwaste-main/realwaste_split/test", transform=val_test_transforms)


In [16]:
import numpy as np

class_counts = np.bincount(train_dataset.targets)
print("Class counts:", class_counts)

# Compute class weights (inverse of frequency)
class_weights = 1. / torch.tensor(class_counts, dtype=torch.float)

sample_weights = [class_weights[label] for label in train_dataset.targets]

print("📊 Class counts per category:")
for cls, count in zip(train_dataset.classes, class_counts):
    print(f"  {cls:15s}: {count}")

print("\n⚖️ Class weights (inverse of frequency):")
for cls, w in zip(train_dataset.classes, class_weights):
    print(f"  {cls:15s}: {w:.6f}")

Class counts: [322 287 294 553 346 350 644 222 305]
📊 Class counts per category:
  Cardboard      : 322
  Food Organics  : 287
  Glass          : 294
  Metal          : 553
  Miscellaneous Trash: 346
  Paper          : 350
  Plastic        : 644
  Textile Trash  : 222
  Vegetation     : 305

⚖️ Class weights (inverse of frequency):
  Cardboard      : 0.003106
  Food Organics  : 0.003484
  Glass          : 0.003401
  Metal          : 0.001808
  Miscellaneous Trash: 0.002890
  Paper          : 0.002857
  Plastic        : 0.001553
  Textile Trash  : 0.004505
  Vegetation     : 0.003279


In [17]:
from torch.utils.data import WeightedRandomSampler, DataLoader

sampler = WeightedRandomSampler(
    weights=sample_weights,      # weight per sample
    num_samples=len(sample_weights),  # total samples to draw per epoch
    replacement=True             # allow repeated samples
)

train_loader = DataLoader(train_dataset, batch_size=32, sampler=sampler)

print(f"\n✅ WeightedRandomSampler created successfully!")
print(f"➡️ Total samples in epoch: {len(sample_weights)}")
print(f"➡️ Batch size: {train_loader.batch_size}")
print(f"➡️ Total batches per epoch: {len(train_loader)}")


✅ WeightedRandomSampler created successfully!
➡️ Total samples in epoch: 3323
➡️ Batch size: 32
➡️ Total batches per epoch: 104


In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class WasteCNN(nn.Module):
    def __init__(self, num_classes=9):
        super(WasteCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(128 * 28 * 28, 256)
        self.fc2 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # 32x112x112
        x = self.pool(F.relu(self.conv2(x)))  # 64x56x56
        x = self.pool(F.relu(self.conv3(x)))  # 128x28x28
        x = x.view(-1, 128 * 28 * 28)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return x
