In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from sklearn.model_selection import train_test_split
import timm
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import kagglehub
import os

In [None]:
# Fetch the waste-classification dataset through kagglehub; the value returned
# by the call is the local directory that holds the downloaded files.
data_dir = kagglehub.dataset_download(
    "siddhantmaji/unified-waste-classification-dataset"
)
print("Path to dataset files:", data_dir)

Path to dataset files: /kaggle/input/unified-waste-classification-dataset


In [None]:
# --- Configuration -----------------------------------------------------------
BATCH_SIZE = 32
IMG_SIZE = 224
SEED = 42            # shared by the stratified split and the training shuffle
VAL_FRACTION = 0.2   # share of the images held out for validation
NUM_WORKERS = 2      # background processes that decode/resize images

# The images live in a nested folder inside the download. Append the suffix only
# when it is missing, so re-running this cell does not keep extending `data_dir`
# (which would point ImageFolder at a path that does not exist).
DATASET_SUBDIR = "content/unified_dataset"
if not str(data_dir).rstrip("/").endswith(DATASET_SUBDIR):
    data_dir = f"{data_dir}/{DATASET_SUBDIR}"

# --- Preprocessing (no data augmentation) ------------------------------------
# Fixed-size resize, tensor conversion, then per-channel normalisation with the
# ImageNet statistics that match the ImageNet-pretrained backbone used later.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# --- Dataset -----------------------------------------------------------------
# One sub-folder per class; ImageFolder derives the integer labels from them.
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

# --- Stratified train/val split ----------------------------------------------
# `dataset.targets` is the per-sample label list, used here so both subsets
# keep the class proportions of the full dataset.
targets = dataset.targets
train_idx, val_idx = train_test_split(
    range(len(dataset)),
    test_size=VAL_FRACTION,
    stratify=targets,
    random_state=SEED,
)
assert len(train_idx) + len(val_idx) == len(dataset), "split lost or duplicated samples"

train_dataset = torch.utils.data.Subset(dataset, train_idx)
val_dataset = torch.utils.data.Subset(dataset, val_idx)

# --- Loaders -----------------------------------------------------------------
# Worker processes keep image decoding off the training process; pinned host
# memory only helps when batches are copied to a CUDA device.
loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=torch.cuda.is_available(),
)

# A dedicated seeded generator makes the training shuffle order reproducible
# without touching the global torch RNG.
shuffle_generator = torch.Generator().manual_seed(SEED)

train_loader = DataLoader(
    train_dataset, shuffle=True, generator=shuffle_generator, **loader_kwargs
)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

In [None]:
# Train on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Size the classifier head from the dataset's class folders instead of a
# hard-coded count, so the head always matches the labels ImageFolder produced.
NUM_CLASSES = len(dataset.classes)

# MobileNetV3-Large from timm, pretrained on ImageNet
model = timm.create_model(
    "mobilenetv3_large_100",
    pretrained=True,
    num_classes=NUM_CLASSES,
)

# Assign the result so the cell does not end in a bare expression; otherwise the
# notebook echoes the full module repr into the output.
model = model.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/22.1M [00:00<?, ?B/s]

MobileNetV3(
  (conv_stem): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNormAct2d(
    16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): Hardswish()
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
        (bn1): BatchNormAct2d(
          16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU(inplace=True)
        )
        (aa): Identity()
        (se): Identity()
        (conv_pw): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNormAct2d(
          16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
    )
    (1): Sequential(
     

In [None]:
# --- Training configuration --------------------------------------------------
LEARNING_RATE = 1e-4
EPOCHS = 10

criterion = nn.CrossEntropyLoss()  # default reduction: mean over the batch
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# --- Fine-tuning loop --------------------------------------------------------
# Every parameter of the model is optimised (nothing is frozen). After each
# epoch the loss and accuracy measured on the training batches are printed.
for epoch in range(EPOCHS):
    model.train()
    loss_sum, correct, total = 0.0, 0, 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
        images, labels = images.to(device), labels.to(device)
        batch_size = labels.size(0)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # `loss` is a per-batch mean; scale it back to a sum so the epoch
        # figure is a true per-sample mean even when the last batch is short.
        loss_sum += loss.item() * batch_size
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += batch_size

    epoch_loss = loss_sum / total
    acc = 100 * correct / total
    print(f"Epoch {epoch+1}: Loss = {epoch_loss:.4f} | Accuracy = {acc:.2f}%")

Epoch 1/10: 100%|██████████| 1600/1600 [05:38<00:00,  4.72it/s]


Epoch 1: Loss = 0.4737 | Accuracy = 85.79%


Epoch 2/10: 100%|██████████| 1600/1600 [04:12<00:00,  6.34it/s]


Epoch 2: Loss = 0.1149 | Accuracy = 96.20%


Epoch 3/10: 100%|██████████| 1600/1600 [04:12<00:00,  6.34it/s]


Epoch 3: Loss = 0.0609 | Accuracy = 97.95%


Epoch 4/10: 100%|██████████| 1600/1600 [04:12<00:00,  6.34it/s]


Epoch 4: Loss = 0.0408 | Accuracy = 98.68%


Epoch 5/10: 100%|██████████| 1600/1600 [04:17<00:00,  6.20it/s]


Epoch 5: Loss = 0.0329 | Accuracy = 98.87%


Epoch 6/10: 100%|██████████| 1600/1600 [04:14<00:00,  6.28it/s]


Epoch 6: Loss = 0.0258 | Accuracy = 99.16%


Epoch 7/10: 100%|██████████| 1600/1600 [04:12<00:00,  6.34it/s]


Epoch 7: Loss = 0.0246 | Accuracy = 99.20%


Epoch 8/10: 100%|██████████| 1600/1600 [04:11<00:00,  6.36it/s]


Epoch 8: Loss = 0.0207 | Accuracy = 99.36%


Epoch 9/10: 100%|██████████| 1600/1600 [04:11<00:00,  6.36it/s]


Epoch 9: Loss = 0.0202 | Accuracy = 99.33%


Epoch 10/10: 100%|██████████| 1600/1600 [04:11<00:00,  6.36it/s]

Epoch 10: Loss = 0.0153 | Accuracy = 99.50%





In [None]:
# Evaluate the trained model on the held-out validation subset.
# eval() switches layers such as batch norm to inference behaviour, and
# no_grad() skips gradient bookkeeping since no backward pass is needed.
model.eval()
val_loss_sum, correct, total = 0.0, 0, 0

with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        batch_size = labels.size(0)

        outputs = model(images)
        loss = criterion(outputs, labels)

        # `loss` is a per-batch mean; weight it by the batch size so the final
        # figure is a per-sample mean even when the last batch is short.
        val_loss_sum += loss.item() * batch_size
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += batch_size

val_acc = 100 * correct / total
val_loss = val_loss_sum / total
print(f"Validation Accuracy: {val_acc:.2f}% | Loss: {val_loss:.4f}")

Validation Accuracy: 98.07% | Loss: 0.0790


In [None]:
# Persist only the model's state_dict (its learned tensors), not the module
# object itself; loading it later requires rebuilding the same architecture.
trained_weights = model.state_dict()
torch.save(trained_weights, "mobilenetv3_garbage_classifier.pth")

In [None]:
# Offer the saved weights as a browser download when running inside Google
# Colab. The import is kept local and guarded because `google.colab` only exists
# in that environment; elsewhere the cell reports where the file is instead of
# aborting a full notebook run with ImportError.
try:
    from google.colab import files
except ImportError:
    print("google.colab is unavailable; weights are saved at mobilenetv3_garbage_classifier.pth")
else:
    files.download("mobilenetv3_garbage_classifier.pth")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>