In [14]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import random

## Custom Dataset Class

We define a custom `WasteDataset` class that inherits from PyTorch's `Dataset` class. This class is responsible for loading and preprocessing the images from the dataset.

### Initialization

The `__init__` method takes the following parameters:
- `root_dir`: The root directory containing the dataset images.
- `split`: The dataset split (train, validation, or test).
- `transform`: Optional image transformations to be applied.

Inside the `__init__` method, we:
1. Store the `root_dir`, `transform`, and `split` parameters.
2. Get the list of class names by listing the directories in `root_dir`.
3. Initialize empty lists for `image_paths` and `labels`.
4. Iterate over each class directory and its subfolders ('default' and 'real_world').
5. Shuffle the image names in each subfolder.
6. Based on the `split` parameter, select a portion of the images (60% for train, 20% for validation, 20% for test).
7. Append the image paths and corresponding labels to the respective lists.

### Length and Item Retrieval

The `__len__` method returns the total number of images in the dataset.

The `__getitem__` method takes an `index` and returns the image and its corresponding label at that index. It:
1. Retrieves the image path and label using the provided index.
2. Opens the image using `Image.open()` and converts it to RGB format.
3. Applies the specified image transformations, if any.
4. Returns the transformed image and its label.

This custom dataset class allows us to easily load and preprocess the waste images for training, validation, and testing.

In [15]:
# Define the dataset class (modified to include a split parameter)
class WasteDataset(Dataset):
    def __init__(self, root_dir, split, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = sorted(os.listdir(root_dir))
        self.image_paths = []
        self.labels = []
        
        for i, class_name in enumerate(self.classes):
            class_dir = os.path.join(root_dir, class_name)
            image_names = os.listdir(class_dir)
            random.shuffle(image_names)
            
            if split == 'train':
                image_names = image_names[:int(0.6 * len(image_names))]
            elif split == 'val':
                image_names = image_names[int(0.6 * len(image_names)):int(0.8 * len(image_names))]
            else:  # split == 'test'
                image_names = image_names[int(0.8 * len(image_names)):]
            
            for image_name in image_names:
                self.image_paths.append(os.path.join(class_dir, image_name))
                self.labels.append(i)
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, index):
        image_path = self.image_paths[index]
        label = self.labels[index]
        image = Image.open(image_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

## CNN Model Architecture

We define a convolutional neural network (CNN) model called `CNN` that inherits from PyTorch's `nn.Module` class. This model architecture consists of convolutional layers, pooling layers, and fully connected layers.

### Initialization

The `__init__` method takes the following parameter:
- `num_classes`: The number of output classes in the classification task.

Inside the `__init__` method, we define the layers of the CNN:
1. `conv1`: A 2D convolutional layer with 3 input channels, 32 output channels, a kernel size of 3, stride of 1, and padding of 1.
2. `relu`: A ReLU activation function.
3. `maxpool`: A 2D max pooling layer with a kernel size of 2 and stride of 2.
4. `conv2`: Another 2D convolutional layer with 32 input channels, 64 output channels, a kernel size of 3, stride of 1, and padding of 1.
5. `fc1`: A fully connected layer that takes the flattened output of `conv2` and maps it to 512 features.
6. `fc2`: The final fully connected layer that takes the 512 features and maps them to the number of output classes.

### Forward Pass

The `forward` method defines the forward pass of the CNN model. It takes an input tensor `x` and applies the following operations:
1. Pass `x` through `conv1`, followed by `relu` activation and `maxpool`.
2. Pass the output through `conv2`, followed by `relu` activation and `maxpool`.
3. Flatten the output of `conv2` using `x.view(x.size(0), -1)`.
4. Pass the flattened tensor through `fc1`, followed by `relu` activation.
5. Pass the output of `fc1` through `fc2` to obtain the final output.

The output of the `forward` method represents the predicted class scores for each input sample.

This CNN architecture is designed to learn hierarchical features from the input images and make predictions based on those features. The convolutional layers capture local patterns, the pooling layers reduce spatial dimensions, and the fully connected layers perform the final classification.

In [16]:
class CNN(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)

        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.adaptive_pool = nn.AdaptiveAvgPool2d((7, 7))  # 新增自適應池化層

        self.fc1 = nn.Linear(64 * 7 * 7, 512)  # 修改 fc1 的輸入大小
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.conv2(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.adaptive_pool(x)  # 應用自適應池化層
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.dropout(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

## Dataset Path and Hyperparameters

We set the following dataset path and hyperparameters:
- `dataset_path`: The path to the directory containing the dataset images.
- `batch_size`: The number of samples per batch during training and evaluation.
- `num_epochs`: The number of epochs to train the model.
- `learning_rate`: The learning rate for the optimizer.

These hyperparameters can be adjusted based on the specific requirements and available computational resources.

In [17]:
# Set the dataset path and hyperparameters
dataset_path = 'clean_dataset'
batch_size = 32
num_epochs = 30
learning_rate = 0.001

## Data Preprocessing and Loaders

We define a composition of image transformations using `transforms.Compose`:
1. `transforms.Resize((224, 224))`: Resizes the images to a fixed size of (224, 224) pixels.
2. `transforms.ToTensor()`: Converts the images to PyTorch tensors.
3. `transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])`: Normalizes the image tensors using the specified mean and standard deviation values.

These transformations ensure that the images are preprocessed consistently before being fed into the model.

We create instances of the `WasteDataset` class for the train, validation, and test splits, passing the `dataset_path`, `split`, and `transform` parameters. This allows us to load the dataset images with the specified transformations for each split.

Finally, we create data loaders for each dataset using `DataLoader`:
- `train_dataloader`: Loads the training data in batches of size `batch_size` and shuffles the samples.
- `val_dataloader`: Loads the validation data in batches of size `batch_size` without shuffling.
- `test_dataloader`: Loads the test data in batches of size `batch_size` without shuffling.

The data loaders provide an efficient way to iterate over the dataset during training and evaluation, handling batching and shuffling as specified.

In [18]:
from PIL import Image
from rembg import remove

def rotate_and_pad_dynamic(image, angle, background_color=(0, 0, 0), min_size=400):
    # Step 1: 旋轉並展開
    # image = remove(image)
    rotated = image.rotate(angle, expand=True, fillcolor=background_color)

    # Step 2: 動態計算正方形背景尺寸（取最大邊長，與 min_size 比較）
    side = max(min_size, rotated.width, rotated.height)
    
    # Step 3: 建立正方形背景並貼上圖片
    background = Image.new("RGB", (side, side), background_color)
    paste_x = (side - rotated.width) // 2
    paste_y = (side - rotated.height) // 2
    background.paste(rotated, (paste_x, paste_y))

    return background

In [19]:
# Create the datasets and data loaders
transform = transforms.Compose([
    transforms.Lambda(lambda img: rotate_and_pad_dynamic(img, angle=random.randint(-30, 30))),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

train_dataset = WasteDataset(dataset_path, split='train', transform=transform)
val_dataset = WasteDataset(dataset_path, split='val', transform=transform)
test_dataset = WasteDataset(dataset_path, split='test', transform=transform)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model Initialization

In [20]:
# Create the model, loss function, and optimizer
num_classes = len(train_dataset.classes)
model = CNN(num_classes).to('cuda')
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training


In [21]:
class EarlyStopping:
    def __init__(self, patience=5, delta=0.001):
        self.patience = patience  # 忍耐次數
        self.delta = delta        # 最小改善
        self.best_loss = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None or val_loss < self.best_loss - self.delta:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True


In [None]:
# Lists to store the training and validation losses
train_losses = []
val_losses = []
early_stopper = EarlyStopping(patience=5)

# Training loop
for epoch in range(num_epochs):
    # Training
    model.train()
    train_loss = 0.0
    for images, labels in train_dataloader:
        images = images.to('cuda')
        labels = labels.to('cuda')
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * images.size(0)
    
    train_loss /= len(train_dataset)
    train_losses.append(train_loss)
    
    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in val_dataloader:
            images = images.to('cuda')
            labels = labels.to('cuda')
            
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            val_loss += loss.item() * images.size(0)
    
    val_loss /= len(val_dataset)
    val_losses.append(val_loss)
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
    
    early_stopper(val_loss)
    if early_stopper.early_stop:
        print("Early stopping triggered!")
        break
print("Training completed!")

Epoch [1/30], Train Loss: 1.5567, Val Loss: 1.4177
Epoch [2/30], Train Loss: 1.3789, Val Loss: 1.2304
Epoch [3/30], Train Loss: 1.2848, Val Loss: 1.1979
Epoch [4/30], Train Loss: 1.2205, Val Loss: 1.1336
Epoch [5/30], Train Loss: 1.1179, Val Loss: 1.1582
Epoch [6/30], Train Loss: 1.1108, Val Loss: 1.0891
Epoch [7/30], Train Loss: 1.0584, Val Loss: 1.0583
Epoch [8/30], Train Loss: 1.0481, Val Loss: 1.0806
Epoch [9/30], Train Loss: 1.0341, Val Loss: 0.9942
Epoch [10/30], Train Loss: 1.0065, Val Loss: 1.0017
Epoch [11/30], Train Loss: 0.9616, Val Loss: 0.9788
Epoch [12/30], Train Loss: 0.9293, Val Loss: 0.9828
Epoch [13/30], Train Loss: 0.9043, Val Loss: 0.9331
Epoch [14/30], Train Loss: 0.9038, Val Loss: 0.9337
Epoch [15/30], Train Loss: 0.8819, Val Loss: 0.9163
Epoch [16/30], Train Loss: 0.8626, Val Loss: 0.9959
Epoch [17/30], Train Loss: 0.8359, Val Loss: 1.0101
Epoch [18/30], Train Loss: 0.7970, Val Loss: 0.9578
Epoch [19/30], Train Loss: 0.7942, Val Loss: 0.8420
Epoch [20/30], Train 

In [None]:
torch.save(model, 'waste_classification_model.pth')
print("Model saved successfully!")

Model saved successfully!


# 載入並測試模型

In [None]:
device = 'cuda'

model.eval()  # 切換到評估模式
correct = 0
total = 0

with torch.no_grad():  # 評估時不計算梯度
    for images, labels in test_dataloader:
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = model(images)             # 前向推論，shape=(batch_size, num_classes)
        _, preds = torch.max(outputs, 1)    # 取每列最大值的索引作為預測

        correct += (preds == labels).sum().item()
        total += labels.size(0)

accuracy = correct / total
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Test Accuracy: 83.33%


In [None]:
import os
from PIL import Image
import torch
from torchvision import transforms
from rembg import remove

# 設定測試圖片資料夾
test_image_dir = 'custom_test/'

# 模型輸入所需的轉換（依照你訓練時使用的 transform）
transform = transforms.Compose([
    #transforms.Lambda(lambda img: rotate_and_pad_dynamic(img, angle=random.randint(-10, 10))),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# 載入你訓練好的模型
testmodel = torch.load('waste_classification_model.pth')
testmodel.eval()
testmodel.to('cuda')
# 類別標籤（順序需與訓練時相符）
class_names = train_dataset.classes

# 預測每一張圖片
for filename in os.listdir(test_image_dir):
    if filename.lower().endswith(('.jpg', '.png', '.jpeg')):
        image_path = os.path.join(test_image_dir, filename)
        image = Image.open(image_path).convert('RGB')
        image = remove(image).convert('RGB')
        input_tensor = transform(image).unsqueeze(0)  # 增加 batch dimension
        input_tensor = input_tensor.to('cuda')
        
        with torch.no_grad():
            output = testmodel(input_tensor)
            probabilities = torch.nn.functional.softmax(output, dim=1)
            predicted_class = class_names[output.argmax(1).item()]

        if predicted_class.startswith('aluminum_'):
            print(f"{filename} → 預測為: 鐵鋁罐")
        elif predicted_class.startswith('paper_meal'):
            print(f"{filename} → 預測為: 紙餐盒")
        elif predicted_class.startswith('plastic'):
            print(f"{filename} → 預測為: 寶特瓶")
        elif predicted_class.startswith('paper_c'):
            print(f"{filename} → 預測為: 紙杯")

  testmodel = torch.load('waste_classification_model.pth')


image2.png → 預測為: 紙杯
image3.png → 預測為: 紙餐盒
image4.png → 預測為: 紙杯
image5.png → 預測為: 紙杯
