Loading libraries

In [23]:
import os
import torch
from PIL import Image
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, random_split

Implementing entropy loss 

In [24]:
class PartialCrossEntropyLoss(nn.Module):
    def __init__(self, ignore_index=None):
        super(PartialCrossEntropyLoss, self).__init__()
        self.ignore_index = ignore_index
    
    def forward(self, inputs, targets):
        if self.ignore_index is not None:
            mask = targets != self.ignore_index
            inputs = inputs[mask]
            targets = targets[mask]
        return nn.functional.cross_entropy(inputs, targets)
    

# inputs = torch.randn(3, 5, requires_grad=True)  # Batch of 3, 5 classes
# targets = torch.tensor([1, 0, 4])  # Corresponding targets
# ignore_index = -1  # Example of an index to ignore

# criterion = PartialCrossEntropyLoss(ignore_index=ignore_index)
# loss = criterion(inputs, targets)
# print('Loss:', loss.item())

Using my dataset

In [25]:
class RoadDataset(Dataset):
    def __init__(self, image_dir, centerline_dir, label_dir, transform=None):
        self.image_dir = image_dir
        self.centerline_dir = centerline_dir
        self.label_dir = label_dir
        self.transform = transform
        self.image_names = sorted(os.listdir(image_dir))  # This will list files like 'image160.bmp'

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        image_filename = self.image_names[idx]
        base_name = image_filename.replace('image', '').replace('.bmp', '')
        
        image_path = os.path.join(self.image_dir, f'image{base_name}.bmp')
        centerline_path = os.path.join(self.centerline_dir, f'new_line{base_name}.bmp')
        label_path = os.path.join(self.label_dir, f'{base_name}.bmp')

        image = Image.open(image_path).convert('RGB')
        centerline = Image.open(centerline_path).convert('RGB')
        label = Image.open(label_path).convert('L')

        if self.transform:
            image = self.transform(image)
            centerline = self.transform(centerline)
            label = self.transform(label)

        return image, centerline, label

transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

# dataset = RoadDataset('./Train/image', './Train/centerline', './Train/label', transform=transform)


Using CNN

In [26]:
class UNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=1):
        super(UNet, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.decoder = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, out_channels, kernel_size=3, padding=1),
            nn.Sigmoid()
        )
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

    def forward(self, x):
        x1 = self.encoder(x)
        x = self.decoder(x1)
        x = self.upsample(x)
        return x
    
# model = UNet(in_channels=3, out_channels=1)


Training

In [27]:
image_dir = './Train/image'
centerline_dir = './Train/centerline'
label_dir = './Train/label'

dataset = RoadDataset(image_dir, centerline_dir, label_dir, transform=transform)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])
learning_rates = [0.01, 0.001, 0.0001]
batch_sizes = [4, 8, 16]

for lr in learning_rates:
    for batch_size in batch_sizes:
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        model = UNet(in_channels=3, out_channels=1)
        criterion = PartialCrossEntropyLoss(ignore_index=-1)
        optimizer = optim.Adam(model.parameters(), lr=lr)

        # 5 epochs
        for epoch in range(5):
            model.train()
            for inputs, _, targets in train_loader:
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()
            print(f'Epoch {epoch+1}, Loss: {loss.item()}')

        model.eval()
        with torch.no_grad():
            for inputs, _, targets in test_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                print(f'Test Loss with LR={lr}, Batch Size={batch_size}: {loss.item()}')

Epoch 1, Loss: 354501.125
Epoch 2, Loss: 305896.65625
Epoch 3, Loss: 371656.375
Epoch 4, Loss: 339111.0625
Epoch 5, Loss: 333065.09375
Test Loss with LR=0.01, Batch Size=4: 277759.53125
Test Loss with LR=0.01, Batch Size=4: 261847.53125
Test Loss with LR=0.01, Batch Size=4: 255384.390625
Test Loss with LR=0.01, Batch Size=4: 297801.21875
Test Loss with LR=0.01, Batch Size=4: 417157.46875
Test Loss with LR=0.01, Batch Size=4: 389102.78125
Test Loss with LR=0.01, Batch Size=4: 442484.3125
Test Loss with LR=0.01, Batch Size=4: 233657.8125
Epoch 1, Loss: 690752.75
Epoch 2, Loss: 714409.5
Epoch 3, Loss: 769842.5
Epoch 4, Loss: 594310.875
Epoch 5, Loss: 604569.125
Test Loss with LR=0.01, Batch Size=8: 569585.25
Test Loss with LR=0.01, Batch Size=8: 583918.125
Test Loss with LR=0.01, Batch Size=8: 851052.5
Test Loss with LR=0.01, Batch Size=8: 713705.5625
Epoch 1, Loss: 1293841.125
Epoch 2, Loss: 1400333.0
Epoch 3, Loss: 1421643.25
Epoch 4, Loss: 1637108.0
Epoch 5, Loss: 1643180.0
Test Loss w

## Techincal report

#### Method:
- I used UNet architecture for road segmentation from satellite images. The UNet model is a convolutional neural network (CNN) commonly used for semantic segmentation tasks due to its ability to capture both global and local features efficiently.

- The UNet architecture consists of an encoder-decoder structure. The encoder extracts features from the input image through a series of convolutional layers followed by max-pooling operations, reducing the spatial dimensions while increasing the depth. The decoder then upsamples the features back to the original resolution through transpose convolutions, allowing the network to generate pixel-wise predictions.

- Additionally, **RoadDataset**, was created to handle the input images, centerline images, and corresponding labels. The dataset class loads the images and labels, applies any specified transformations, and returns them as tensors.

- For training, the **Adam optimizer** was utilized with different learning rates (LR) ranging from *0.01 to 0.0001*. The partial cross-entropy loss function's objective is optimization. This loss function calculates the cross-entropy only for the pixels labeled as road (ignoring background pixels) to mitigate class imbalance issues.

### Experiment:

- #### Purpose:
    - The objective of the experiment was to evaluate the performance of the UNet model for road segmentation on satellite images. The experiment aimed to analyze the effect of different learning rates and batch sizes on the model's training and testing performance.

- #### Hypothesis:
    - It was hypothesized that varying the learning rates and batch sizes would influence the convergence speed and final accuracy of the UNet model. *Higher learning rates might accelerate the training process* but could lead to unstable convergence, while larger batch sizes could provide computational efficiency but might result in degraded performance due to reduced batch diversity.

- #### Experimental Process:
    - The experiment involved training the UNet model for *5 epochs* on a dataset consisting of satellite images, centerline images, and corresponding road segmentation labels. The dataset was divided into training and testing sets with an *80:20 split*. The model was trained using different combinations of **learning rates** *(0.01, 0.001, 0.0001)* and **batch sizes** *(4, 8, 16)*.

### Results:

**Learning Rate (LR) = 0.01:**

- Batch Size = 4: Test Losses ranged from 233657.8125 to 442484.3125.
- Batch Size = 8: Test Losses ranged from 569585.25 to 851052.5.
- Batch Size = 16: Test Losses ranged from 1214214.0 to 1647113.75.

**Learning Rate (LR) = 0.001:**

- Batch Size = 4: Test Losses ranged from 224709.140625 to 432694.09375.
- Batch Size = 8: Test Losses ranged from 555091.125 to 832148.5625.
- Batch Size = 16: Test Losses ranged from 1186513.25 to 1616655.125.

**Learning Rate (LR) = 0.0001:**

- Batch Size = 4: Test Losses ranged from 225144.96875 to 438047.96875.
- Batch Size = 8: Test Losses ranged from 559954.0 to 835905.125.
- Batch Size = 16: Test Losses ranged from 1208766.5 to 1639852.625.

### Conclusion:
The results demonstrate that both learning rate and batch size significantly impact the model's performance. *Lower learning rates generally ensure more stable training and better convergence but require more time*. Higher learning rates can accelerate convergence but may cause instability. Smaller batch sizes introduce more randomness, potentially improving convergence and generalization but can **add noise**. Larger batch sizes enhance **computational efficiency but might limit parameter space exploration**.

Further hyperparameter tuning and exploring alternative optimization techniques could enhance performance and convergence. Adding evaluation metrics like *Intersection over Union (IoU) or F1 score* would offer a more detailed assessment of the model's segmentation accuracy.