## Importing required libraries

In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
import torch.optim as optim
from torch.utils.data import Dataset
import torch.nn.functional as F
from torch.utils.data import DataLoader
import os
from PIL import Image
import numpy as np

In [2]:
class TrainDataset(Dataset):
    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.image_dir = os.path.join(self.data_dir, 'leftImg8bit','train','3')
        self.label_dir = os.path.join(self.data_dir, 'label_processed','train','3')
        self.img_ids = os.listdir(self.image_dir)

    def __getitem__(self, index):
        # Load the image and label
        img_id = self.img_ids[index].split('_leftImg8bit.png')[0]
        image_path = os.path.join(self.image_dir, f"{img_id}_leftImg8bit.png")
        label_path = os.path.join(self.label_dir, f"{img_id}_gtFine_polygons.png")
        image = Image.open(image_path).convert('RGB')
        label = Image.open(label_path)

        # Preprocess the image and label
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        image = transform(image)
        label = torch.tensor(np.array(label), dtype=torch.long)

        return image, label

    def __len__(self):
        return len(self.img_ids)


In [3]:
class ValDataset(Dataset):
    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.image_dir = os.path.join(self.data_dir, 'leftImg8bit','val','3')
        self.label_dir = os.path.join(self.data_dir, 'label_processed','val','3')
        self.img_ids = os.listdir(self.image_dir)

    def __getitem__(self, index):
        # Load the image and label
        img_id = self.img_ids[index].split('_leftImg8bit.png')[0]
        image_path = os.path.join(self.image_dir, f"{img_id}_leftImg8bit.png")
        label_path = os.path.join(self.label_dir, f"{img_id}_gtFine_polygons.png")
        image = Image.open(image_path).convert('RGB')
        label = Image.open(label_path)

        # Preprocess the image and label
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            
            ])
        image = transform(image)
        label = torch.tensor(np.array(label), dtype=torch.long)

        return image, label

    def __len__(self):
        return len(self.img_ids)

In [4]:
# Define the directories for the custom dataset
curr_dir = os.getcwd()
data_dir = os.path.join(curr_dir, 'data_subset')

# Load the custom dataset
train_dataset = TrainDataset(data_dir)
val_dataset = ValDataset(data_dir)

# Define the dataloaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

In [5]:
for images, labels in train_loader:
    print(images.shape,labels.shape)

torch.Size([16, 3, 720, 1280]) torch.Size([16, 720, 1280])
torch.Size([16, 3, 720, 1280]) torch.Size([16, 720, 1280])
torch.Size([16, 3, 720, 1280]) torch.Size([16, 720, 1280])
torch.Size([16, 3, 720, 1280]) torch.Size([16, 720, 1280])
torch.Size([16, 3, 720, 1280]) torch.Size([16, 720, 1280])
torch.Size([3, 3, 720, 1280]) torch.Size([3, 720, 1280])


In [6]:
# Load the pre-trained SegNet model from torchvision
model = models.segmentation.fcn_resnet50(pretrained=True, progress=True)

# Replace the last layer with a new one to fit the number of classes in the custom dataset
num_classes = 5 # Change this to the number of classes in your custom dataset
model.classifier[4] = nn.Conv2d(512, num_classes, kernel_size=(1, 1), stride=(1, 1))

Downloading: "https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth" to C:\Users\samme/.cache\torch\hub\checkpoints\fcn_resnet50_coco-1167a1af.pth
100%|██████████| 135M/135M [00:04<00:00, 33.3MB/s] 


In [7]:
#define number of epochs 
num_epochs = 10
# Define the device (CPU or GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("using {}".format(device))
model.to(device)

using cpu


FCN(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequenti

In [8]:
# Define the loss function
criterion = nn.CrossEntropyLoss()

# Define the optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Train the model
for epoch in range(num_epochs):
    # Training
    model.train()
    train_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)['out']
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * images.size(0)
    train_loss /= len(train_loader.dataset)
    
    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)['out']
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)
        val_loss /= len(val_loader.dataset)
        
    # Print the loss for each epoch
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

In [None]:
torch.save(model.state_dict(), 'resnet50_segmentation.pth')