In [None]:
import os

import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torch.nn.functional import relu
from torch.utils.data import DataLoader
from torchvision import models
from torchvision.transforms import ToTensor

class UNet(nn.Module):
    """U-Net segmentation network built from padded 3x3 convolutions.

    Every 3x3 convolution uses ``padding=1``, so height and width are changed
    only by the four 2x2 max-pools (halving) and the four transposed
    convolutions (doubling).  The input height and width must therefore be
    divisible by 16; otherwise an upsampled decoder map no longer has the same
    size as the encoder map it is concatenated with and ``torch.cat`` raises.

    Args:
        n_class: Number of output channels (one logit map per class).
    """

    def __init__(self, n_class):
        super().__init__()

        # Encoder: four blocks of two 3x3 convolutions followed by a 2x2
        # max-pool, plus a bottleneck block without pooling.
        # Sizes in the comments are for an H x W input (padding=1 keeps the
        # spatial size through every convolution).
        self.e11 = nn.Conv2d(3, 64, kernel_size=3, padding=1)  # H x W x 64
        self.e12 = nn.Conv2d(64, 64, kernel_size=3, padding=1)  # H x W x 64
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # H/2 x W/2 x 64

        self.e21 = nn.Conv2d(64, 128, kernel_size=3, padding=1)  # H/2 x W/2 x 128
        self.e22 = nn.Conv2d(128, 128, kernel_size=3, padding=1)  # H/2 x W/2 x 128
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # H/4 x W/4 x 128

        self.e31 = nn.Conv2d(128, 256, kernel_size=3, padding=1)  # H/4 x W/4 x 256
        self.e32 = nn.Conv2d(256, 256, kernel_size=3, padding=1)  # H/4 x W/4 x 256
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # H/8 x W/8 x 256

        self.e41 = nn.Conv2d(256, 512, kernel_size=3, padding=1)  # H/8 x W/8 x 512
        self.e42 = nn.Conv2d(512, 512, kernel_size=3, padding=1)  # H/8 x W/8 x 512
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)  # H/16 x W/16 x 512

        self.e51 = nn.Conv2d(512, 1024, kernel_size=3, padding=1)  # H/16 x W/16 x 1024
        self.e52 = nn.Conv2d(1024, 1024, kernel_size=3, padding=1)  # H/16 x W/16 x 1024

        # Decoder: each stage doubles the resolution with a transposed
        # convolution, concatenates the matching encoder map (which doubles
        # the channel count again) and applies two 3x3 convolutions.
        self.upconv1 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.d11 = nn.Conv2d(1024, 512, kernel_size=3, padding=1)
        self.d12 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        self.upconv2 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.d21 = nn.Conv2d(512, 256, kernel_size=3, padding=1)
        self.d22 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        self.upconv3 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.d31 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.d32 = nn.Conv2d(128, 128, kernel_size=3, padding=1)

        self.upconv4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.d41 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.d42 = nn.Conv2d(64, 64, kernel_size=3, padding=1)

        # Output layer: 1x1 convolution mapping 64 features to class logits.
        self.outconv = nn.Conv2d(64, n_class, kernel_size=1)

    def forward(self, x):
        """Return per-pixel class logits for a batch of images.

        Args:
            x: Tensor of shape (N, 3, H, W) with H and W divisible by 16.

        Returns:
            Tensor of shape (N, n_class, H, W) holding unnormalised logits.
        """
        # Encoder
        xe11 = relu(self.e11(x))
        xe12 = relu(self.e12(xe11))
        xp1 = self.pool1(xe12)

        xe21 = relu(self.e21(xp1))
        xe22 = relu(self.e22(xe21))
        xp2 = self.pool2(xe22)

        xe31 = relu(self.e31(xp2))
        xe32 = relu(self.e32(xe31))
        xp3 = self.pool3(xe32)

        xe41 = relu(self.e41(xp3))
        xe42 = relu(self.e42(xe41))
        xp4 = self.pool4(xe42)

        xe51 = relu(self.e51(xp4))
        xe52 = relu(self.e52(xe51))

        # Decoder (skip connections are concatenated along the channel axis)
        xu1 = self.upconv1(xe52)
        xu11 = torch.cat([xu1, xe42], dim=1)
        xd11 = relu(self.d11(xu11))
        xd12 = relu(self.d12(xd11))

        xu2 = self.upconv2(xd12)
        xu22 = torch.cat([xu2, xe32], dim=1)
        xd21 = relu(self.d21(xu22))
        xd22 = relu(self.d22(xd21))

        xu3 = self.upconv3(xd22)
        xu33 = torch.cat([xu3, xe22], dim=1)
        xd31 = relu(self.d31(xu33))
        xd32 = relu(self.d32(xd31))

        xu4 = self.upconv4(xd32)
        xu44 = torch.cat([xu4, xe12], dim=1)
        xd41 = relu(self.d41(xu44))
        xd42 = relu(self.d42(xd41))

        # Output layer
        return self.outconv(xd42)

# Define dataset and dataloader
class CustomDataset(torch.utils.data.Dataset):
    """Paired image/mask dataset read from two folders with matching file names.

    Args:
        image_folder: Directory containing the input images.
        mask_folder: Directory containing one mask per image, stored under
            the same file name as the image.
        transform: Optional callable applied to the image and, separately,
            to the mask.
    """

    def __init__(self, image_folder, mask_folder, transform=None):
        self.image_folder = image_folder
        self.mask_folder = mask_folder
        self.transform = transform
        # os.listdir returns entries in arbitrary order and includes hidden
        # files (e.g. macOS ".DS_Store") that cannot be opened as images:
        # sort for a reproducible index -> file mapping and skip dot-files.
        self.images = sorted(
            name for name in os.listdir(image_folder) if not name.startswith(".")
        )

    def __len__(self):
        """Return the number of image files found in ``image_folder``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load the ``idx``-th image and its mask.

        Returns:
            ``(image, mask)``: the RGB image and the single-channel ("L")
            mask, each passed through ``transform`` when one was given.
        """
        file_name = self.images[idx]
        img_name = os.path.join(self.image_folder, file_name)
        # Masks are looked up under the same file name as the image.
        mask_name = os.path.join(self.mask_folder, file_name)
        image = Image.open(img_name).convert("RGB")
        mask = Image.open(mask_name).convert("L")  # grayscale, one channel

        if self.transform:
            image = self.transform(image)
            # NOTE(review): the same transform is applied to the mask; a
            # value-rescaling transform would also rescale the label values.
            mask = self.transform(mask)

        return image, mask

# Define data transformations
# The same transform object is handed to the dataset, which applies it to both
# the image and the mask.
# NOTE(review): ToTensor rescales 8-bit pixel values to [0, 1]; after the
# .long() cast in the loop below only mask pixels equal to 255 stay non-zero
# -- confirm the masks are stored as 0/255.
data_transform = ToTensor()

# Define data paths (absolute, machine-specific locations of the training data)
image_folder = "/Users/mohammadfaridnaufal/Library/CloudStorage/OneDrive-UniversitasSurabaya/S3/Project/Program/U-Net/train/images"
mask_folder = "/Users/mohammadfaridnaufal/Library/CloudStorage/OneDrive-UniversitasSurabaya/S3/Project/Program/U-Net/train/masks"

# Create dataset and dataloader
# NOTE(review): no resizing happens in this version, so batching with
# batch_size=4 presumably requires all images to share one size -- confirm.
dataset = CustomDataset(image_folder, mask_folder, transform=data_transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Initialize UNet model
model = UNet(n_class=2)  # Two output classes; use n_class=3 for background + two foreground classes

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 50

# Training loop: one optimizer step per batch, mean batch loss printed per epoch
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, masks in dataloader:
        # Debug output: batch tensor shapes
        print(images.shape)
        print(masks.shape)
        optimizer.zero_grad()
        outputs = model(images)
        
        # Drop the channel axis (dimension 1) of the masks so they can be used
        # as the loss target
        masks = masks.squeeze(1)  # Remove the channel dimension (1)
        
        loss = criterion(outputs, masks.long())  # Cast the target to integer class indices
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Average of the per-batch losses over the epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(dataloader)}")

Version 2

In [None]:
import torch
import torch.nn as nn
from torchvision import models
from torch.nn.functional import relu
from torchvision import transforms


class UNet(nn.Module):
    """U-Net with an unpadded encoder and centre-cropped skip connections.

    The encoder uses unpadded ("valid") 3x3 convolutions, so every encoder
    map is larger than the decoder map it is concatenated with.  The encoder
    map is therefore centre-cropped to the decoder size before concatenation;
    concatenating the uncropped maps raises a size-mismatch error.

    The logits are spatially smaller than the input (572x572 in gives
    448x448 out), so targets have to be cropped or resized to the output size
    before a per-pixel loss is computed.

    Args:
        n_class: Number of output channels (one logit map per class).
    """

    def __init__(self, n_class):
        super().__init__()

        # Encoder: four blocks of two unpadded 3x3 convolutions followed by a
        # 2x2 max-pool, plus a bottleneck block without pooling.
        # Sizes in the comments are for a 572x572x3 input.
        self.e11 = nn.Conv2d(3, 64, kernel_size=3, padding=0)  # 570x570x64
        self.e12 = nn.Conv2d(64, 64, kernel_size=3, padding=0)  # 568x568x64
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # 284x284x64

        self.e21 = nn.Conv2d(64, 128, kernel_size=3, padding=0)  # 282x282x128
        self.e22 = nn.Conv2d(128, 128, kernel_size=3, padding=0)  # 280x280x128
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # 140x140x128

        self.e31 = nn.Conv2d(128, 256, kernel_size=3, padding=0)  # 138x138x256
        self.e32 = nn.Conv2d(256, 256, kernel_size=3, padding=0)  # 136x136x256
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # 68x68x256

        self.e41 = nn.Conv2d(256, 512, kernel_size=3, padding=0)  # 66x66x512
        self.e42 = nn.Conv2d(512, 512, kernel_size=3, padding=0)  # 64x64x512
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)  # 32x32x512

        self.e51 = nn.Conv2d(512, 1024, kernel_size=3, padding=0)  # 30x30x1024
        self.e52 = nn.Conv2d(1024, 1024, kernel_size=3, padding=0)  # 28x28x1024

        # Decoder: transposed convolution (x2), concatenation with the cropped
        # encoder map, then two padded 3x3 convolutions (size preserved).
        self.upconv1 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)  # 56x56x512
        self.d11 = nn.Conv2d(1024, 512, kernel_size=3, padding=1)
        self.d12 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        self.upconv2 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)  # 112x112x256
        self.d21 = nn.Conv2d(512, 256, kernel_size=3, padding=1)
        self.d22 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        self.upconv3 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)  # 224x224x128
        self.d31 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.d32 = nn.Conv2d(128, 128, kernel_size=3, padding=1)

        self.upconv4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)  # 448x448x64
        self.d41 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.d42 = nn.Conv2d(64, 64, kernel_size=3, padding=1)

        # Output layer: 1x1 convolution mapping 64 features to class logits.
        self.outconv = nn.Conv2d(64, n_class, kernel_size=1)

    @staticmethod
    def _center_crop(skip, target):
        """Return the central window of ``skip`` with ``target``'s height and width."""
        height, width = target.shape[2:]
        top = (skip.shape[2] - height) // 2
        left = (skip.shape[3] - width) // 2
        return skip[:, :, top:top + height, left:left + width]

    def forward(self, x):
        """Return per-pixel class logits for a batch of images.

        Args:
            x: Tensor of shape (N, 3, H, W); H and W must be at least 140 so
                that the bottleneck map is not empty.

        Returns:
            Tensor of shape (N, n_class, h, w) with h < H and w < W
            (448x448 for a 572x572 input).
        """
        # Encoder
        xe11 = relu(self.e11(x))
        xe12 = relu(self.e12(xe11))
        xp1 = self.pool1(xe12)

        xe21 = relu(self.e21(xp1))
        xe22 = relu(self.e22(xe21))
        xp2 = self.pool2(xe22)

        xe31 = relu(self.e31(xp2))
        xe32 = relu(self.e32(xe31))
        xp3 = self.pool3(xe32)

        xe41 = relu(self.e41(xp3))
        xe42 = relu(self.e42(xe41))
        xp4 = self.pool4(xe42)

        xe51 = relu(self.e51(xp4))
        xe52 = relu(self.e52(xe51))

        # Decoder: crop each encoder map to the upsampled size, then concatenate
        xu1 = self.upconv1(xe52)
        xu11 = torch.cat([xu1, self._center_crop(xe42, xu1)], dim=1)
        xd11 = relu(self.d11(xu11))
        xd12 = relu(self.d12(xd11))

        xu2 = self.upconv2(xd12)
        xu22 = torch.cat([xu2, self._center_crop(xe32, xu2)], dim=1)
        xd21 = relu(self.d21(xu22))
        xd22 = relu(self.d22(xd21))

        xu3 = self.upconv3(xd22)
        xu33 = torch.cat([xu3, self._center_crop(xe22, xu3)], dim=1)
        xd31 = relu(self.d31(xu33))
        xd32 = relu(self.d32(xd31))

        xu4 = self.upconv4(xd32)
        xu44 = torch.cat([xu4, self._center_crop(xe12, xu4)], dim=1)
        xd41 = relu(self.d41(xu44))
        xd42 = relu(self.d42(xd41))

        # Output layer
        return self.outconv(xd42)

# Define dataset and dataloader
class CustomDataset(torch.utils.data.Dataset):
    """Paired image/mask dataset; every sample is resized to 572x572.

    Args:
        image_folder: Directory containing the input images.
        mask_folder: Directory containing one mask per image, stored under
            the same file name as the image.
        transform: Optional callable applied to the resized image and,
            separately, to the resized mask.
    """

    def __init__(self, image_folder, mask_folder, transform=None):
        self.image_folder = image_folder
        self.mask_folder = mask_folder
        self.transform = transform
        # os.listdir returns entries in arbitrary order and includes hidden
        # files (e.g. macOS ".DS_Store") that cannot be opened as images:
        # sort for a reproducible index -> file mapping and skip dot-files.
        self.images = sorted(
            name for name in os.listdir(image_folder) if not name.startswith(".")
        )
        self.resize = transforms.Resize((572, 572))  # Resize images to 572x572
        # Masks hold labels, not intensities: the default (bilinear) filter
        # would blend neighbouring labels into values that belong to no class,
        # so masks are resized with nearest-neighbour sampling instead.
        self.mask_resize = transforms.Resize(
            (572, 572), interpolation=transforms.InterpolationMode.NEAREST
        )

    def __len__(self):
        """Return the number of image files found in ``image_folder``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, resize and transform the ``idx``-th image and its mask.

        Returns:
            ``(image, mask)``: the RGB image and the single-channel ("L")
            mask, both 572x572, each passed through ``transform`` when one
            was given.
        """
        file_name = self.images[idx]
        img_name = os.path.join(self.image_folder, file_name)
        # Masks are looked up under the same file name as the image.
        mask_name = os.path.join(self.mask_folder, file_name)
        image = Image.open(img_name).convert("RGB")
        mask = Image.open(mask_name).convert("L")  # grayscale, one channel

        image = self.resize(image)
        mask = self.mask_resize(mask)

        if self.transform:
            image = self.transform(image)
            # NOTE(review): the same transform is applied to the mask; a
            # value-rescaling transform would also rescale the label values.
            mask = self.transform(mask)

        return image, mask

# Define data transformations
# The same transform object is handed to the dataset, which applies it to both
# the image and the mask.
# NOTE(review): ToTensor rescales 8-bit pixel values to [0, 1]; after the
# .long() cast in the loop below only mask pixels equal to 255 stay non-zero
# -- confirm the masks are stored as 0/255.
data_transform = ToTensor()

# Define data paths (absolute, machine-specific locations of the training data)
image_folder = "/Users/mohammadfaridnaufal/Library/CloudStorage/OneDrive-UniversitasSurabaya/S3/Project/Program/U-Net/train/images"
mask_folder = "/Users/mohammadfaridnaufal/Library/CloudStorage/OneDrive-UniversitasSurabaya/S3/Project/Program/U-Net/train/masks"

# Create dataset and dataloader
dataset = CustomDataset(image_folder, mask_folder, transform=data_transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Initialize UNet model
model = UNet(n_class=2)  # Two output classes; use n_class=3 for background + two foreground classes

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 50

# Training loop: one optimizer step per batch, mean batch loss printed per epoch
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, masks in dataloader:
        # Debug output: batch tensor shapes
        print(images.shape)
        print(masks.shape)
        optimizer.zero_grad()
        outputs = model(images)
        
        # Drop the channel axis (dimension 1) of the masks so they can be used
        # as the loss target
        masks = masks.squeeze(1)  # Remove the channel dimension (1)
        
        # NOTE(review): the loss needs outputs and masks to share height and
        # width; the dataset resizes masks to 572x572 -- confirm the model
        # returns logits of that size.
        loss = criterion(outputs, masks.long())  # Cast the target to integer class indices
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Average of the per-batch losses over the epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(dataloader)}")

Version 3
The encoder outputs can now be cropped for the skip connections.

In [None]:
import torch
import torch.nn as nn
from torchvision import models
from torch.nn.functional import relu
from torchvision import transforms


class UNet(nn.Module):
    """U-Net with an unpadded encoder and centre-cropped skip connections.

    The encoder uses unpadded ("valid") 3x3 convolutions, so each encoder map
    is larger than the decoder map it is concatenated with and is
    centre-cropped first.  Height and width are cropped independently, so
    non-square inputs are supported.

    The logits are spatially smaller than the input (572x572 in gives
    448x448 out), so targets have to be cropped or resized to the output size
    before a per-pixel loss is computed.

    Args:
        n_class: Number of output channels (one logit map per class).
    """

    def __init__(self, n_class):
        super().__init__()

        # Encoder: four blocks of two unpadded 3x3 convolutions followed by a
        # 2x2 max-pool, plus a bottleneck block without pooling.
        # Sizes in the comments are for a 572x572x3 input.
        self.e11 = nn.Conv2d(3, 64, kernel_size=3, padding=0)  # 570x570x64
        self.e12 = nn.Conv2d(64, 64, kernel_size=3, padding=0)  # 568x568x64
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # 284x284x64

        self.e21 = nn.Conv2d(64, 128, kernel_size=3, padding=0)  # 282x282x128
        self.e22 = nn.Conv2d(128, 128, kernel_size=3, padding=0)  # 280x280x128
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # 140x140x128

        self.e31 = nn.Conv2d(128, 256, kernel_size=3, padding=0)  # 138x138x256
        self.e32 = nn.Conv2d(256, 256, kernel_size=3, padding=0)  # 136x136x256
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # 68x68x256

        self.e41 = nn.Conv2d(256, 512, kernel_size=3, padding=0)  # 66x66x512
        self.e42 = nn.Conv2d(512, 512, kernel_size=3, padding=0)  # 64x64x512
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)  # 32x32x512

        self.e51 = nn.Conv2d(512, 1024, kernel_size=3, padding=0)  # 30x30x1024
        self.e52 = nn.Conv2d(1024, 1024, kernel_size=3, padding=0)  # 28x28x1024

        # Decoder: transposed convolution (x2), concatenation with the cropped
        # encoder map, then two padded 3x3 convolutions (size preserved).
        self.upconv1 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)  # 56x56x512
        self.d11 = nn.Conv2d(1024, 512, kernel_size=3, padding=1)
        self.d12 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        self.upconv2 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)  # 112x112x256
        self.d21 = nn.Conv2d(512, 256, kernel_size=3, padding=1)
        self.d22 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        self.upconv3 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)  # 224x224x128
        self.d31 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.d32 = nn.Conv2d(128, 128, kernel_size=3, padding=1)

        self.upconv4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)  # 448x448x64
        self.d41 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.d42 = nn.Conv2d(64, 64, kernel_size=3, padding=1)

        # Output layer: 1x1 convolution mapping 64 features to class logits.
        self.outconv = nn.Conv2d(64, n_class, kernel_size=1)

    @staticmethod
    def _crop_to(skip, target):
        """Centre-crop ``skip`` to ``target``'s height and width (each axis separately)."""
        rows, cols = target.size()[2], target.size()[3]
        row0 = (skip.size()[2] - rows) // 2
        col0 = (skip.size()[3] - cols) // 2
        return skip[:, :, row0:row0 + rows, col0:col0 + cols]

    def forward(self, x):
        """Return per-pixel class logits for a batch of images.

        Args:
            x: Tensor of shape (N, 3, H, W); H and W must each be at least
                140 so that the bottleneck map is not empty.

        Returns:
            Tensor of shape (N, n_class, h, w) with h < H and w < W
            (448x448 for a 572x572 input).
        """
        # Encoder
        xe11 = relu(self.e11(x))
        xe12 = relu(self.e12(xe11))
        xp1 = self.pool1(xe12)

        xe21 = relu(self.e21(xp1))
        xe22 = relu(self.e22(xe21))
        xp2 = self.pool2(xe22)

        xe31 = relu(self.e31(xp2))
        xe32 = relu(self.e32(xe31))
        xp3 = self.pool3(xe32)

        xe41 = relu(self.e41(xp3))
        xe42 = relu(self.e42(xe41))
        xp4 = self.pool4(xe42)

        xe51 = relu(self.e51(xp4))
        xe52 = relu(self.e52(xe51))

        # Decoder: crop each encoder map to the upsampled size, then concatenate
        xu1 = self.upconv1(xe52)
        xu11 = torch.cat([xu1, self._crop_to(xe42, xu1)], dim=1)
        xd11 = relu(self.d11(xu11))
        xd12 = relu(self.d12(xd11))

        xu2 = self.upconv2(xd12)
        xu22 = torch.cat([xu2, self._crop_to(xe32, xu2)], dim=1)
        xd21 = relu(self.d21(xu22))
        xd22 = relu(self.d22(xd21))

        xu3 = self.upconv3(xd22)
        xu33 = torch.cat([xu3, self._crop_to(xe22, xu3)], dim=1)
        xd31 = relu(self.d31(xu33))
        xd32 = relu(self.d32(xd31))

        xu4 = self.upconv4(xd32)
        xu44 = torch.cat([xu4, self._crop_to(xe12, xu4)], dim=1)
        xd41 = relu(self.d41(xu44))
        xd42 = relu(self.d42(xd41))

        # Output layer
        return self.outconv(xd42)

# Define dataset and dataloader
class CustomDataset(torch.utils.data.Dataset):
    """Paired image/mask dataset; every sample is resized to 572x572.

    Args:
        image_folder: Directory containing the input images.
        mask_folder: Directory containing one mask per image, stored under
            the same file name as the image.
        transform: Optional callable applied to the resized image and,
            separately, to the resized mask.
    """

    def __init__(self, image_folder, mask_folder, transform=None):
        self.image_folder = image_folder
        self.mask_folder = mask_folder
        self.transform = transform
        # Deterministic order, and no hidden entries such as macOS
        # ".DS_Store", which os.listdir would otherwise return as "images".
        self.images = sorted(
            entry for entry in os.listdir(image_folder) if not entry.startswith(".")
        )
        self.resize = transforms.Resize((572, 572))  # Resize images to 572x572
        # Nearest-neighbour keeps mask pixels at their original label values;
        # the default bilinear filter would blend labels along object borders.
        self.mask_resize = transforms.Resize(
            (572, 572), interpolation=transforms.InterpolationMode.NEAREST
        )

    def __len__(self):
        """Return the number of image files found in ``image_folder``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, resize and transform the ``idx``-th image and its mask.

        Returns:
            ``(image, mask)``: the RGB image and the single-channel ("L")
            mask, both 572x572, each passed through ``transform`` when one
            was given.
        """
        file_name = self.images[idx]
        img_name = os.path.join(self.image_folder, file_name)
        # Masks are looked up under the same file name as the image.
        mask_name = os.path.join(self.mask_folder, file_name)
        image = Image.open(img_name).convert("RGB")
        mask = Image.open(mask_name).convert("L")  # grayscale, one channel

        image = self.resize(image)
        mask = self.mask_resize(mask)

        if self.transform:
            image = self.transform(image)
            # NOTE(review): the same transform is applied to the mask; a
            # value-rescaling transform would also rescale the label values.
            mask = self.transform(mask)

        return image, mask

# Define data transformations
# The same transform object is handed to the dataset, which applies it to both
# the image and the mask.
# NOTE(review): ToTensor rescales 8-bit pixel values to [0, 1]; after the
# .long() cast below only mask pixels equal to 255 stay non-zero, so a third
# class can never appear in the targets -- confirm how the masks are encoded.
data_transform = ToTensor()

# Define data paths (absolute, machine-specific locations of the training data)
image_folder = "/Users/mohammadfaridnaufal/Library/CloudStorage/OneDrive-UniversitasSurabaya/S3/Project/Program/U-Net/train/images"
mask_folder = "/Users/mohammadfaridnaufal/Library/CloudStorage/OneDrive-UniversitasSurabaya/S3/Project/Program/U-Net/train/masks"

# Create dataset and dataloader
dataset = CustomDataset(image_folder, mask_folder, transform=data_transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Initialize UNet model
model = UNet(n_class=3)  # Three classes: background, class 1, class 2

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 50


def _center_crop_like(masks, outputs):
    """Centre-crop ``masks`` (N, H, W) to the height and width of ``outputs`` (N, C, h, w)."""
    out_h, out_w = outputs.shape[-2:]
    top = (masks.shape[-2] - out_h) // 2
    left = (masks.shape[-1] - out_w) // 2
    return masks[..., top:top + out_h, left:left + out_w]


# Training loop: one optimizer step per batch, mean batch loss printed per epoch
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, masks in dataloader:
        optimizer.zero_grad()
        outputs = model(images)

        masks = masks.squeeze(1)  # Remove the channel dimension (1)
        # The unpadded encoder makes the logits smaller than the 572x572
        # masks; CrossEntropyLoss needs matching height/width, so keep only
        # the mask region the logits cover (a no-op when sizes already agree).
        masks = _center_crop_like(masks, outputs)

        loss = criterion(outputs, masks.long())  # Target must hold integer class indices
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Average of the per-batch losses over the epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(dataloader)}")

Version 4

In [None]:
import torch
import torch.nn as nn
from torchvision import models
from torch.nn.functional import relu
from torchvision import transforms
from torchvision.transforms import ToTensor
import os
from torch.utils.data import DataLoader
import torch.optim as optim
from PIL import Image


class UNet(nn.Module):
    """U-Net built from unpadded 3x3 convolutions with resized skip connections.

    All encoder and decoder convolutions are unpadded, so feature maps shrink
    by two pixels per convolution.  Each encoder map is bilinearly resized to
    the size of the decoder map it is concatenated with, and the final logits
    are bilinearly resized to ``out_size``.

    Args:
        n_class: Number of output channels (one logit map per class).
        out_size: ``(height, width)`` the logits are resized to.  Defaults to
            ``(572, 572)``, the size the documented input uses.
    """

    def __init__(self, n_class, out_size=(572, 572)):
        super().__init__()
        self.out_size = out_size

        # Encoder: four blocks of two unpadded 3x3 convolutions followed by a
        # 2x2 max-pool, plus a bottleneck block without pooling.
        # Sizes in the comments are for a 572x572x3 input.
        self.e11 = nn.Conv2d(3, 64, kernel_size=3, padding=0)  # 570x570x64
        self.e12 = nn.Conv2d(64, 64, kernel_size=3, padding=0)  # 568x568x64
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # 284x284x64

        self.e21 = nn.Conv2d(64, 128, kernel_size=3, padding=0)  # 282x282x128
        self.e22 = nn.Conv2d(128, 128, kernel_size=3, padding=0)  # 280x280x128
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # 140x140x128

        self.e31 = nn.Conv2d(128, 256, kernel_size=3, padding=0)  # 138x138x256
        self.e32 = nn.Conv2d(256, 256, kernel_size=3, padding=0)  # 136x136x256
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # 68x68x256

        self.e41 = nn.Conv2d(256, 512, kernel_size=3, padding=0)  # 66x66x512
        self.e42 = nn.Conv2d(512, 512, kernel_size=3, padding=0)  # 64x64x512
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)  # 32x32x512

        self.e51 = nn.Conv2d(512, 1024, kernel_size=3, padding=0)  # 30x30x1024
        self.e52 = nn.Conv2d(1024, 1024, kernel_size=3, padding=0)  # 28x28x1024

        # Decoder: transposed convolution (x2), concatenation with the resized
        # encoder map, then two unpadded 3x3 convolutions (-2 pixels each).
        self.upconv1 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)  # 56x56x512
        self.d11 = nn.Conv2d(1024, 512, kernel_size=3, padding=0)  # 54x54x512
        self.d12 = nn.Conv2d(512, 512, kernel_size=3, padding=0)  # 52x52x512

        self.upconv2 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)  # 104x104x256
        self.d21 = nn.Conv2d(512, 256, kernel_size=3, padding=0)  # 102x102x256
        self.d22 = nn.Conv2d(256, 256, kernel_size=3, padding=0)  # 100x100x256

        self.upconv3 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)  # 200x200x128
        self.d31 = nn.Conv2d(256, 128, kernel_size=3, padding=0)  # 198x198x128
        self.d32 = nn.Conv2d(128, 128, kernel_size=3, padding=0)  # 196x196x128

        self.upconv4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)  # 392x392x64
        self.d41 = nn.Conv2d(128, 64, kernel_size=3, padding=0)  # 390x390x64
        self.d42 = nn.Conv2d(64, 64, kernel_size=3, padding=0)  # 388x388x64

        # Output layer: 1x1 convolution mapping 64 features to class logits.
        self.outconv = nn.Conv2d(64, n_class, kernel_size=1)

    @staticmethod
    def _resize_like(skip, target):
        """Bilinearly resize ``skip`` to the height and width of ``target``."""
        return nn.functional.interpolate(
            skip, size=target.size()[2:], mode='bilinear', align_corners=True
        )

    def forward(self, x):
        """Return per-pixel class logits resized to ``out_size``.

        Args:
            x: Tensor of shape (N, 3, H, W); H and W must be at least 188 so
                that no unpadded convolution receives a map smaller than its
                kernel.

        Returns:
            Tensor of shape (N, n_class, out_size[0], out_size[1]).
        """
        # Encoder
        xe11 = relu(self.e11(x))
        xe12 = relu(self.e12(xe11))
        xp1 = self.pool1(xe12)

        xe21 = relu(self.e21(xp1))
        xe22 = relu(self.e22(xe21))
        xp2 = self.pool2(xe22)

        xe31 = relu(self.e31(xp2))
        xe32 = relu(self.e32(xe31))
        xp3 = self.pool3(xe32)

        xe41 = relu(self.e41(xp3))
        xe42 = relu(self.e42(xe41))
        xp4 = self.pool4(xe42)

        xe51 = relu(self.e51(xp4))
        xe52 = relu(self.e52(xe51))

        # Decoder: resize each encoder map to the upsampled size, then concatenate
        xu1 = self.upconv1(xe52)
        xu11 = torch.cat([xu1, self._resize_like(xe42, xu1)], dim=1)
        xd11 = relu(self.d11(xu11))
        xd12 = relu(self.d12(xd11))

        xu2 = self.upconv2(xd12)
        xu22 = torch.cat([xu2, self._resize_like(xe32, xu2)], dim=1)
        xd21 = relu(self.d21(xu22))
        xd22 = relu(self.d22(xd21))

        xu3 = self.upconv3(xd22)
        xu33 = torch.cat([xu3, self._resize_like(xe22, xu3)], dim=1)
        xd31 = relu(self.d31(xu33))
        xd32 = relu(self.d32(xd31))

        xu4 = self.upconv4(xd32)
        xu44 = torch.cat([xu4, self._resize_like(xe12, xu4)], dim=1)
        xd41 = relu(self.d41(xu44))
        xd42 = relu(self.d42(xd41))

        # Output layer, brought to the requested size so it can be compared
        # with targets of that size.
        out = self.outconv(xd42)
        return nn.functional.interpolate(
            out, size=self.out_size, mode='bilinear', align_corners=True
        )

# Define dataset and dataloader
class CustomDataset(torch.utils.data.Dataset):
    """Paired image/mask dataset; every sample is resized to 572x572.

    Args:
        image_folder: Directory containing the input images.
        mask_folder: Directory containing one mask per image, stored under
            the same file name as the image.
        transform: Optional callable applied to the resized image and,
            separately, to the resized mask.
    """

    def __init__(self, image_folder, mask_folder, transform=None):
        self.image_folder = image_folder
        self.mask_folder = mask_folder
        self.transform = transform
        # Sorted so that an index always maps to the same file; dot-files
        # (e.g. macOS ".DS_Store") are skipped because they are not images.
        self.images = sorted(
            fname for fname in os.listdir(image_folder) if not fname.startswith(".")
        )
        self.resize = transforms.Resize((572, 572))  # Resize images to 572x572
        # Label maps must not be interpolated: nearest-neighbour sampling
        # keeps every resized mask pixel equal to an original label value.
        self.mask_resize = transforms.Resize(
            (572, 572), interpolation=transforms.InterpolationMode.NEAREST
        )

    def __len__(self):
        """Return the number of image files found in ``image_folder``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, resize and transform the ``idx``-th image and its mask.

        Returns:
            ``(image, mask)``: the RGB image and the single-channel ("L")
            mask, both 572x572, each passed through ``transform`` when one
            was given.
        """
        file_name = self.images[idx]
        img_name = os.path.join(self.image_folder, file_name)
        # Masks are looked up under the same file name as the image.
        mask_name = os.path.join(self.mask_folder, file_name)
        image = Image.open(img_name).convert("RGB")
        mask = Image.open(mask_name).convert("L")  # grayscale, one channel

        image = self.resize(image)
        mask = self.mask_resize(mask)

        if self.transform:
            image = self.transform(image)
            # NOTE(review): the same transform is applied to the mask; a
            # value-rescaling transform would also rescale the label values.
            mask = self.transform(mask)

        return image, mask

# Transform shared by images and masks (the dataset applies it to both)
data_transform = ToTensor()

# Locations of the training images and of their masks
image_folder = "/Users/mohammadfaridnaufal/Library/CloudStorage/OneDrive-UniversitasSurabaya/S3/Project/Program/U-Net/train/images"
mask_folder = "/Users/mohammadfaridnaufal/Library/CloudStorage/OneDrive-UniversitasSurabaya/S3/Project/Program/U-Net/train/masks"

# Data pipeline: paired dataset wrapped in a shuffling loader, four samples per batch
dataset = CustomDataset(
    image_folder,
    mask_folder,
    transform=data_transform,
)
dataloader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
)

# Network with three output classes: background, class 1, class 2
model = UNet(n_class=3)

# Objective and optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 50

# Training: one optimiser step per batch; the mean batch loss is reported
# at the end of every epoch.
for epoch in range(num_epochs):
    print('epoch', epoch)
    running_loss = 0.0
    for images, masks in dataloader:
        # Batch tensor shapes, one per line
        print(images.shape, masks.shape, sep="\n")

        outputs = model(images)

        # The loss target carries no channel axis and holds integer class indices
        masks = masks.squeeze(1)
        loss = criterion(outputs, masks.long())

        # Clear stale gradients, back-propagate, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss = running_loss + loss.item()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(dataloader)}")

Version 5

In [3]:
import torch
import torch.nn as nn
from torchvision import models
from torch.nn.functional import relu
from torchvision import transforms
from torchvision.transforms import ToTensor
import os
from torch.utils.data import DataLoader
import torch.optim as optim
from PIL import Image

class UNet(nn.Module):
    """U-Net of unpadded 3x3 convolutions whose logits are resized to 572x572.

    Layers are registered under the names ``e<level><1|2>``, ``pool<level>``,
    ``upconv<stage>``, ``d<stage><1|2>`` and ``outconv``.  Skip connections
    bilinearly resize the encoder map to the decoder map's size before the
    two are concatenated along the channel axis.

    Args:
        n_class: Number of output channels (one logit map per class).
    """

    def __init__(self, n_class):
        super().__init__()

        def conv3x3(c_in, c_out):
            # Unpadded 3x3 convolution: output is 2 pixels smaller per axis.
            return nn.Conv2d(c_in, c_out, kernel_size=3, padding=0)

        # Encoder: five levels of two convolutions each; a 2x2 max-pool
        # follows every level except the last.  For a 572x572x3 input the
        # level outputs are 568, 280, 136, 64 and 28 pixels wide.
        encoder_widths = (3, 64, 128, 256, 512, 1024)
        for level, (c_in, c_out) in enumerate(zip(encoder_widths, encoder_widths[1:]), start=1):
            setattr(self, f"e{level}1", conv3x3(c_in, c_out))
            setattr(self, f"e{level}2", conv3x3(c_out, c_out))
            if level < 5:
                setattr(self, f"pool{level}", nn.MaxPool2d(kernel_size=2, stride=2))

        # Decoder: four stages of a stride-2 transposed convolution (halving
        # the channels) and two convolutions; the first convolution sees the
        # concatenated map, hence c_in input channels again.
        decoder_widths = (1024, 512, 256, 128, 64)
        for stage, (c_in, c_out) in enumerate(zip(decoder_widths, decoder_widths[1:]), start=1):
            setattr(self, f"upconv{stage}", nn.ConvTranspose2d(c_in, c_out, kernel_size=2, stride=2))
            setattr(self, f"d{stage}1", conv3x3(c_in, c_out))
            setattr(self, f"d{stage}2", conv3x3(c_out, c_out))

        # Output layer: 1x1 convolution from 64 features to class logits.
        self.outconv = nn.Conv2d(64, n_class, kernel_size=1)

    def forward(self, x):
        """Compute class logits for ``x`` and resize them to 572x572."""
        # Encoder: remember the output of every pooled level for the skips.
        skips = []
        feat = x
        for level in range(1, 6):
            feat = relu(getattr(self, f"e{level}1")(feat))
            feat = relu(getattr(self, f"e{level}2")(feat))
            if level < 5:
                skips.append(feat)
                feat = getattr(self, f"pool{level}")(feat)

        # Decoder: the deepest remaining skip is consumed first.
        for stage in range(1, 5):
            feat = getattr(self, f"upconv{stage}")(feat)
            skip = nn.functional.interpolate(
                skips.pop(), size=feat.size()[2:], mode='bilinear', align_corners=True
            )
            feat = torch.cat([feat, skip], dim=1)
            feat = relu(getattr(self, f"d{stage}1")(feat))
            feat = relu(getattr(self, f"d{stage}2")(feat))

        # Output layer, upsampled to the fixed 572x572 resolution.
        logits = self.outconv(feat)
        return nn.functional.interpolate(
            logits, size=(572, 572), mode='bilinear', align_corners=True
        )

# Define dataset and dataloader
class CustomDataset(torch.utils.data.Dataset):
    """Paired image / segmentation-mask dataset read from two folders.

    Every file name found in ``image_folder`` is looked up under the same
    name in ``mask_folder``. Images are loaded as RGB, masks as
    single-channel grayscale, and both are resized to 572x572.

    Args:
        image_folder: Directory containing the input images.
        mask_folder: Directory containing one mask per image, stored under
            the same file name as the image.
        transform: Optional callable applied separately to the resized image
            and to the resized mask.
    """

    def __init__(self, image_folder, mask_folder, transform=None):
        self.image_folder = image_folder
        self.mask_folder = mask_folder
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes an
        # index always refer to the same file.
        self.images = sorted(os.listdir(image_folder))
        self.resize = transforms.Resize((572, 572))  # Resize images to 572x572
        # Masks store labels, so they must not be blended while resizing:
        # nearest-neighbour keeps only values that exist in the original mask.
        self.mask_resize = transforms.Resize(
            (572, 572), interpolation=transforms.InterpolationMode.NEAREST
        )

    def __len__(self):
        """Return the number of image files found in ``image_folder``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, resize and (optionally) transform the ``idx``-th pair.

        Returns:
            A tuple ``(image, mask)``.
        """
        img_name = os.path.join(self.image_folder, self.images[idx])
        # Mask file names are assumed to be the same as image file names.
        mask_name = os.path.join(self.mask_folder, self.images[idx])

        # convert() returns a fully loaded copy, so the files can be closed
        # as soon as the conversion is done.
        with Image.open(img_name) as img_file:
            image = img_file.convert("RGB")
        with Image.open(mask_name) as mask_file:
            mask = mask_file.convert("L")  # Convert to grayscale

        # Resize image and mask
        image = self.resize(image)
        mask = self.mask_resize(mask)

        if self.transform:
            # NOTE(review): the transform is applied to image and mask
            # independently, so a random transform would not stay aligned
            # between the two - confirm only deterministic transforms are used.
            image = self.transform(image)
            mask = self.transform(mask)

        return image, mask

# Define data transformations
data_transform = ToTensor()

# Define data paths
image_folder = "/Users/mohammadfaridnaufal/Library/CloudStorage/OneDrive-UniversitasSurabaya/S3/Project/Program/U-Net/train/images"
mask_folder = "/Users/mohammadfaridnaufal/Library/CloudStorage/OneDrive-UniversitasSurabaya/S3/Project/Program/U-Net/train/masks"

# Create dataset and dataloader
dataset = CustomDataset(image_folder, mask_folder, transform=data_transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Number of segmentation classes: background, class 1, class 2.
# Shared by the model head and the IoU computation so they cannot drift apart.
num_classes = 3

# Initialize UNet model
model = UNet(n_class=num_classes)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 50

best_loss = float('inf')  # Initialize best loss with infinity

for epoch in range(num_epochs):
    running_loss = 0.0
    total_iou = 0.0

    print("epoch", epoch)
    for images, masks in dataloader:
        optimizer.zero_grad()
        outputs = model(images)

        # Remove the channel dimension (1) and convert to integer class
        # indices once, so the loss and the IoU below see the same labels.
        masks = masks.squeeze(1).long()

        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Calculate IOU (metric only, so no gradient tracking is needed)
        with torch.no_grad():
            predictions = torch.argmax(outputs, dim=1)

            # Intersection and union pixel counts for each class
            intersection = torch.zeros(num_classes, device=predictions.device)
            union = torch.zeros(num_classes, device=predictions.device)

            for i in range(num_classes):
                predicted_i = predictions == i
                target_i = masks == i
                intersection[i] = (predicted_i & target_i).sum().float()
                union[i] = (predicted_i | target_i).sum().float()

            # Add a small value to avoid division by zero
            iou = (intersection + 1e-6) / (union + 1e-6)
            total_iou += iou.mean().item()

    # Calculate average loss and IOU over the batches of this epoch
    average_loss = running_loss / len(dataloader)
    average_iou = total_iou / len(dataloader)

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss}, IOU: {average_iou}")

    # Track the best loss; saving the checkpoint is currently disabled
    if average_loss < best_loss:
        best_loss = average_loss
        # torch.save(model.state_dict(), 'best_model.pth')

epoch 0


Version 6 with CUDA Enabled

In [None]:
import torch
import torch.nn as nn
from torchvision import models
from torch.nn.functional import relu
from torchvision import transforms
from torchvision.transforms import ToTensor
import os
from torch.utils.data import DataLoader
import torch.optim as optim
from PIL import Image

class UNet(nn.Module):
    """U-Net segmentation network built from unpadded 3x3 convolutions.

    The encoder has four blocks of two convolutions followed by 2x2
    max-pooling, plus a fifth block without pooling. The decoder has four
    blocks, each made of a 2x2 transposed convolution, a skip connection
    from the matching encoder block and two convolutions. A final 1x1
    convolution produces one channel per class, and the result is resized
    bilinearly to the spatial size of the input.

    Args:
        n_class: Number of output channels produced by the final 1x1
            convolution.
    """

    def __init__(self, n_class):
        super().__init__()

        # Encoder
        # Each block is two 3x3 convolutions followed by 2x2 max-pooling,
        # except the last block, which has no pooling.
        # The size comments below are for a 572x572x3 input.
        # -------
        # input: 572x572x3
        self.e11 = nn.Conv2d(3, 64, kernel_size=3, padding=0)  # output: 570x570x64
        self.e12 = nn.Conv2d(64, 64, kernel_size=3, padding=0)  # output: 568x568x64
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # output: 284x284x64

        # input: 284x284x64
        self.e21 = nn.Conv2d(64, 128, kernel_size=3, padding=0)  # output: 282x282x128
        self.e22 = nn.Conv2d(128, 128, kernel_size=3, padding=0)  # output: 280x280x128
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # output: 140x140x128

        # input: 140x140x128
        self.e31 = nn.Conv2d(128, 256, kernel_size=3, padding=0)  # output: 138x138x256
        self.e32 = nn.Conv2d(256, 256, kernel_size=3, padding=0)  # output: 136x136x256
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # output: 68x68x256

        # input: 68x68x256
        self.e41 = nn.Conv2d(256, 512, kernel_size=3, padding=0)  # output: 66x66x512
        self.e42 = nn.Conv2d(512, 512, kernel_size=3, padding=0)  # output: 64x64x512
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)  # output: 32x32x512

        # input: 32x32x512
        self.e51 = nn.Conv2d(512, 1024, kernel_size=3, padding=0)  # output: 30x30x1024
        self.e52 = nn.Conv2d(1024, 1024, kernel_size=3, padding=0)  # output: 28x28x1024

        # Decoder
        # The first convolution of each block takes twice the channels of the
        # up-convolution output because the skip features are concatenated.
        self.upconv1 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.d11 = nn.Conv2d(1024, 512, kernel_size=3, padding=0)
        self.d12 = nn.Conv2d(512, 512, kernel_size=3, padding=0)

        self.upconv2 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.d21 = nn.Conv2d(512, 256, kernel_size=3, padding=0)
        self.d22 = nn.Conv2d(256, 256, kernel_size=3, padding=0)

        self.upconv3 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.d31 = nn.Conv2d(256, 128, kernel_size=3, padding=0)
        self.d32 = nn.Conv2d(128, 128, kernel_size=3, padding=0)

        self.upconv4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.d41 = nn.Conv2d(128, 64, kernel_size=3, padding=0)
        self.d42 = nn.Conv2d(64, 64, kernel_size=3, padding=0)

        # Output layer
        self.outconv = nn.Conv2d(64, n_class, kernel_size=1)

    @staticmethod
    def _merge_skip(upsampled, skip):
        """Resize ``skip`` to ``upsampled``'s spatial size and concatenate on channels."""
        # The unpadded convolutions make encoder maps larger than the decoder
        # maps, so the skip features are resized before concatenation.
        skip_resized = nn.functional.interpolate(
            skip, size=upsampled.size()[2:], mode='bilinear', align_corners=True
        )
        return torch.cat([upsampled, skip_resized], dim=1)

    def forward(self, x):
        """Run the network on a batch ``x`` and return per-class score maps.

        The returned tensor has ``n_class`` channels and the same spatial
        size as ``x``.
        """
        # Encoder
        xe11 = relu(self.e11(x))
        xe12 = relu(self.e12(xe11))
        xp1 = self.pool1(xe12)

        xe21 = relu(self.e21(xp1))
        xe22 = relu(self.e22(xe21))
        xp2 = self.pool2(xe22)

        xe31 = relu(self.e31(xp2))
        xe32 = relu(self.e32(xe31))
        xp3 = self.pool3(xe32)

        xe41 = relu(self.e41(xp3))
        xe42 = relu(self.e42(xe41))
        xp4 = self.pool4(xe42)

        xe51 = relu(self.e51(xp4))
        xe52 = relu(self.e52(xe51))

        # Decoder
        xu11 = self._merge_skip(self.upconv1(xe52), xe42)
        xd11 = relu(self.d11(xu11))
        xd12 = relu(self.d12(xd11))

        xu22 = self._merge_skip(self.upconv2(xd12), xe32)
        xd21 = relu(self.d21(xu22))
        xd22 = relu(self.d22(xd21))

        xu33 = self._merge_skip(self.upconv3(xd22), xe22)
        xd31 = relu(self.d31(xu33))
        xd32 = relu(self.d32(xd31))

        xu44 = self._merge_skip(self.upconv4(xd32), xe12)
        xd41 = relu(self.d41(xu44))
        xd42 = relu(self.d42(xd41))

        # Output layer
        out = self.outconv(xd42)
        # Resize to the input's own spatial size instead of a fixed 572x572,
        # so the output lines up with the input for any input size.
        out_resized = nn.functional.interpolate(
            out, size=x.size()[2:], mode='bilinear', align_corners=True
        )

        return out_resized

# Define dataset and dataloader
class CustomDataset(torch.utils.data.Dataset):
    """Paired image / segmentation-mask dataset read from two folders.

    Every file name found in ``image_folder`` is looked up under the same
    name in ``mask_folder``. Images are loaded as RGB, masks as
    single-channel grayscale, and both are resized to 572x572.

    Args:
        image_folder: Directory containing the input images.
        mask_folder: Directory containing one mask per image, stored under
            the same file name as the image.
        transform: Optional callable applied separately to the resized image
            and to the resized mask.
    """

    def __init__(self, image_folder, mask_folder, transform=None):
        self.image_folder = image_folder
        self.mask_folder = mask_folder
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes an
        # index always refer to the same file.
        self.images = sorted(os.listdir(image_folder))
        self.resize = transforms.Resize((572, 572))  # Resize images to 572x572
        # Masks store labels, so they must not be blended while resizing:
        # nearest-neighbour keeps only values that exist in the original mask.
        self.mask_resize = transforms.Resize(
            (572, 572), interpolation=transforms.InterpolationMode.NEAREST
        )

    def __len__(self):
        """Return the number of image files found in ``image_folder``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, resize and (optionally) transform the ``idx``-th pair.

        Returns:
            A tuple ``(image, mask)``.
        """
        img_name = os.path.join(self.image_folder, self.images[idx])
        # Mask file names are assumed to be the same as image file names.
        mask_name = os.path.join(self.mask_folder, self.images[idx])

        # convert() returns a fully loaded copy, so the files can be closed
        # as soon as the conversion is done.
        with Image.open(img_name) as img_file:
            image = img_file.convert("RGB")
        with Image.open(mask_name) as mask_file:
            mask = mask_file.convert("L")  # Convert to grayscale

        # Resize image and mask
        image = self.resize(image)
        mask = self.mask_resize(mask)

        if self.transform:
            # NOTE(review): the transform is applied to image and mask
            # independently, so a random transform would not stay aligned
            # between the two - confirm only deterministic transforms are used.
            image = self.transform(image)
            mask = self.transform(mask)

        return image, mask

# Define data transformations
data_transform = ToTensor()

# Define data paths
image_folder = r"C:\Users\Admin\Documents\Naufal\Program\U-Net\train\images"
mask_folder = r"C:\Users\Admin\Documents\Naufal\Program\U-Net\train\masks"

# Create dataset and dataloader
dataset = CustomDataset(image_folder, mask_folder, transform=data_transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Number of segmentation classes.
# Shared by the model head and the IoU computation so they cannot drift apart.
num_classes = 3

# Initialize UNet model and move it to the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = UNet(n_class=num_classes).to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 50

best_loss = float('inf')  # Initialize best loss with infinity

for epoch in range(num_epochs):
    running_loss = 0.0
    total_iou = 0.0

    print("epoch", epoch)
    for images, masks in dataloader:
        # Move data to the same device as the model
        images, masks = images.to(device), masks.to(device)

        optimizer.zero_grad()
        outputs = model(images)

        # Remove the channel dimension (1) and convert to integer class
        # indices once, so the loss and the IoU below see the same labels.
        masks = masks.squeeze(1).long()

        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Calculate IOU (metric only, so no gradient tracking is needed)
        with torch.no_grad():
            predictions = torch.argmax(outputs, dim=1)

            # Intersection and union pixel counts for each class, allocated
            # directly on the device of the predictions
            intersection = torch.zeros(num_classes, device=predictions.device)
            union = torch.zeros(num_classes, device=predictions.device)

            for i in range(num_classes):
                predicted_i = predictions == i
                target_i = masks == i
                intersection[i] = (predicted_i & target_i).sum().float()
                union[i] = (predicted_i | target_i).sum().float()

            # Add a small value to avoid division by zero
            iou = (intersection + 1e-6) / (union + 1e-6)
            total_iou += iou.mean().item()

    # Calculate average loss and IOU over the batches of this epoch
    average_loss = running_loss / len(dataloader)
    average_iou = total_iou / len(dataloader)

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss}, IOU: {average_iou}")

    # Track the best loss; saving the checkpoint is currently disabled
    if average_loss < best_loss:
        best_loss = average_loss
        # torch.save(model.state_dict(), 'best_model.pth')

Version 7 Unstable

In [9]:
import torch
import torch.nn as nn
from torchvision import models
from torch.nn.functional import relu
from torchvision import transforms
from torchvision.transforms import ToTensor
import os
from torch.utils.data import DataLoader
import torch.optim as optim
from PIL import Image
import numpy as np

class UNet(nn.Module):
    """U-Net segmentation network built from unpadded 3x3 convolutions.

    The encoder has four blocks of two convolutions followed by 2x2
    max-pooling, plus a fifth block without pooling. The decoder has four
    blocks, each made of a 2x2 transposed convolution, a skip connection
    from the matching encoder block and two convolutions. A final 1x1
    convolution produces one channel per class, and the result is resized
    bilinearly to the spatial size of the input.

    Args:
        n_class: Number of output channels produced by the final 1x1
            convolution.
    """

    def __init__(self, n_class):
        super().__init__()

        # Encoder
        # Each block is two 3x3 convolutions followed by 2x2 max-pooling,
        # except the last block, which has no pooling.
        # The size comments below are for a 572x572x3 input.
        # -------
        # input: 572x572x3
        self.e11 = nn.Conv2d(3, 64, kernel_size=3, padding=0)  # output: 570x570x64
        self.e12 = nn.Conv2d(64, 64, kernel_size=3, padding=0)  # output: 568x568x64
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # output: 284x284x64

        # input: 284x284x64
        self.e21 = nn.Conv2d(64, 128, kernel_size=3, padding=0)  # output: 282x282x128
        self.e22 = nn.Conv2d(128, 128, kernel_size=3, padding=0)  # output: 280x280x128
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # output: 140x140x128

        # input: 140x140x128
        self.e31 = nn.Conv2d(128, 256, kernel_size=3, padding=0)  # output: 138x138x256
        self.e32 = nn.Conv2d(256, 256, kernel_size=3, padding=0)  # output: 136x136x256
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # output: 68x68x256

        # input: 68x68x256
        self.e41 = nn.Conv2d(256, 512, kernel_size=3, padding=0)  # output: 66x66x512
        self.e42 = nn.Conv2d(512, 512, kernel_size=3, padding=0)  # output: 64x64x512
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)  # output: 32x32x512

        # input: 32x32x512
        self.e51 = nn.Conv2d(512, 1024, kernel_size=3, padding=0)  # output: 30x30x1024
        self.e52 = nn.Conv2d(1024, 1024, kernel_size=3, padding=0)  # output: 28x28x1024

        # Decoder
        # The first convolution of each block takes twice the channels of the
        # up-convolution output because the skip features are concatenated.
        self.upconv1 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.d11 = nn.Conv2d(1024, 512, kernel_size=3, padding=0)
        self.d12 = nn.Conv2d(512, 512, kernel_size=3, padding=0)

        self.upconv2 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.d21 = nn.Conv2d(512, 256, kernel_size=3, padding=0)
        self.d22 = nn.Conv2d(256, 256, kernel_size=3, padding=0)

        self.upconv3 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.d31 = nn.Conv2d(256, 128, kernel_size=3, padding=0)
        self.d32 = nn.Conv2d(128, 128, kernel_size=3, padding=0)

        self.upconv4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.d41 = nn.Conv2d(128, 64, kernel_size=3, padding=0)
        self.d42 = nn.Conv2d(64, 64, kernel_size=3, padding=0)

        # Output layer
        self.outconv = nn.Conv2d(64, n_class, kernel_size=1)

    @staticmethod
    def _merge_skip(upsampled, skip):
        """Resize ``skip`` to ``upsampled``'s spatial size and concatenate on channels."""
        # The unpadded convolutions make encoder maps larger than the decoder
        # maps, so the skip features are resized before concatenation.
        skip_resized = nn.functional.interpolate(
            skip, size=upsampled.size()[2:], mode='bilinear', align_corners=True
        )
        return torch.cat([upsampled, skip_resized], dim=1)

    def forward(self, x):
        """Run the network on a batch ``x`` and return per-class score maps.

        The returned tensor has ``n_class`` channels and the same spatial
        size as ``x``.
        """
        # Encoder
        xe11 = relu(self.e11(x))
        xe12 = relu(self.e12(xe11))
        xp1 = self.pool1(xe12)

        xe21 = relu(self.e21(xp1))
        xe22 = relu(self.e22(xe21))
        xp2 = self.pool2(xe22)

        xe31 = relu(self.e31(xp2))
        xe32 = relu(self.e32(xe31))
        xp3 = self.pool3(xe32)

        xe41 = relu(self.e41(xp3))
        xe42 = relu(self.e42(xe41))
        xp4 = self.pool4(xe42)

        xe51 = relu(self.e51(xp4))
        xe52 = relu(self.e52(xe51))

        # Decoder
        xu11 = self._merge_skip(self.upconv1(xe52), xe42)
        xd11 = relu(self.d11(xu11))
        xd12 = relu(self.d12(xd11))

        xu22 = self._merge_skip(self.upconv2(xd12), xe32)
        xd21 = relu(self.d21(xu22))
        xd22 = relu(self.d22(xd21))

        xu33 = self._merge_skip(self.upconv3(xd22), xe22)
        xd31 = relu(self.d31(xu33))
        xd32 = relu(self.d32(xd31))

        xu44 = self._merge_skip(self.upconv4(xd32), xe12)
        xd41 = relu(self.d41(xu44))
        xd42 = relu(self.d42(xd41))

        # Output layer
        out = self.outconv(xd42)
        # Resize to the input's own spatial size instead of a fixed 572x572,
        # so the output lines up with the input for any input size.
        out_resized = nn.functional.interpolate(
            out, size=x.size()[2:], mode='bilinear', align_corners=True
        )

        return out_resized

# Define dataset and dataloader
class CustomDataset(torch.utils.data.Dataset):
    """Paired image / segmentation-mask dataset read from two folders.

    Every file name found in ``image_folder`` is looked up under the same
    name in ``mask_folder``. Images are loaded as RGB, masks as
    single-channel grayscale, and both are resized to 572x572.

    Args:
        image_folder: Directory containing the input images.
        mask_folder: Directory containing one mask per image, stored under
            the same file name as the image.
        transform: Optional callable applied separately to the resized image
            and to the resized mask.
    """

    def __init__(self, image_folder, mask_folder, transform=None):
        self.image_folder = image_folder
        self.mask_folder = mask_folder
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes an
        # index always refer to the same file.
        self.images = sorted(os.listdir(image_folder))
        self.resize = transforms.Resize((572, 572))  # Resize images to 572x572
        # Masks store labels, so they must not be blended while resizing:
        # nearest-neighbour keeps only values that exist in the original mask.
        self.mask_resize = transforms.Resize(
            (572, 572), interpolation=transforms.InterpolationMode.NEAREST
        )

    def __len__(self):
        """Return the number of image files found in ``image_folder``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, resize and (optionally) transform the ``idx``-th pair.

        Returns:
            A tuple ``(image, mask)``.
        """
        img_name = os.path.join(self.image_folder, self.images[idx])
        # Mask file names are assumed to be the same as image file names.
        mask_name = os.path.join(self.mask_folder, self.images[idx])

        # convert() returns a fully loaded copy, so the files can be closed
        # as soon as the conversion is done.
        with Image.open(img_name) as img_file:
            image = img_file.convert("RGB")
        with Image.open(mask_name) as mask_file:
            mask = mask_file.convert("L")  # Convert to grayscale

        # Resize image and mask
        image = self.resize(image)
        mask = self.mask_resize(mask)

        if self.transform:
            # NOTE(review): the transform is applied to image and mask
            # independently, so a random transform would not stay aligned
            # between the two - confirm only deterministic transforms are used.
            image = self.transform(image)
            mask = self.transform(mask)

        return image, mask

# Define data transformations
data_transform = ToTensor()

# Define data paths
# mask_folder must point at the masks directory; pointing it at the images
# directory would load every image as its own mask.
image_folder = "/Users/mohammadfaridnaufal/Library/CloudStorage/OneDrive-UniversitasSurabaya/S3/Project/Program/U-Net/train/images"
mask_folder = "/Users/mohammadfaridnaufal/Library/CloudStorage/OneDrive-UniversitasSurabaya/S3/Project/Program/U-Net/train/masks"

# Create dataset and dataloader
dataset = CustomDataset(image_folder, mask_folder, transform=data_transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Number of segmentation classes: background, class 1, class 2.
# Shared by the model head and the IoU computation so they cannot drift apart.
num_classes = 3

# Initialize UNet model (CUDA placement is disabled in this version)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = UNet(n_class=3).to(device)
model = UNet(n_class=num_classes)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 50

best_loss = float('inf')  # Initialize best loss with infinity

for epoch in range(num_epochs):
    running_loss = 0.0
    total_iou = 0.0

    print("epoch", epoch)
    for images, masks in dataloader:
        # Move data to CUDA (disabled in this version)
        # images, masks = images.to(device), masks.to(device)

        optimizer.zero_grad()
        outputs = model(images)

        # Remove the channel dimension (1) and convert to integer class
        # indices once, so the loss and the IoU below see the same labels.
        masks = masks.squeeze(1).long()

        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Calculate IOU (metric only, so no gradient tracking is needed)
        with torch.no_grad():
            predictions = torch.argmax(outputs, dim=1)
            # Debug output: shape and value range of the predicted class map
            print(predictions.shape)
            print("min", predictions.min().item())
            print("max", predictions.max().item())

            # Intersection and union pixel counts for each class
            intersection = torch.zeros(num_classes, device=predictions.device)
            union = torch.zeros(num_classes, device=predictions.device)

            for i in range(num_classes):
                predicted_i = predictions == i
                target_i = masks == i
                intersection[i] = (predicted_i & target_i).sum().float()
                union[i] = (predicted_i | target_i).sum().float()

            # Add a small value to avoid division by zero
            iou = (intersection + 1e-6) / (union + 1e-6)
            total_iou += iou.mean().item()

    # Calculate average loss and IOU over the batches of this epoch
    average_loss = running_loss / len(dataloader)
    average_iou = total_iou / len(dataloader)

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss}, IOU: {average_iou}")

    # Track the best loss; saving the checkpoint is currently disabled
    if average_loss < best_loss:
        best_loss = average_loss
        # torch.save(model.state_dict(), 'best_model.pth')

epoch 0
255
255
255
255
torch.Size([572, 572])
0.0


KeyboardInterrupt: 