In [1]:
# Hyperparameters
num_epochs = 10
batch_size = 2
image_size = 128
num_classes = 21  # Replace with the number of classes in your dataset
learning_rate = 0.001

In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import os
import glob
import cv2
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelEncoder

class DataProcessor:
    @staticmethod
    def get_data(directory_path, flag):
        images = []
        for img_path in glob.glob(os.path.join(directory_path, "*.tif")):
            img = cv2.imread(img_path, flag)
            images.append(img)
            
        images = np.array(images)
        return images

    @staticmethod
    def shuffle_data(images, masks):
        images, masks = shuffle(images, masks, random_state=0)
        return images, masks

    def preprocess_data(self, train_images, train_masks, val_images, val_masks, test_images, test_masks):
        train_images = np.array(train_images)
        train_masks = np.array(train_masks)
        val_images = np.array(val_images)
        val_masks = np.array(val_masks)
        test_images = np.array(test_images)
        test_masks = np.array(test_masks)

        # Label encoding for training masks
        labelencoder = LabelEncoder()
        n, h, w = train_masks.shape
        train_masks_reshaped = train_masks.reshape(-1, 1)
        train_masks_reshaped_encoded = labelencoder.fit_transform(train_masks_reshaped)
        train_masks_encoded_original_shape = train_masks_reshaped_encoded.reshape(n, h, w)

        X_train = train_images
        y_train = train_masks_encoded_original_shape
        X_test = test_images
        y_test = test_masks
        X_val = val_images
        y_val = val_masks
        X_train = torch.tensor(train_images.transpose(0, 3, 1, 2), dtype=torch.float32)  # Transpose image dimensions
        y_train = torch.tensor(train_masks_encoded_original_shape, dtype=torch.long)
        X_test = torch.tensor(test_images.transpose(0, 3, 1, 2), dtype=torch.float32)    # Transpose image dimensions
        y_test = torch.tensor(test_masks, dtype=torch.long)
        X_val = torch.tensor(val_images.transpose(0, 3, 1, 2), dtype=torch.float32)      # Transpose image dimensions
        y_val = torch.tensor(val_masks, dtype=torch.long)
        n_classes = len(np.unique(y_train))
        print(f"Number of classes = {n_classes}")

        return X_train, y_train, X_test, y_test, X_val, y_val, n_classes


In [3]:
data_processor = DataProcessor()

# Load and preprocess data
train_images = np.array(data_processor.get_data("Dataset/train/noisy_images/", 1))
train_masks = np.array(data_processor.get_data("Dataset/train/noisy_masks/", 0))
val_images = np.array(data_processor.get_data("Dataset/val/noisy_images/", 1))
val_masks = np.array(data_processor.get_data("Dataset/val/noisy_masks/", 0))
test_images = np.array(data_processor.get_data("Dataset/test/noisy_images/", 1))
test_masks = np.array(data_processor.get_data("Dataset/test/noisy_masks/", 0))

# Shuffle the data
train_images, train_masks = data_processor.shuffle_data(train_images, train_masks)
val_images, val_masks = data_processor.shuffle_data(val_images, val_masks)
test_images, test_masks = data_processor.shuffle_data(test_images, test_masks)

# Preprocess the data
X_train, y_train, X_test, y_test, X_val, y_val, n_classes = data_processor.preprocess_data(
    train_images, train_masks, val_images, val_masks, test_images, test_masks
)

# Combine your data into a single DataLoader
train_data = list(zip(X_train, y_train))
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,pin_memory=True)

# Validation DataLoader (similarly, create a DataLoader for test data if needed)
val_data = list(zip(X_val, y_val))
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False,pin_memory=True)

  y = column_or_1d(y, warn=True)


Number of classes = 11


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import numpy as np



# Define a custom ViT model for segmentation
class CustomViT(nn.Module):
    def __init__(self, num_classes, image_size, hidden_size=256, num_heads=4, num_layers=6):
        super(CustomViT, self).__init__()

        # Convolutional backbone
        self.conv = nn.Sequential(
            nn.Conv2d(3, hidden_size, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_size, hidden_size, kernel_size=3, stride=1, padding=1),
            nn.ReLU()
        )

        num_patches = (image_size // 16) ** 2
        self.embedding = nn.Embedding(num_patches, hidden_size)
        self.transformer_layers = nn.ModuleList([
            nn.TransformerEncoderLayer(hidden_size, num_heads)
            for _ in range(num_layers)
        ])
        self.segmentation_head = nn.Conv2d(hidden_size, num_classes, kernel_size=1)

    def forward(self, x):
        x = self.conv(x)
        x = x.permute(0, 2, 3, 1).reshape(x.shape[0], -1, x.shape[1])
        x = self.embedding(x.long())
        for layer in self.transformer_layers:
            x = layer(x)
        segmentation_map = self.segmentation_head(x.permute(0, 2, 1).reshape(-1, x.shape[2], x.shape[1], x.shape[3]))
        return segmentation_map

# Initialize the custom ViT model
model = CustomViT(num_classes=num_classes, image_size=image_size)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = model.to(device)
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)



In [5]:
from torch.cuda.amp import autocast, GradScaler

# Initialize the scaler for mixed precision training
scaler = GradScaler()

for epoch in range(num_epochs):
    model.train()
    for images, masks in train_loader:
        optimizer.zero_grad()
        images = images.to(device)
        masks = masks.to(device)
        with autocast():
            outputs = model(images)
            loss = criterion(outputs, masks)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

    print(f"Epoch [{epoch + 1}/{num_epochs}] | Loss: {loss.item():.4f}")


OutOfMemoryError: CUDA out of memory. Tried to allocate 8.00 GiB (GPU 0; 3.82 GiB total capacity; 195.04 MiB already allocated; 2.64 GiB free; 226.00 MiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF