In [1]:
from os import putenv
import os

# ROCm-related environment overrides. This cell runs before `import torch`
# in the next cell, so the values are in place when the GPU runtime starts.
#
# Assign through os.environ instead of calling os.putenv(): the mapping
# forwards every assignment to putenv() itself AND keeps os.environ in sync.
# os.putenv() alone leaves os.environ stale, so later Python-side reads (and
# child processes started from a copy of os.environ) would not see the values.
os.environ["HSA_OVERRIDE_GFX_VERSION"] = "10.3.0"  # presumably makes the runtime treat the card as gfx1030 -- confirm for your GPU
os.environ["PYTORCH_ROCM_ARCH"] = "gfx1030"
os.environ["TORCH_USE_HIP_DSA"] = "1"  # debugging aid; verify it is still wanted for normal runs
os.environ["AMD_SERIALIZE_KERNEL"] = "3"  # debugging aid; verify it is still wanted for normal runs


In [2]:
import torch
from torch import nn
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms

In [3]:
# import os

# os.environ["HSA_OVERRIDE_GFX_VERSION"] = "10.3.0"
# os.environ["TORCH_USE_HIP_DSA"] = "1"
# os.environ["AMD_SERIALIZE_KERNEL"] = "3"
# os.environ["HIP_LOG_LEVEL"] = "3"  # Set to debug level
# os.environ["HIP_VISIBLE_DEVICES"] = "0"  # Ensure only the first GPU is visible

In [4]:
# !which python
# Ask PyTorch's caching allocator to hand back any unused cached GPU memory
# (e.g. left over from a previous run in the same kernel) before training.
torch.cuda.empty_cache()

In [5]:
# Report which GPU backend this PyTorch build exposes.
gpu_ready = torch.cuda.is_available()

print("CUDA Available:", gpu_ready)
# torch.version.hip is None on builds without ROCm/HIP support.
print("ROCm Available:", torch.version.hip is not None)
if gpu_ready:
    print("GPU Name:", torch.cuda.get_device_name(0))
else:
    print("GPU Name:", "No GPU detected")
print(torch.version.hip)

CUDA Available: True
ROCm Available: True
GPU Name: AMD Radeon RX 6600
6.0.32831-


In [6]:
# # Create a tensor and move it to the GPU
# tensor = torch.randn(3, 3).to('cuda')
# print(tensor)

# # Check if the tensor is on the GPU
# print(tensor.device)

In [7]:
def plot_img(x, channels=(0, 10, -1)):
    """Show selected feature maps of the first sample side by side.

    Args:
        x: Tensor indexed as ``x[0][channel]``; each selected entry must be a
            2-D map (presumably ``x`` is (batch, channels, height, width) --
            confirm against the caller).
        channels: Channel indices of the first sample to display, left to
            right. The default reproduces the original hard-coded choice
            (first, eleventh and last channel); pass other indices for
            tensors with fewer than 11 channels.

    Returns:
        None. A new matplotlib figure is created as a side effect.

    Raises:
        IndexError: If an entry of ``channels`` is out of range for ``x[0]``.
    """
    # .cpu() is required for tensors that live on the GPU: Tensor.numpy()
    # only works on CPU tensors. It is a no-op for tensors already on the CPU.
    first_sample = x[0].detach().cpu()
    panels = [first_sample[index].numpy() for index in channels]
    # Join the maps horizontally (axis=1 is the column axis of each 2-D map).
    combined_image = np.concatenate(panels, axis=1)

    # Create the figure only after indexing succeeded, so a bad channel index
    # does not leave an empty figure behind.
    plt.figure()
    plt.imshow(combined_image)

def crop_image(source_tensor, target_tensor):
    """Center-crop ``source_tensor`` to the size of ``target_tensor`` in dims 2 and 3.

    Both tensors must have at least four dimensions; only dimensions 2 and 3
    are cropped (presumably height and width of an (N, C, H, W) batch).
    Dimensions 0 and 1 of ``source_tensor`` are returned unchanged.

    Args:
        source_tensor: Tensor to crop.
        target_tensor: Tensor whose sizes in dims 2 and 3 define the crop.

    Returns:
        A slice of ``source_tensor`` whose dims 2 and 3 match
        ``target_tensor``. When the size difference is odd, the extra
        row/column is dropped from the end.

    Raises:
        ValueError: If ``target_tensor`` is larger than ``source_tensor`` in
            dim 2 or 3. Without this check the negative offsets would slice
            from the end and silently return a wrongly sized tensor.
    """
    source_rows, source_cols = source_tensor.size()[2], source_tensor.size()[3]
    target_rows, target_cols = target_tensor.size()[2], target_tensor.size()[3]

    if target_rows > source_rows or target_cols > source_cols:
        raise ValueError(
            f"Cannot crop a {source_rows}x{source_cols} tensor to the larger "
            f"size {target_rows}x{target_cols}"
        )

    row_start = (source_rows - target_rows) // 2
    col_start = (source_cols - target_cols) // 2

    return source_tensor[
        :,
        :,
        row_start:row_start + target_rows,
        col_start:col_start + target_cols,
    ]

In [8]:
def double_conv(in_channel, out_channel):
    """Build a block of two 3x3 convolutions, each followed by an in-place ReLU.

    No padding is passed to the convolutions, so every convolution shrinks
    each spatial dimension by 2 (4 in total for the block).

    Args:
        in_channel: Number of input channels of the first convolution.
        out_channel: Number of output channels of both convolutions.

    Returns:
        An ``nn.Sequential`` of Conv2d -> ReLU -> Conv2d -> ReLU.
    """
    first_conv = nn.Conv2d(in_channel, out_channel, kernel_size=3)
    second_conv = nn.Conv2d(out_channel, out_channel, kernel_size=3)
    layers = [
        first_conv,
        nn.ReLU(inplace=True),
        second_conv,
        nn.ReLU(inplace=True),
    ]
    return nn.Sequential(*layers)

class UNet(nn.Module):
    """U-Net style encoder/decoder that maps a 3-channel image to a 1-channel map.

    The encoder applies five ``double_conv`` stages (3 -> 64 -> 128 -> 256 ->
    512 -> 1024 channels) with a 2x2 max-pool between consecutive stages. The
    decoder upsamples four times with stride-2 transposed convolutions; after
    each upsampling the matching encoder output is center-cropped with
    ``crop_image``, concatenated on the channel axis and fused by another
    ``double_conv``. A final 1x1 convolution produces one output channel.

    ``forward`` applies no activation after the last convolution, and because
    the 3x3 convolutions are unpadded the output is spatially smaller than
    the input.
    """

    def __init__(self):
        super().__init__()

        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder stages: down_conv1 .. down_conv5.
        encoder_widths = [3, 64, 128, 256, 512, 1024]
        for stage, (width_in, width_out) in enumerate(zip(encoder_widths, encoder_widths[1:]), start=1):
            setattr(self, f"down_conv{stage}", double_conv(width_in, width_out))

        # Decoder stages. All transposed convolutions are registered before
        # the fusing convolutions so that module order stays the same.
        # The "tarnspose" spelling is kept on purpose: the attribute names are
        # part of the state_dict keys.
        decoder_widths = [1024, 512, 256, 128, 64]
        decoder_pairs = list(zip(decoder_widths, decoder_widths[1:]))
        for stage, (width_in, width_out) in enumerate(decoder_pairs, start=1):
            setattr(
                self,
                f"tarnspose_conv_{stage}",
                nn.ConvTranspose2d(width_in, width_out, kernel_size=2, stride=2),
            )
        for stage, (width_in, width_out) in enumerate(decoder_pairs, start=1):
            # Input width is doubled by the skip concatenation, which is why
            # the fusing block takes width_in (= 2 * width_out) channels.
            setattr(self, f"up_conv{stage}", double_conv(width_in, width_out))

        self.out = nn.Conv2d(64, 1, kernel_size=1)

    def forward(self, image):
        """Run the encoder, then the decoder with cropped skip connections.

        Args:
            image: Input batch with 3 channels in dim 1.

        Returns:
            The single-channel output of the final 1x1 convolution.
        """
        # ////////////// ENCODER //////////////
        encoder_stages = (self.down_conv1, self.down_conv2, self.down_conv3, self.down_conv4)
        skips = []
        features = image
        for encode in encoder_stages:
            features = encode(features)
            skips.append(features)  # pre-pooling output feeds the decoder later
            features = self.max_pool(features)
        x = self.down_conv5(features)

        # ////////////// DECODER //////////////
        upsamplers = (
            self.tarnspose_conv_1,
            self.tarnspose_conv_2,
            self.tarnspose_conv_3,
            self.tarnspose_conv_4,
        )
        fusers = (self.up_conv1, self.up_conv2, self.up_conv3, self.up_conv4)
        # Deepest skip connection is consumed first.
        for upsample, fuse, skip in zip(upsamplers, fusers, reversed(skips)):
            x = upsample(x)
            x = fuse(torch.cat([x, crop_image(skip, x)], 1))

        return self.out(x)

In [9]:
# image_path = "./test.png"
# image = Image.open(image_path)
# print(type(image))
# image = np.array(image)
# print(image.size)
# image_tensor = torch.from_numpy(image)
# plt.imshow(image_tensor)
# image_tensor = image_tensor.float()
# image_tensor = image_tensor.permute(2, 0, 1)
# image_tensor = image_tensor.unsqueeze(0)
# print(image_tensor.size())


In [10]:
# model = UNet()
# output = model(image_tensor)


In [11]:
# combined_image = np.concatenate((output[0][0].detach().numpy(), output[0][1].detach().numpy()), axis=1)
# print(output.shape)
# plt.imshow(output[0][0].detach().numpy())

In [12]:
class SegmentationDataset(Dataset):
    """Dataset of (image, mask) pairs loaded from two parallel lists of paths.

    Entry ``i`` of ``image_paths`` is paired with entry ``i`` of
    ``mask_paths``; the caller is responsible for ordering both lists so that
    they line up.

    Args:
        image_paths: Sequence of image file paths.
        mask_paths: Sequence of mask file paths, same length as ``image_paths``.
        transform: Optional callable applied to the image and, separately, to
            the mask. NOTE(review): because it is called twice, a transform
            with random behaviour would not be applied identically to both --
            only deterministic transforms are safe here.

    Raises:
        ValueError: If the two path lists differ in length.
    """

    def __init__(self, image_paths, mask_paths, transform=None):
        if len(image_paths) != len(mask_paths):
            raise ValueError(
                f"Got {len(image_paths)} image paths but {len(mask_paths)} mask paths; "
                "every image needs exactly one mask"
            )
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transform = transform

    def __len__(self):
        """Return the number of (image, mask) pairs."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, optionally transform, and return the pair at position ``idx``."""
        # image = Image.open(self.image_paths[idx]).convert("L")
        # mask = Image.open(self.mask_paths[idx]).convert("L")

        # Force three channels: the UNet in this notebook starts with a
        # 3-input-channel convolution, so RGBA, palette or greyscale files
        # would otherwise fail (or feed palette indices) downstream.
        # convert()/copy() read the pixel data, so the file handle can be
        # closed right away by the context manager.
        with Image.open(self.image_paths[idx]) as image_file:
            image = image_file.convert("RGB")
        # The mask keeps its stored mode, as before.
        with Image.open(self.mask_paths[idx]) as mask_file:
            mask = mask_file.copy()

        if self.transform is not None:
            image = self.transform(image)
            mask = self.transform(mask)

        return image, mask
    

In [13]:
# Preprocessing shared by images and masks: resize to a fixed 1024x1024
# square, then convert the PIL image to a tensor.
# NOTE(review): Resize is used with its default interpolation, so resized
# masks may contain intermediate (non-binary) values along edges -- confirm
# that this is acceptable for the loss in use.
_resize_step = transforms.Resize((1024, 1024))
_to_tensor_step = transforms.ToTensor()
transform = transforms.Compose([_resize_step, _to_tensor_step])

# test_transform = transforms.Compose([
#     transforms.Resize((836, 836)),
#     transforms.ToTensor()
# ])

In [14]:
import os
from glob import glob
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import torch.nn.functional as F


image_dir = "./data"
mask_dir = "./mask"

# The character-class pattern matches three-letter extensions such as .jpg
# and .png (it does not match .jpeg). Sorting both listings is what pairs
# image i with mask i, so both folders must use names that sort identically.
image_paths = sorted(glob(os.path.join(image_dir, '*.[jp][pn]g')))
mask_paths = sorted(glob(os.path.join(mask_dir, '*.[jp][pn]g')))

# Fail early with a readable message instead of a cryptic error from
# train_test_split when a folder is empty or the two folders are out of sync.
if not image_paths:
    raise FileNotFoundError(f"No .jpg/.png files found in {image_dir!r}")
if len(image_paths) != len(mask_paths):
    raise ValueError(
        f"Found {len(image_paths)} images in {image_dir!r} but "
        f"{len(mask_paths)} masks in {mask_dir!r}; each image needs one mask"
    )

# Fixed random_state keeps the 80/20 split reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(image_paths, mask_paths, test_size=0.2, random_state=42)

train_dataset = SegmentationDataset(X_train, y_train, transform=transform)
val_dataset = SegmentationDataset(X_val, y_val, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, num_workers=2, pin_memory=True)

In [15]:
def _batch_loss(model, criterion, images, masks, device):
    """Move one batch to ``device``, run the model and return the batch loss."""
    images = images.to(device)
    masks = masks.to(device)

    outputs = model(images)
    # Bring the mask to the spatial size of the model output, then average
    # its channels down to a single target channel.
    # NOTE(review): this rescales the whole mask. If the model output
    # corresponds to a cropped region of the input (as with unpadded
    # convolutions), a center crop may align better -- confirm.
    masks_resized = F.interpolate(masks, size=outputs.shape[2:], mode='bilinear', align_corners=False)
    masks_resized = masks_resized.mean(dim=1, keepdim=True)
    return criterion(outputs, masks_resized)


def _mean_loss(total, num_batches):
    """Average a summed loss over batches; NaN when there were no batches."""
    return total / num_batches if num_batches else float("nan")


def train(model, train_loader, test_loader, optimizer, criterion, num_epochs=20):
    """Train ``model`` and evaluate it on ``test_loader`` after every epoch.

    The device is taken from the model's own parameters instead of a global
    ``device`` variable, so the function no longer depends on a notebook cell
    that is executed later. Move the model to the desired device before
    calling.

    Args:
        model: Module to optimise; called as ``model(images)``.
        train_loader: Iterable with ``len()`` yielding ``(images, masks)`` batches.
        test_loader: Iterable with ``len()`` yielding validation batches.
        optimizer: Optimizer over ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, target)``.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        Dict with keys ``"train_loss"`` and ``"val_loss"``, each a list with
        one mean batch loss per epoch (NaN for an empty loader). The model is
        left in evaluation mode after the last validation pass.
    """
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    history = {"train_loss": [], "val_loss": []}

    for epoch in range(num_epochs):
        # Training phase
        model.train()  # validation below switches to eval mode, so reset every epoch
        epoch_loss = 0.0
        for images, masks in train_loader:
            loss = _batch_loss(model, criterion, images, masks, device)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        train_loss = _mean_loss(epoch_loss, len(train_loader))
        history["train_loss"].append(train_loss)
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss}')

        # Validation phase
        model.eval()
        val_total = 0.0
        with torch.no_grad():  # no gradients needed for evaluation
            for images, masks in test_loader:
                val_total += _batch_loss(model, criterion, images, masks, device).item()

        val_loss = _mean_loss(val_total, len(test_loader))
        history["val_loss"].append(val_loss)
        print(f'Epoch [{epoch+1}/{num_epochs}], Validation Loss: {val_loss}')

    return history

In [None]:

# Use GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = torch.device("cpu")

# Build the network directly on the chosen device, then create the optimizer
# over its parameters and the loss that consumes the model's raw outputs.
model = UNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCEWithLogitsLoss()

train(model, train_loader, val_loader, optimizer, criterion)


In [None]:
# import torch
# import torch.nn as nn

# class SimpleNN(nn.Module):
#     def __init__(self):
#         super(SimpleNN, self).__init__()
#         self.fc1 = nn.Linear(3, 3)

#     def forward(self, x):
#         return self.fc1(x)

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = SimpleNN().to(device)

# # Test with a small tensor
# input_tensor = torch.randn(3, 3).to(device)
# output = model(input_tensor)
# print(output)

In [None]:
# import torch
# print(torch.__version__)

In [13]:
# import torch
# import time

# # Set device
# device_cpu = torch.device('cpu')
# device_gpu = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# # Define the size of the matrices
# matrix_size = 10000  # Adjust this size based on your GPU memory

# # Create random matrices on CPU
# x_cpu = torch.randn(matrix_size, matrix_size).to(device_cpu)
# y_cpu = torch.randn(matrix_size, matrix_size).to(device_cpu)

# # Measure execution time on CPU
# start_time_cpu = time.time()
# z_cpu = torch.matmul(x_cpu, y_cpu)
# end_time_cpu = time.time()

# # Print CPU results
# print(f"CPU Execution Time: {end_time_cpu - start_time_cpu:.4f} seconds")

# # Create random matrices on GPU
# x_gpu = torch.randn(matrix_size, matrix_size).to(device_gpu)
# y_gpu = torch.randn(matrix_size, matrix_size).to(device_gpu)

# # Measure execution time on GPU
# start_time_gpu = time.time()
# z_gpu = torch.matmul(x_gpu, y_gpu)
# end_time_gpu = time.time()

# # Print GPU results
# print(f"GPU Execution Time: {end_time_gpu - start_time_gpu:.4f} seconds")

CPU Execution Time: 2.5202 seconds
GPU Execution Time: 0.3050 seconds


In [15]:
# num_runs = 50  # Number of runs for averaging

# # CPU timing
# cpu_times = []
# for _ in range(num_runs):
#     start_time_cpu = time.time()
#     z_cpu = torch.matmul(x_cpu, y_cpu)
#     end_time_cpu = time.time()
#     cpu_times.append(end_time_cpu - start_time_cpu)

# total_cpu_time = sum(cpu_times)
# avg_cpu_time = total_cpu_time / num_runs
# print(f"Average CPU Execution Time: {avg_cpu_time:.4f} seconds")
# print(f"Total CPU Execution Time: {total_cpu_time:.4f} seconds")

# # GPU timing
# gpu_times = []
# for _ in range(num_runs):
#     start_time_gpu = time.time()
#     z_gpu = torch.matmul(x_gpu, y_gpu)
#     end_time_gpu = time.time()
#     gpu_times.append(end_time_gpu - start_time_gpu)

# total_gpu_time = sum(gpu_times)
# avg_gpu_time = total_gpu_time / num_runs
# print(f"Average GPU Execution Time: {avg_gpu_time:.4f} seconds")
# print(f"Total GPU Execution Time: {total_gpu_time:.4f} seconds")

Average CPU Execution Time: 2.7786 seconds
Total CPU Execution Time: 138.9290 seconds
Average GPU Execution Time: 0.3309 seconds
Total GPU Execution Time: 16.5429 seconds
