In [None]:
### Training a model to segment an image into 3 classes: background, foreground, and boundary
### The model is a U-Net with a ResNet34 encoder
### The model is trained on the ISBI 2012 dataset
### The model is trained using the Dice loss function
### The model is trained using the Adam optimizer
### The model is trained using a learning rate scheduler

import os
import sys
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from torchvision import models
from torchvision import transforms
from torchvision.datasets import ImageFolder
from unet import UNet


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class UNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=3):
        super(UNet, self).__init__()

        def conv_block(in_c, out_c):
            return nn.Sequential(
                nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_c),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_c, out_c, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_c),
                nn.ReLU(inplace=True)
            )

        self.encoder1 = conv_block(in_channels, 64)
        self.encoder2 = conv_block(64, 128)
        self.encoder3 = conv_block(128, 256)
        self.encoder4 = conv_block(256, 512)

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.bottleneck = conv_block(512, 1024)

        self.upconv4 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.decoder4 = conv_block(1024, 512)

        self.upconv3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.decoder3 = conv_block(512, 256)

        self.upconv2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.decoder2 = conv_block(256, 128)

        self.upconv1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.decoder1 = conv_block(128, 64)

        self.conv_final = nn.Conv2d(64, out_channels, kernel_size=1)

    def forward(self, x):
        # Encoder
        e1 = self.encoder1(x)
        e2 = self.encoder2(self.pool(e1))
        e3 = self.encoder3(self.pool(e2))
        e4 = self.encoder4(self.pool(e3))

        # Bottleneck
        b = self.bottleneck(self.pool(e4))

        # Decoder
        d4 = self.upconv4(b)
        d4 = torch.cat((d4, e4), dim=1)
        d4 = self.decoder4(d4)

        d3 = self.upconv3(d4)
        d3 = torch.cat((d3, e3), dim=1)
        d3 = self.decoder3(d3)

        d2 = self.upconv2(d3)
        d2 = torch.cat((d2, e2), dim=1)
        d2 = self.decoder2(d2)

        d1 = self.upconv1(d2)
        d1 = torch.cat((d1, e1), dim=1)
        d1 = self.decoder1(d1)

        return torch.sigmoid(self.conv_final(d1))


In [None]:
# 2. Update the Dataset class to properly handle image and mask processing
from torch.utils.data import Dataset
class ISBI2012Dataset(Dataset):
    def __init__(self, root, transform=None):
        super().__init__()
        self.root = root
        self.transform = transform
        self.images = []
        self.masks = []
        for filename in os.listdir(root):
            if filename.endswith("train-volume.tif"):
                self.images.append(filename)
            elif filename.endswith("train-labels.tif"):
                self.masks.append(filename)
        self.images.sort()
        self.masks.sort()
        
        # Define separate transforms for images and masks
        self.image_transform = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])
        ])
        
        self.mask_transform = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.ToTensor()
        ])

    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        image_path = os.path.join(self.root, self.images[idx])
        mask_path = os.path.join(self.root, self.masks[idx])
        
        image = Image.open(image_path).convert('RGB')
        mask = Image.open(mask_path).convert('L')  # Convert mask to grayscale
        
        image = self.image_transform(image)
        mask = self.mask_transform(mask)
        
        # Ensure mask is binary
        mask = (mask > 0.5).float()
        
        return image, mask

In [None]:
class ImageSegmenter:
    def __init__(self, model, loss_fn, optimizer, device, scheduler=None):
        self.model = model.to(device)
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.device = device

    def train_step(self, batch):
        images, masks = batch
        images = images.to(self.device)
        masks = masks.to(self.device)
        
        self.model.train()
        self.optimizer.zero_grad()
        
        predictions = self.model(images)
        loss = self.loss_fn(predictions, masks)
        
        loss.backward()
        self.optimizer.step()
        
        return loss.item()

    def eval_step(self, batch):
        images, masks = batch
        images = images.to(self.device)
        masks = masks.to(self.device)
        
        self.model.eval()
        with torch.no_grad():
            predictions = self.model(images)
            loss = self.loss_fn(predictions, masks)
            
        return loss.item(), predictions

    def fit(self, train_loader, val_loader, epochs):
        best_val_loss = float('inf')
        for epoch in range(epochs):
            # Training
            train_losses = []
            for batch in train_loader:
                loss = self.train_step(batch)
                train_losses.append(loss)
            
            # Validation
            val_losses = []
            self.model.eval()
            with torch.no_grad():
                for batch in val_loader:
                    loss, _ = self.eval_step(batch)
                    val_losses.append(loss)
            
            avg_train_loss = sum(train_losses) / len(train_losses)
            avg_val_loss = sum(val_losses) / len(val_losses)
            
            if self.scheduler is not None:
                self.scheduler.step(avg_val_loss)
            
            # Save best model
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                torch.save(self.model.state_dict(), 'best_model.pth')
            
            print(f"Epoch {epoch+1}/{epochs}")
            print(f"Train Loss: {avg_train_loss:.4f}")
            print(f"Val Loss: {avg_val_loss:.4f}")
            print("-" * 30)

In [None]:
import torch.optim as optim
from torchvision.transforms import ToTensor
from torchvision.transforms import Compose
from torchvision.transforms import Resize
from torchvision.transforms import Lambda
# Dataset and DataLoader

def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # Initialize model and training components
    model = UNet(in_channels=3, out_channels=1)
    criterion = nn.BCELoss()  # Binary Cross Entropy Loss
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)
    
    # Create data loaders
    train_dataset = ISBI2012Dataset(root='data')
    val_dataset = ISBI2012Dataset(root='data')
    
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=8)
    
    # Initialize trainer
    segmenter = ImageSegmenter(model, criterion, optimizer, device, scheduler)
    
    # Train the model
    segmenter.fit(train_loader, val_loader, epochs=50)
main()



In [None]:
# from torchvision.transforms import Compose, Resize, ToTensor
# from PIL import Image
# import torch
# import numpy as np
# import matplotlib.pyplot as plt

# # Set up the device
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# # Load the model once
# model = UNet()
# model.load_state_dict(torch.load('model.pth', map_location=device))
# model.eval().to(device)

# # Preprocess the image
# transform = Compose([Resize((256, 256)), ToTensor()])
# image = Image.open('pexels-pixabay-206959.jpg')
# input_tensor = transform(image).unsqueeze(0).to(device)

# # Inference
# with torch.no_grad():
#     output = model(input_tensor)

# # Post-process the result
# output = torch.sigmoid(output).squeeze().cpu().numpy()  # Remove all extra dimensions
# print("Output shape:", output.shape)

# # Make sure output is 2D
# if len(output.shape) > 2:
#     output = output.squeeze()  # Remove any extra dimensions
#     # If it's still 3D, take the first channel
#     if len(output.shape) > 2:
#         output = output[0]

# # Convert the predicted mask to an image
# output_image = (output * 255).astype(np.uint8)
# predicted_mask_image = Image.fromarray(output_image, mode='L')  # Specify mode='L' for grayscale

# # Save the predicted mask image
# predicted_mask_image.save('predicted_mask.png')

# # Visualize the result with matplotlib
# plt.figure(figsize=(12, 6))

# # Show the original image
# plt.subplot(1, 2, 1)
# plt.title('Original Image')
# plt.imshow(image)
# plt.axis('off')

# # Show the predicted mask
# plt.subplot(1, 2, 2)
# plt.title('Predicted Mask')
# plt.imshow(output_image, cmap='gray')
# plt.axis('off')

# plt.tight_layout()
# plt.show()

In [3]:


import torch
import torchvision.transforms as T
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from unet import UNet  # Your UNet model

# Load the trained model
def load_model(model_path, device):
    model = UNet()
    model.load_state_dict(torch.load(model_path), strict=False)
    model = model.to(device)
    model.eval()
    return model

# Preprocess input image (resize, normalize, etc.)
def preprocess_image(image_path, device):
    image = Image.open(image_path).convert('RGB')  # Open the image and convert to RGB
    transform = T.Compose([
        T.Resize((256, 256)),  # Resize to match model input size
        T.ToTensor(),  # Convert to tensor
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
    ])
    image = transform(image).unsqueeze(0)  # Add batch dimension
    return image.to(device)

# Run the model on the input image and get the output mask
def predict_segmentation(model, image, device):
    with torch.no_grad():
        output = model(image)
        # Apply sigmoid to get probabilities
        output = torch.sigmoid(output)
        # Convert the output to a binary mask (thresholding at 0.5)
        mask = output.squeeze().cpu().numpy() > 0.5
    return mask

# Visualize the image and the predicted mask
def visualize_results(image_path, mask):
    image = Image.open(image_path).convert('RGB')
    image = np.array(image)

    plt.figure(figsize=(10, 5))

    # Original image
    plt.subplot(1, 2, 1)
    plt.imshow(image)
    plt.title('Original Image')

    # Predicted mask
    plt.subplot(1, 2, 2)
    plt.imshow(mask, cmap='gray')
    plt.title('Predicted Mask')

    plt.show()

def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load the trained model
    model = load_model('best_model.pth', device)

    # Preprocess the input image
    image_path = 'pexels-pixabay-206959.jpg'  # Replace with your image path
    image = preprocess_image(image_path, device)

    # Get the segmentation mask prediction
    mask = predict_segmentation(model, image, device)

    # Visualize the results
    visualize_results(image_path, mask)

if __name__ == "__main__":
    main()


  model.load_state_dict(torch.load(model_path), strict=False)


RuntimeError: Given groups=1, weight of size [64, 1, 3, 3], expected input[1, 3, 256, 256] to have 1 channels, but got 3 channels instead