In [None]:
#version 0

In [1]:
import os
import numpy as np
from PIL import Image
import torch
from torchvision import models, transforms
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from tqdm import tqdm
import random
import seaborn as sns
from sklearn.metrics import confusion_matrix
import pandas as pd

In [2]:
def visualize_grid(images, masks, boxes, scores, threshold=0.5, grid_size=(3, 3)):
    fig, axes = plt.subplots(grid_size[0], grid_size[1], figsize=(12, 12))
    axes = axes.ravel()

    # Select a random subset of images to display
    indices = random.sample(range(len(images)), min(grid_size[0] * grid_size[1], len(images)))

    for idx, ax in zip(indices, axes):
        ax.imshow(images[idx])

        # Display masks, bounding boxes, and labels
        num_instances = masks[idx].shape[0]
        for i in range(num_instances):
            if scores[idx][i] >= threshold:
                # Mask
                mask = masks[idx][i, 0].cpu().numpy()
                masked_image = np.ma.masked_where(mask == 0, mask)
                ax.imshow(masked_image, cmap='jet', alpha=0.5)

                # Bounding Box
                box = boxes[idx][i].cpu().numpy()
                rect = Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1], linewidth=2, edgecolor='r', facecolor='none')
                ax.add_patch(rect)

                # Score
                ax.text(box[0], box[1] - 10, f'Score: {scores[idx][i]:.2f}', 
                        color='white', fontsize=8, bbox=dict(facecolor='red', alpha=0.5))

        ax.axis('off')
    plt.tight_layout()
    plt.show()

In [3]:
def load_image_and_mask(image_path, mask_path):
    """
    Load an image and its corresponding mask.
    :param image_path: Path to the image.
    :param mask_path: Path to the mask.
    :return: image_array, mask_array
    """
    image = Image.open(image_path)
    mask = Image.open(mask_path)
    
    # Convert both to numpy arrays
    image_array = np.array(image)
    mask_array = np.array(mask)
    
    return image_array, mask_array


In [4]:
def process_image_with_model(image, model, transform):
    # Apply transformations
    image_tensor = transform(image).unsqueeze(0)  # Add batch dimension

    # Move image to GPU if available
    if torch.cuda.is_available():
        image_tensor = image_tensor.cuda()

    model.eval()
    
    # Enable mixed precision to save memory
    with torch.cuda.amp.autocast(), torch.no_grad():
        prediction = model(image_tensor)[0]

    # Clear unused cache memory to avoid overflow
    torch.cuda.empty_cache()

    return prediction

In [5]:
# Main function to process dataset and visualize results
def process_dataset(image_dir, mask_dir, model, transform):
    """
    Process all images in a dataset directory (train or val), run them through Mask R-CNN, and visualize results.
    :param image_dir: Directory containing images.
    :param mask_dir: Directory containing corresponding masks.
    :param model: Pre-trained Mask R-CNN model.
    :param transform: Transformation for image preprocessing.
    """
    # List of images in the directory
    image_names = os.listdir(image_dir)
    
    all_images = []
    all_masks = []
    all_boxes = []
    all_scores = []
    
    for image_name in tqdm(image_names):
        image_path = os.path.join(image_dir, image_name)
        mask_path = os.path.join(mask_dir, image_name)
        
        # Load image and mask
        image, mask = load_image_and_mask(image_path, mask_path)

        # Process the image through the model
        prediction = process_image_with_model(image, model, transform)
        
        # Store predictions
        all_images.append(image)
        all_masks.append(prediction['masks'])
        all_boxes.append(prediction['boxes'])
        all_scores.append(prediction['scores'])

    # Visualize a grid of random images
    print("Visualizing a random grid of predictions...")
    visualize_grid(all_images, all_masks, all_boxes, all_scores)


In [6]:
if __name__ == '__main__':
    # Load a pre-trained Mask R-CNN model (trained on COCO dataset)
    model = models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    # Define image transformations
    transform = transforms.Compose([
        transforms.ToTensor(),  # Convert PIL Image to Tensor
    ])
    
    # Directories containing the original images and masks
    dataset_base_dir = '/home/idrone2/Desktop/rk/BC/BCSS'
    image_dir = os.path.join(dataset_base_dir, 'train')  # For training images
    mask_dir = os.path.join(dataset_base_dir, 'train_mask')  # For corresponding masks
    
    # Process training images
    print("Processing training dataset...")
    process_dataset(image_dir, mask_dir, model, transform)
    
    # Process validation dataset
    val_image_dir = os.path.join(dataset_base_dir, 'val')  # For validation images
    val_mask_dir = os.path.join(dataset_base_dir, 'val_mask')  # For corresponding masks
    
    print("Processing validation dataset...")
    process_dataset(val_image_dir, val_mask_dir, model, transform)



Processing training dataset...


 35%|███▍      | 10757/30760 [13:13<24:35, 13.56it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 

In [None]:
#version 1

In [1]:
import os
import numpy as np
from PIL import Image
import torch
from torchvision import models, transforms
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from tqdm import tqdm
import random
import seaborn as sns
from sklearn.metrics import confusion_matrix
import pandas as pd

In [2]:
def visualize_grid(images, masks, boxes, labels, scores, threshold=0.5, grid_size=(3, 3)):
    fig, axes = plt.subplots(grid_size[0], grid_size[1], figsize=(12, 12))
    axes = axes.ravel()

    # Select a random subset of images to display
    indices = random.sample(range(len(images)), min(grid_size[0] * grid_size[1], len(images)))

    for idx, ax in zip(indices, axes):
        ax.imshow(images[idx])

        # Display masks, bounding boxes, and labels
        num_instances = masks[idx].shape[0]
        for i in range(num_instances):
            if scores[idx][i] >= threshold:
                # Mask
                mask = masks[idx][i, 0].cpu().numpy()
                masked_image = np.ma.masked_where(mask == 0, mask)
                ax.imshow(masked_image, cmap='jet', alpha=0.5)

                # Bounding Box
                box = boxes[idx][i].cpu().numpy()
                rect = Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1], linewidth=2, edgecolor='r', facecolor='none')
                ax.add_patch(rect)

                # Label and score
                ax.text(box[0], box[1] - 10, f'Label: {labels[idx][i]} | Score: {scores[idx][i]:.2f}', 
                        color='white', fontsize=8, bbox=dict(facecolor='red', alpha=0.5))

        ax.axis('off')
    plt.tight_layout()
    plt.show()

In [3]:
def load_image_and_mask(image_path, mask_path):
    """
    Load an image and its corresponding mask.
    :param image_path: Path to the image.
    :param mask_path: Path to the mask.
    :return: image_array, mask_array
    """
    image = Image.open(image_path)
    mask = Image.open(mask_path)
    
    # Convert both to numpy arrays
    image_array = np.array(image)
    mask_array = np.array(mask)
    
    return image_array, mask_array

In [4]:
def process_image_with_model(image, model, transform):
    """
    Preprocess the image, pass through Mask R-CNN, and return predictions.
    :param image: Input image as a numpy array.
    :param model: Pre-trained Mask R-CNN model.
    :param transform: Preprocessing transformation.
    :return: Predictions from the model.
    """
    image_tensor = transform(Image.fromarray(image)).unsqueeze(0).to(device)
    model.eval()
    with torch.no_grad():
        prediction = model(image_tensor)[0]
    return prediction

In [5]:
# Function to display confusion matrix
def plot_confusion_matrix(true_labels, pred_labels, class_names):
    """
    Plot a confusion matrix to compare the true and predicted labels.
    :param true_labels: Ground truth labels.
    :param pred_labels: Predicted labels.
    :param class_names: List of class names.
    """
    cm = confusion_matrix(true_labels, pred_labels, labels=class_names)
    plt.figure(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title("Confusion Matrix")
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.show()

In [6]:
# Function to visualize label scores
def plot_label_scores(labels, scores):
    """
    Plot a bar chart of label prediction scores.
    :param labels: List of label names.
    :param scores: Corresponding prediction scores.
    """
    df = pd.DataFrame({"Labels": labels, "Scores": scores})
    plt.figure(figsize=(10, 6))
    sns.barplot(x='Labels', y='Scores', data=df, palette='viridis')
    plt.title("Label Prediction Scores")
    plt.xticks(rotation=90)
    plt.show()

In [7]:
# Main function to process dataset and visualize results
def process_dataset(image_dir, mask_dir, model, transform, class_names):
    """
    Process all images in a dataset directory (train or val), run them through Mask R-CNN, and visualize results.
    :param image_dir: Directory containing images.
    :param mask_dir: Directory containing corresponding masks.
    :param model: Pre-trained Mask R-CNN model.
    :param transform: Transformation for image preprocessing.
    :param class_names: List of class names for visualization.
    """
    # List of images in the directory
    image_names = os.listdir(image_dir)
    
    all_images = []
    all_masks = []
    all_boxes = []
    all_labels = []
    all_scores = []
    
    pred_labels = []
    true_labels = []

    for image_name in tqdm(image_names):
        image_path = os.path.join(image_dir, image_name)
        mask_path = os.path.join(mask_dir, image_name)
        
        # Load image and mask
        image, mask = load_image_and_mask(image_path, mask_path)
        true_labels.append(mask)  # Append ground truth

        # Process the image through the model
        prediction = process_image_with_model(image, model, transform)
        
        # Store predictions
        all_images.append(image)
        all_masks.append(prediction['masks'])
        all_boxes.append(prediction['boxes'])
        all_labels.append(prediction['labels'])
        all_scores.append(prediction['scores'])

        # Collect predicted labels
        pred_labels.extend(prediction['labels'].cpu().numpy())

    # Visualize a grid of random images
    print("Visualizing a random grid of predictions...")
    visualize_grid(all_images, all_masks, all_boxes, all_labels, all_scores)

    # Visualize the confusion matrix
    print("Visualizing confusion matrix...")
    plot_confusion_matrix(true_labels, pred_labels, class_names)

    # Visualize prediction scores
    print("Visualizing label prediction scores...")
    all_flat_labels = [lbl for sublist in all_labels for lbl in sublist.cpu().numpy()]
    all_flat_scores = [score for sublist in all_scores for score in sublist.cpu().numpy()]
    plot_label_scores(all_flat_labels, all_flat_scores)

In [9]:
if __name__ == '__main__':
    # Load a pre-trained Mask R-CNN model (trained on COCO dataset)
    model = models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    # Define image transformations
    transform = transforms.Compose([
        transforms.ToTensor(),  # Convert PIL Image to Tensor
    ])
    
     

    class_names = ['Background', 'Tumor', 'Other Tissue']

    # Directories containing the original images and masks
    dataset_base_dir = '/home/idrone2/Desktop/rk/BC/BCSS'
    image_dir = os.path.join(dataset_base_dir, 'train')  # For training images
    mask_dir = os.path.join(dataset_base_dir, 'train_mask')  # For corresponding masks
    
    # Process training images
    print("Processing training dataset...")
    process_dataset(image_dir, mask_dir, model, transform)
    
    # Process validation dataset
    val_image_dir = os.path.join(dataset_base_dir, 'val')  # For validation images
    val_mask_dir = os.path.join(dataset_base_dir, 'val_mask')  # For corresponding masks
    
    print("Processing validation dataset...")
    process_dataset(val_image_dir, val_mask_dir, model, transform)

Processing training dataset...


TypeError: process_dataset() missing 1 required positional argument: 'class_names'

In [None]:
#version 2

In [10]:
import torch
import torchvision
from torchvision import transforms
import os
from PIL import Image
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


In [11]:
# Load Mask R-CNN model
def get_model_instance_segmentation(num_classes):
    # Load a pre-trained model for instance segmentation
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # Get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # Replace the head with a new one (for our dataset)
    model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

    return model

In [12]:
# Load the TSV file and return label information for each image
def load_ground_truth(tsv_file):
    gtruth_data = pd.read_csv(tsv_file, sep='\t')
    return gtruth_data

In [13]:
# Map image name to corresponding class label from the TSV file
def get_label_from_tsv(image_name, gtruth_data):
    label_info = gtruth_data[gtruth_data['image_name'] == image_name]
    if label_info.empty:
        return None  # No label found
    return label_info['class'].values[0]  

In [14]:
# Function to visualize the predictions in a 3x3 grid
def visualize_predictions_grid(images, predictions, class_names, num_images=9):
    fig, axes = plt.subplots(3, 3, figsize=(12, 12))

    for idx, ax in enumerate(axes.flat):
        if idx < len(images):
            image = images[idx]
            prediction = predictions[idx]

            ax.imshow(image)
            ax.axis('off')

            # Draw the masks and boxes on the images
            for i, mask in enumerate(prediction['masks']):
                masked_image = np.ma.masked_where(mask.cpu().numpy() == 0, mask.cpu().numpy())
                ax.imshow(masked_image, cmap='jet', alpha=0.5)

                # Draw the bounding boxes
                box = prediction['boxes'][i].cpu().numpy()
                score = prediction['scores'][i].cpu().numpy()
                label = class_names[prediction['labels'][i].cpu().numpy()]
                ax.text(box[0], box[1], f'{label}: {score:.2f}', color='white', fontsize=8, backgroundcolor='black')

    plt.tight_layout()
    plt.show()

In [15]:
# Process the dataset and make predictions using Mask R-CNN
def process_dataset(image_dir, gtruth_data, model, transform, class_names, num_classes):
    images = []
    predictions = []

    model.eval()  # Set model to evaluation mode

    with torch.no_grad():
        for image_name in os.listdir(image_dir):
            image_path = os.path.join(image_dir, image_name)

            # Get label from the TSV file
            label = get_label_from_tsv(image_name, gtruth_data)

            if label is None:
                print(f"Label for image {image_name} not found!")
                continue

            # Load and transform image
            image = Image.open(image_path).convert("RGB")
            image_tensor = transform(image).unsqueeze(0).to(device)

            # Make predictions with Mask R-CNN
            prediction = model(image_tensor)[0]

            # Store image and prediction for visualization
            images.append(np.array(image))
            predictions.append({
                'masks': prediction['masks'],
                'boxes': prediction['boxes'],
                'labels': prediction['labels'],
                'scores': prediction['scores']
            })

            # Only store the first 'num_images' images for visualization
            if len(images) >= num_classes:
                break

    # Visualize in a 3x3 grid
    visualize_predictions_grid(images, predictions, class_names, num_images=num_classes)


In [17]:
if __name__ == '__main__':
    # Define device (use GPU if available)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Number of classes (including background)
    num_classes = 3  # For your case: Background, Tumor, and Other

    # Load the pre-trained Mask R-CNN model and update for your number of classes
    model = get_model_instance_segmentation(num_classes)
    model.to(device)

    # Define transformations
    transform = transforms.Compose([
        transforms.ToTensor(),  # Convert PIL image to tensor
    ])

    # Load ground truth labels from the TSV file
    gtruth_file = '/home/idrone2/Desktop/rk/BC/gtruth_codes_512.tsv'
    gtruth_data = load_ground_truth(gtruth_file)

    # Directories containing images
    dataset_base_dir = '/home/idrone2/Desktop/rk/BC/BCSS_512'
    image_dir = os.path.join(dataset_base_dir, 'train_512')  # Path to the images

    # Define class names
    #class_names = ['Background', 'Tumor Cells', 'Stroma', 'Necrosis', 'Lymphocytes','Blood vessels', 'Connective tissue','Other epithelial structures','Adipose tissue','Muscle tissue','Inflammatory cells','Calcifications','Mitotic figures','Non-malignant epithelium','Glandular structures','Fibrosis','Extracellular matrix','Nerve cells','Mucus-secreting cells','Dead cells','Immune cells','Artifact']  # Adjust as needed
    class_names = [
    "outside_roi", "tumor", "stroma", "lymphocytic_infiltrate", 
    "necrosis_or_debris", "glandular_secretions", "blood", 
    "exclude", "metaplasia_NOS", "fat", "plasma_cells", 
    "other_immune_infiltrate", "mucoid_material", 
    "normal_acinus_or_duct", "lymphatics", "undetermined", 
    "nerve", "skin_adnexa", "blood_vessel", "angioinvasion", 
    "dcis", "other"
    ]
    # Process and visualize the dataset
    process_dataset(image_dir, gtruth_data, model, transform, class_names, num_classes)



KeyError: 'image_name'

In [None]:
#version 3

In [18]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torchvision.transforms as T
import torch
from torchvision.models.detection import maskrcnn_resnet50_fpn

# Load ground truth data
gtruth_data = pd.read_csv('/home/idrone2/Desktop/rk/BC/gtruth_codes_512.tsv', sep='\t')

# Define your class names based on the TSV file
class_names = [
    "outside_roi", "tumor", "stroma", "lymphocytic_infiltrate", 
    "necrosis_or_debris", "glandular_secretions", "blood", 
    "exclude", "metaplasia_NOS", "fat", "plasma_cells", 
    "other_immune_infiltrate", "mucoid_material", 
    "normal_acinus_or_duct", "lymphatics", "undetermined", 
    "nerve", "skin_adnexa", "blood_vessel", "angioinvasion", 
    "dcis", "other"
]

num_classes = len(class_names)

# Load the Mask R-CNN model pre-trained on COCO
model = maskrcnn_resnet50_fpn(pretrained=True)
model.eval()  # Set the model to evaluation mode

# Define image transformations
transform = T.Compose([
    T.ToTensor(),  # Convert images to PyTorch tensors
])

# Function to get label from the TSV file
def get_label_from_tsv(image_name, gtruth_data):
    label_info = gtruth_data[gtruth_data['label'] == image_name]  # Adjust based on naming convention
    if label_info.empty:
        return None  # No label found
    return label_info['GT_code'].values[0]  # Get the corresponding GT_code

# Function to process the dataset
def process_dataset(image_dir, gtruth_data, model, transform, class_names):
    image_names = os.listdir(image_dir)
    for image_name in image_names:
        image_path = os.path.join(image_dir, image_name)

        # Load and transform the image
        image = Image.open(image_path).convert("RGB")
        image_tensor = transform(image).unsqueeze(0)  # Add batch dimension

        # Get label from the TSV file
        label_code = get_label_from_tsv(image_name.split('.')[0], gtruth_data)

        if label_code is None:
            print(f"Label for image {image_name} not found!")
            continue

        # Perform prediction
        with torch.no_grad():
            predictions = model(image_tensor)

        # Visualize predictions (you can customize this part)
        visualize_predictions(image, predictions)

# Function to visualize predictions
def visualize_predictions(image, predictions, threshold=0.5):
    masks = predictions[0]['masks']
    boxes = predictions[0]['boxes']
    scores = predictions[0]['scores']
    labels = predictions[0]['labels']

    plt.figure(figsize=(12, 12))
    plt.imshow(image)

    for i in range(len(masks)):
        if scores[i] > threshold:
            mask = masks[i, 0].mul(255).byte().cpu().numpy()
            plt.imshow(mask, alpha=0.5, cmap='jet')  # Overlay the mask

            # Draw bounding box
            box = boxes[i].cpu().numpy()
            plt.gca().add_patch(plt.Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1], 
                                                edgecolor='blue', linewidth=2, fill=False))

    plt.axis('off')
    plt.show()

if __name__ == '__main__':
    # Specify your dataset directories
    train_image_dir = '/home/idrone2/Desktop/rk/BC/BCSS_512/train_512'  # Adjust the path to your training images
    val_image_dir = '/home/idrone2/Desktop/rk/BC/BCSS_512/val_512'      # Adjust the path to your validation images

    # Process the training dataset
    print("Processing training dataset...")
    process_dataset(train_image_dir, gtruth_data, model, transform, class_names)

    # Process the validation dataset
    print("Processing validation dataset...")
    process_dataset(val_image_dir, gtruth_data, model, transform, class_names)



Processing training dataset...
Label for image TCGA-A2-A0YM-DX1_xmin44520_ymin66915_MPP-0_2560_4096_size512.png not found!
Label for image TCGA-BH-A18V-DX1_xmin50030_ymin48454_MPP-0_512_4096_size512.png not found!
Label for image TCGA-BH-A18V-DX1_xmin50030_ymin48454_MPP-0_512_512_size512.png not found!
Label for image TCGA-A2-A0YM-DX1_xmin44520_ymin66915_MPP-0_2048_4096_size512.png not found!
Label for image TCGA-BH-A0WA-DX1_xmin56581_ymin24774_MPP-0_4096_3584_size512.png not found!
Label for image TCGA-AO-A03U-DX1_xmin17362_ymin25363_MPP-0_2560_512_size512.png not found!
Label for image TCGA-D8-A1JG-DX1_xmin15677_ymin69205_MPP-0_512_3072_size512.png not found!
Label for image TCGA-AN-A0AR-DX1_xmin8468_ymin21166_MPP-0_3584_0_size512.png not found!
Label for image TCGA-A2-A04T-DX1_xmin72145_ymin39078_MPP-0_512_0_size512.png not found!
Label for image TCGA-BH-A0WA-DX1_xmin56581_ymin24774_MPP-0_4608_2560_size512.png not found!
Label for image TCGA-BH-A1EW-DX1_xmin66752_ymin42706_MPP-0_204