In [2]:
import os
from PIL import Image
import numpy as np
import cv2


# Generate

# For Classification (Low, High)

In [56]:
# Input/output locations and the size every classification tile is resized to.
PATH_TO_TILES = "Tiles"
PATH_TO_COORDS = "Coordinates"
GENERATED_DIR = "Generated"
IMAGE_SIZE_W = 256
IMAGE_SIZE_L = 256

# Annotation files are named "<tile>_coords.txt"; the tile image is "<tile>.png".
COORDS_SUFFIX = "_coords.txt"
# Tiles with fewer annotation lines than this are labelled "low", the rest "high".
HIGH_LABEL_MIN_COUNT = 5


def count_annotation_lines(coords_path):
    """Return the number of non-blank lines in the file at ``coords_path``.

    Lines are counted instead of newline characters so that a last line
    without a trailing newline is still counted and blank lines are ignored.
    """
    with open(coords_path, "r") as handle:
        return sum(1 for line in handle if line.strip())


os.makedirs(GENERATED_DIR, exist_ok=True)

for subfolder in os.listdir(PATH_TO_COORDS):
    coords_subfolder_path = os.path.join(PATH_TO_COORDS, subfolder)
    tiles_subfolder_path = os.path.join(PATH_TO_TILES, subfolder)
    generated_subfolder_path = os.path.join(GENERATED_DIR, subfolder)

    # Both sides must be real directories: a stray file that exists in both
    # roots (e.g. ".DS_Store") would otherwise reach os.listdir and raise.
    if not (os.path.isdir(coords_subfolder_path) and os.path.isdir(tiles_subfolder_path)):
        continue

    os.makedirs(generated_subfolder_path, exist_ok=True)

    for coord_file in os.listdir(coords_subfolder_path):
        if not coord_file.endswith(COORDS_SUFFIX):
            continue

        base_name = coord_file[:-len(COORDS_SUFFIX)]

        coord_file_path = os.path.join(coords_subfolder_path, coord_file)
        tile_image_path = os.path.join(tiles_subfolder_path, base_name + ".png")

        # An annotation file without a matching tile image is ignored.
        if not os.path.exists(tile_image_path):
            continue

        line_count = count_annotation_lines(coord_file_path)
        label = "low" if line_count < HIGH_LABEL_MIN_COUNT else "high"

        # The label is encoded in the output file name: "<tile>_<label>.png".
        with Image.open(tile_image_path) as img:
            resized_img = img.resize((IMAGE_SIZE_W, IMAGE_SIZE_L))
            output_file_name = f"{base_name}_{label}.png"
            output_file_path = os.path.join(generated_subfolder_path, output_file_name)
            resized_img.save(output_file_path)

# For Regression (Mask-based)

In [None]:
import os
import numpy as np
from PIL import Image
from skimage.draw import polygon2mask

# Paths
PATH_TO_TILES = "Tiles"
PATH_TO_COORDS = "Coordinates"
# NOTE: this rebinds GENERATED_DIR (and the IMAGE_SIZE_* constants below), so
# any cell executed afterwards that reads these names sees the mask settings.
GENERATED_DIR = "Generated_Masks"
RESIZED_TILES_DIR = "Resized_Tiles"

# Target Mask Size
IMAGE_SIZE_W = 512
IMAGE_SIZE_L = 512

# Annotation files are named "<tile>_coords.txt"; the tile image is "<tile>.png".
COORDS_SUFFIX = "_coords.txt"


def read_boxes(coords_path):
    """Read bounding boxes from a comma-separated annotation file.

    Each line with at least four comma-separated fields contributes one
    ``(x1, y1, x2, y2)`` tuple of ints taken from its first four fields.
    Lines with fewer fields are ignored; lines whose fields are not numeric
    are reported and skipped instead of aborting the whole run.
    """
    boxes = []
    with open(coords_path, "r") as handle:
        for line_number, line in enumerate(handle, start=1):
            fields = line.strip().split(",")
            if len(fields) < 4:
                continue
            try:
                # Going through float() also accepts values written as "12.0".
                boxes.append(tuple(int(float(value)) for value in fields[:4]))
            except (ValueError, OverflowError):
                print(f"Skipping malformed line {line_number} in {coords_path}: {line.strip()!r}")
    return boxes


def boxes_to_mask(boxes, height, width):
    """Return a ``(height, width)`` uint8 mask: 255 inside any box, 0 elsewhere.

    Corners are put in ascending order and clamped to the mask bounds first.
    Without the clamp a negative coordinate is interpreted by NumPy as an
    offset from the far edge, so a box crossing the top/left border would be
    dropped or painted in the wrong place.
    """
    mask = np.zeros((height, width), dtype=np.uint8)
    for x1, y1, x2, y2 in boxes:
        left, right = sorted((x1, x2))
        top, bottom = sorted((y1, y2))
        left, right = max(left, 0), min(right, width)
        top, bottom = max(top, 0), min(bottom, height)
        if left < right and top < bottom:
            mask[top:bottom, left:right] = 255
    return mask


# Ensure output directories exist
os.makedirs(GENERATED_DIR, exist_ok=True)
os.makedirs(RESIZED_TILES_DIR, exist_ok=True)

# Process each subfolder
for subfolder in os.listdir(PATH_TO_COORDS):
    coords_subfolder_path = os.path.join(PATH_TO_COORDS, subfolder)
    tiles_subfolder_path = os.path.join(PATH_TO_TILES, subfolder)
    generated_subfolder_path = os.path.join(GENERATED_DIR, subfolder)
    resized_subfolder_path = os.path.join(RESIZED_TILES_DIR, subfolder)

    # Both sides must be real directories: a stray file that exists in both
    # roots (e.g. ".DS_Store") would otherwise reach os.listdir and raise.
    if not (os.path.isdir(coords_subfolder_path) and os.path.isdir(tiles_subfolder_path)):
        continue

    # Create subfolders in output directories
    os.makedirs(generated_subfolder_path, exist_ok=True)
    os.makedirs(resized_subfolder_path, exist_ok=True)

    # Process each coordinate file
    for coord_file in os.listdir(coords_subfolder_path):
        if not coord_file.endswith(COORDS_SUFFIX):
            continue

        base_name = coord_file[:-len(COORDS_SUFFIX)]

        coord_file_path = os.path.join(coords_subfolder_path, coord_file)
        tile_image_path = os.path.join(tiles_subfolder_path, base_name + ".png")

        # An annotation file without a matching tile image is ignored.
        if not os.path.exists(tile_image_path):
            continue

        with Image.open(tile_image_path) as img:
            # The mask is drawn at the tile's original resolution, because the
            # box coordinates are applied directly as pixel indices into it.
            orig_w, orig_h = img.size
            mask = boxes_to_mask(read_boxes(coord_file_path), orig_h, orig_w)

            # Nearest-neighbour resampling keeps the mask strictly 0/255.
            mask_resized = Image.fromarray(mask).resize((IMAGE_SIZE_W, IMAGE_SIZE_L), Image.NEAREST)

            # Resize the tile image to the same target size as its mask
            img_resized = img.resize((IMAGE_SIZE_W, IMAGE_SIZE_L))

            # Save the resized tile
            resized_tile_path = os.path.join(resized_subfolder_path, base_name + ".png")
            img_resized.save(resized_tile_path)
            print(f"Saved resized tile: {resized_tile_path}")

            # Save the resized mask
            output_file_name = f"{base_name}_mask.png"
            output_file_path = os.path.join(generated_subfolder_path, output_file_name)
            mask_resized.save(output_file_path)
            print(f"Saved mask: {output_file_name}")

Saved resized tile: Resized_Tiles/123/tile_31103_116052.png
Saved mask: tile_31103_116052_mask.png
Saved resized tile: Resized_Tiles/123/tile_29466_121142.png
Saved mask: tile_29466_121142_mask.png
Saved resized tile: Resized_Tiles/123/tile_31103_115034.png
Saved mask: tile_31103_115034_mask.png
Saved resized tile: Resized_Tiles/123/tile_31103_117070.png
Saved mask: tile_31103_117070_mask.png
Saved resized tile: Resized_Tiles/123/tile_29466_120124.png
Saved mask: tile_29466_120124_mask.png


# Loading

In [64]:
# Empty starting containers: zero images of 256x256x3 (float32) and zero labels (int32).
image_data = np.zeros(shape=(0, 256, 256, 3), dtype=np.float32)
labels = np.zeros(shape=(0,), dtype=np.int32)

def label_to_numeric(label):
    """Encode a text label as an integer class id.

    Returns 0 for "low" and 1 for every other value (including "high").
    """
    if label == "low":
        return 0
    return 1

# Read the classification tiles from their own directory instead of
# GENERATED_DIR: the mask-generation cell rebinds GENERATED_DIR to
# "Generated_Masks", so after a top-to-bottom run that name no longer points at
# the "<tile>_low.png" / "<tile>_high.png" files this loop expects.
# Keep this in sync with the output directory of the classification cell.
CLASSIFIED_TILES_DIR = "Generated"
KNOWN_LABELS = ("low", "high")

# Collect into lists and build the arrays once at the end; growing an array
# with vstack/append on every image copies everything loaded so far each time.
loaded_images = []
loaded_labels = []

# sorted() makes the sample order reproducible (os.listdir order is arbitrary).
for subfolder in sorted(os.listdir(CLASSIFIED_TILES_DIR)):
    subfolder_path = os.path.join(CLASSIFIED_TILES_DIR, subfolder)

    if not os.path.isdir(subfolder_path):
        continue

    for file_name in sorted(os.listdir(subfolder_path)):
        if not file_name.endswith(".png"):
            continue

        file_path = os.path.join(subfolder_path, file_name)

        # File names look like "<tile>_<label>.png"; take the part between the
        # last underscore and the first following dot.
        label = file_name.split("_")[-1].split(".")[0]
        if label not in KNOWN_LABELS:
            # Without this check any unexpected suffix would be encoded as 1.
            print(f"Skipping file with unrecognised label: {file_path}")
            continue

        with Image.open(file_path) as img:
            # Bring every mode (RGBA, grayscale, palette, ...) to 3-channel RGB.
            rgb_img = img if img.mode == "RGB" else img.convert("RGB")
            # Scale pixel values to [0, 1], staying in float32.
            img_array = np.asarray(rgb_img, dtype=np.float32) / 255.0

        if img_array.shape != (256, 256, 3):
            print(f"Skipping image due to unexpected shape: {img_array.shape}")
            continue

        loaded_images.append(img_array)
        loaded_labels.append(label_to_numeric(label))

if loaded_images:
    image_data = np.stack(loaded_images)
else:
    # np.stack rejects an empty list; keep the documented empty shape instead.
    image_data = np.empty((0, 256, 256, 3), dtype=np.float32)
labels = np.array(loaded_labels, dtype=np.int32)



# PyTorch

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

class GeneratedDataset(Dataset):
    """Map-style dataset over parallel, indexable ``images`` and ``labels``.

    Each item is returned as ``(image, label)`` where ``image`` is a float32
    tensor with its last axis moved to the front and ``label`` is an int64
    tensor.
    """

    def __init__(self, images, labels):
        """Keep references to the images and labels; nothing is copied here."""
        self.images = images
        self.labels = labels

    def __len__(self):
        """Number of samples, taken from the length of ``images``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a ``(float32 image, int64 label)`` pair."""
        # permute(2, 0, 1) needs a 3-D sample and puts its last axis first
        # (presumably channels-last input -> channels-first tensor).
        sample = torch.tensor(self.images[idx], dtype=torch.float32)
        sample = sample.permute(2, 0, 1)
        target = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample, target

# Wrap the loaded arrays and serve them in shuffled mini-batches of 32.
dataset = GeneratedDataset(image_data, labels)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Sanity check: pull only the first batch (if there is one) and show its shapes.
first_batch = next(iter(dataloader), None)
if first_batch is not None:
    batch_images, batch_labels = first_batch
    print(batch_images.shape, batch_labels.shape)

# TensorFlow

In [74]:
import tensorflow as tf

# Build the tf.data pipeline in one chain: slice the arrays into
# (image, label) pairs, shuffle with a buffer as large as the number of
# images, then group into batches of 32.
dataset = (
    tf.data.Dataset.from_tensor_slices((image_data, labels))
    .shuffle(len(image_data))
    .batch(32)
)
