In [11]:
import os
import shutil

def copy_matching_files(root_folder, annotation_subfolder, jpeg_subfolder, output_images_dir, output_masks_dir):
    # Create the output directories if they don't already exist
    os.makedirs(output_images_dir, exist_ok=True)
    os.makedirs(output_masks_dir, exist_ok=True)

    # Get a list of all subfolders in the annotation directory
    annotation_subfolder_path = os.path.join(root_folder, annotation_subfolder)
    subfolders = os.listdir(annotation_subfolder_path)

    # Initialize counter
    counter = 0

    # Loop over the subfolders
    for subfolder in subfolders:
        # Check if there is a corresponding subfolder in the JPEG directory
        jpeg_subfolder_path = os.path.join(root_folder, jpeg_subfolder, subfolder)
        if os.path.exists(jpeg_subfolder_path):
            # Get a list of all the files in the annotation subfolder
            annotation_files = os.listdir(os.path.join(annotation_subfolder_path, subfolder))

            # Loop over the annotation files
            for annotation_file in annotation_files:
                # Check if there is a corresponding image file
                image_filename, ext = os.path.splitext(annotation_file)
                image_path = os.path.join(jpeg_subfolder_path, image_filename + ".*")
                matching_image_files = [f for f in os.listdir(jpeg_subfolder_path) if f.startswith(image_filename)]

                if len(matching_image_files) > 0:
                    # Copy the image file to the output directory
                    for matching_image_file in matching_image_files:
                        # Get the file extension and construct the new filename
                        _, ext = os.path.splitext(matching_image_file)
                        new_filename = f"{counter:06d}{ext}"
                        
                        # Copy the image and annotation files to the output directories with the same name
                        shutil.copy(os.path.join(jpeg_subfolder_path, matching_image_file), os.path.join(output_images_dir, new_filename))
                        shutil.copy(os.path.join(annotation_subfolder_path, subfolder, annotation_file), os.path.join(output_masks_dir, f"{counter:06d}.png"))

                        counter += 1


In [12]:
root_folder = "/home/msccomputer/Desktop/water_v2/water_v2"
annotation_subfolder = "Annotations"
jpeg_subfolder = "JPEGImages"
output_images_dir = "/home/msccomputer/Desktop/Water_dataset/Images"
output_masks_dir = "/home/msccomputer/Desktop/Water_dataset/Annotations"

copy_matching_files(root_folder, annotation_subfolder, jpeg_subfolder, output_images_dir, output_masks_dir)



In [1]:
import os
import numpy as np
from PIL import Image

def modify_images(folder_path):
    # Get a list of all the files in the folder
    files = os.listdir(folder_path)

    # Loop over the files
    for file in files:
        # Check if the file is an image
        if file.endswith(".png") or file.endswith(".jpg") or file.endswith(".jpeg"):
            # Load the image
            image_path = os.path.join(folder_path, file)
            image = Image.open(image_path)

             # Check if the image is in RGBA mode
            if image.mode == 'RGBA':
              # Convert the image to RGB mode
              image = image.convert('RGB')

            # Convert the image to a NumPy array
            arr = np.array(image)

            # Modify the array
            arr = arr*255

            # Convert the array to grayscale image
            modified_image = Image.fromarray(arr).convert('L')

            # Save the modified image
            try:
              modified_image.save(image_path)
            except Exception as e:
              print("Error: ", file, ". Error-message: ", e)


In [2]:
output_masks_dir = "/home/msccomputer/Desktop/Water_dataset/Annotations"

modify_images(output_masks_dir)

In [22]:
import logging
import numpy as np
import torch
from PIL import Image
from functools import lru_cache
from functools import partial
from itertools import repeat
from multiprocessing import Pool
from os import listdir
from os.path import splitext, isfile, join
from pathlib import Path
from torch.utils.data import Dataset
from tqdm import tqdm


def unique_mask_values(mask_file):
    mask = np.asarray(load_image(mask_file))
    print(mask.shape)
    if mask.ndim == 2:
        return np.unique(mask)
    elif mask.ndim == 3:
        mask = mask.reshape(-1, mask.shape[-1])
        return np.unique(mask, axis=0)
    else:
        raise ValueError(f'Loaded masks should have 2 or 3 dimensions, found {mask.ndim}')
    
def load_image(filename):
    ext = splitext(filename)[1]
    if ext == '.npy':
        return Image.fromarray(np.load(filename))
    elif ext in ['.pt', '.pth']:
        return Image.fromarray(torch.load(filename).numpy())
    else:
        return Image.open(filename)

In [23]:
# unique_mask_values("/home/msccomputer/Downloads/train_masks/0cdf5b5d0ce1_08_mask.gif")

unique_mask_values("/home/msccomputer/Downloads/media_images_pred_384_2654463431696e21f2c4.png")

(149, 200, 4)


array([[  0,   0,   0, 255]], dtype=uint8)

In [None]:
import os
from PIL import Image

def print_non_rgb_images(folder_path):
    # Get a list of all the image files in the folder
    files = [f for f in os.listdir(folder_path) if f.endswith(".jpg") or f.endswith(".jpeg") or f.endswith(".png")]

    # Loop over the files
    for file in files:
        # Open the image
        image_path = os.path.join(folder_path, file)
        image = Image.open(image_path)

        # Check if the image has 3 dimensions
        if len(image.getbands()) != 3:
            print(f"{file} has {len(image.getbands())} dimensions")

print_non_rgb_images('/home/msccomputer/Desktop/Water_dataset/Images')


In [62]:
import os
from PIL import Image

def print_unique_resolutions(folder_path):
    # Get a list of all the image files in the folder
    files = [f for f in os.listdir(folder_path) if f.endswith(".jpg") or f.endswith(".jpeg") or f.endswith(".png")]

    # Loop over the files
    resolutions = set()
    num_small_resolutions = 0
    smallest_res =1e50
    for file in files:
        # Open the image
        image_path = os.path.join(folder_path, file)
        image = Image.open(image_path)

        # Get the resolution of the image
        resolution = image.size

        if(resolution[0] < 256 or resolution[1] < 256):
            print(file)
            print(resolution)

        # Add the resolution to the set
        resolutions.add(resolution)

        # Check if resolution is smaller than 256 in one axis
        if resolution[0] > 1536 or resolution[1] > 1536:
            num_small_resolutions += 1

        if resolution[0] < smallest_res or resolution[1] < smallest_res:
          smallest_res = min(resolution[0], resolution[1])

    # Print the unique resolutions
    print("Unique resolutions:")
    for res in resolutions:
        if(res[0] > 1536 or res[1] > 1536):
            print(res)

    # Print the number of images with resolution in one axis smaller than 256x256
    print("Number of images with resolution in one axis smaller than 256x256: {}".format(num_small_resolutions))

print_unique_resolutions('/home/msccomputer/Desktop/Water_dataset/val/images')


Unique resolutions:
(1920, 1080)
(1920, 1500)
(1605, 642)
Number of images with resolution in one axis smaller than 256x256: 10


In [63]:
import os
import random
import shutil
from PIL import Image


def split_data(annotations_folder, images_folder, test_percent, val_percent, train_percent, root_path):
    assert test_percent + val_percent + train_percent == 100, "The sum of test, validation, and train percentages must be 100"
    
    annotations_filenames = os.listdir(annotations_folder)
    image_filenames = os.listdir(images_folder)
    
    # Make sure there are the same number of annotation files and image files
    assert len(annotations_filenames) == len(image_filenames), "The number of annotation files and image files must be the same"

    annotations_filenames_fixed = []
    image_filenames_fixed = []

    for image_file in image_filenames:
      # Open the image
      image_path = os.path.join(images_folder, image_file)
      
      # Check if file format is JPG
      if image_file.endswith('.jpg') or image_file.endswith('.jpeg'):
          # Convert to PNG
          image = Image.open(image_path)
          image = image.convert('RGB')
          new_image_path = os.path.splitext(image_path)[0] + ".png"
          image.save(new_image_path)
          os.remove(image_path)
          image_path = new_image_path
      else:
          image = Image.open(image_path)

      # Get the resolution of the image
      resolution = image.size

      # Check if resolution is smaller than 256 in one axis
      if (resolution[0] >= 256 and resolution[1] >= 256):
          print("Skipping image {} with resolution {}".format(image_file, resolution))
          image_filenames_fixed.append(image_file)

          # Create the annotation file name
          annotation_file = os.path.splitext(image_file)[0] + ".png"
          annotations_filenames_fixed.append(annotation_file)


    annotations_filenames_fixed.sort()
    image_filenames_fixed.sort()
    
    # Create a list of (annotation_filename, image_filename) pairs
    filename_pairs = [(annotation_filename, image_filename) for annotation_filename, image_filename in zip(annotations_filenames_fixed, image_filenames_fixed) if os.path.splitext(annotation_filename)[0] == os.path.splitext(image_filename)[0]]
    
    # Shuffle the filename pairs to ensure a random split
    random.seed(42)
    random.shuffle(filename_pairs)
    
    num_examples = len(filename_pairs)
    num_test = int(num_examples * test_percent / 100)
    num_val = int(num_examples * val_percent / 100)
    num_train = num_examples - num_test - num_val
    
    test_pairs = filename_pairs[:num_test]
    val_pairs = filename_pairs[num_test:num_test+num_val]
    train_pairs = filename_pairs[num_test+num_val:]
    
    # Create new folders for the split data
    test_annotations_folder = os.path.join(root_path, "test/annotations")
    test_images_folder = os.path.join(root_path, "test/images")
    val_annotations_folder = os.path.join(root_path, "val/annotations")
    val_images_folder = os.path.join(root_path, "val/images")
    train_annotations_folder = os.path.join(root_path, "train/annotations")
    train_images_folder = os.path.join(root_path, "train/images")
    
    os.makedirs(test_annotations_folder, exist_ok=True)
    os.makedirs(test_images_folder, exist_ok=True)
    os.makedirs(val_annotations_folder, exist_ok=True)
    os.makedirs(val_images_folder, exist_ok=True)
    os.makedirs(train_annotations_folder, exist_ok=True)
    os.makedirs(train_images_folder, exist_ok=True)
    
    for annotation_filename, image_filename in test_pairs:
        src_annotation_path = os.path.join(annotations_folder, annotation_filename)
        dst_annotation_path = os.path.join(test_annotations_folder, annotation_filename)
        shutil.copy(src_annotation_path, dst_annotation_path)
        
        src_image_path = os.path.join(images_folder, image_filename)
        dst_image_path = os.path.join(test_images_folder, image_filename)
        shutil.copy(src_image_path, dst_image_path)
        
    for annotation_filename, image_filename in val_pairs:
        src_annotation_path = os.path.join(annotations_folder, annotation_filename)
        dst_annotation_path = os.path.join(val_annotations_folder, annotation_filename)
        shutil.copy(src_annotation_path, dst_annotation_path)
        
        src_image_path = os.path.join(images_folder, image_filename)
        dst_image_path = os.path.join(val_images_folder, image_filename)
        shutil.copy(src_image_path, dst_image_path)
        
    for annotation_filename, image_filename in train_pairs:
        src_annotation_path = os.path.join(annotations_folder, annotation_filename)
        dst_annotation_path = os.path.join(train_annotations_folder, annotation_filename)
        shutil.copy(src_annotation_path, dst_annotation_path)
        
        src_image_path = os.path.join(images_folder, image_filename)
        dst_image_path = os.path.join(train_images_folder, image_filename)
        shutil.copy(src_image_path, dst_image_path)


In [65]:
root_path = "/home/msccomputer/Desktop/Water_dataset"
annotations_folder = "/home/msccomputer/Desktop/Water_dataset/Annotations"
images_folder = "/home/msccomputer/Desktop/Water_dataset/Images"

test_percent = 20
val_percent = 10
train_percent = 70

split_data(annotations_folder, images_folder, test_percent, val_percent, train_percent, root_path)

Skipping image 001435.png with resolution (1500, 965)
Skipping image 002070.png with resolution (640, 480)
Skipping image 000559.png with resolution (500, 333)
Skipping image 000804.png with resolution (256, 256)
Skipping image 002020.png with resolution (500, 375)
Skipping image 001786.png with resolution (256, 256)
Skipping image 001398.png with resolution (1024, 768)
Skipping image 000202.png with resolution (1024, 768)
Skipping image 000177.png with resolution (296, 293)
Skipping image 001477.png with resolution (256, 256)
Skipping image 002249.png with resolution (1024, 683)
Skipping image 000095.png with resolution (1200, 800)
Skipping image 000252.png with resolution (640, 480)
Skipping image 000463.png with resolution (1024, 768)
Skipping image 001610.png with resolution (887, 665)
Skipping image 001386.png with resolution (256, 256)
Skipping image 001920.png with resolution (800, 514)
Skipping image 000420.png with resolution (1024, 768)
Skipping image 000440.png with resoluti

In [9]:
import cv2

image = cv2.imread('/home/msccomputer/Desktop/Water_dataset/train2/annotations/000002_aug_flip_left_right.png')

np.unique(image)

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  39,
       216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228,
       229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
       242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
       255], dtype=uint8)

In [6]:
from PIL import Image
import os

# set the directory path where the images are stored
dir_path = "/home/msccomputer/Desktop/Water_dataset/Annotations"

# loop through all files in the directory
for filename in os.listdir(dir_path):
    # check if the file is an image (you can modify this condition to suit your needs)
    if filename.endswith(".jpg") or filename.endswith(".png"):
        # open the image using Pillow
        img = Image.open(os.path.join(dir_path, filename))
        
        # convert the image to black and white
        bw_img = img.convert("L")
        
        # save the black and white image with the same filename
        bw_img.save(os.path.join(dir_path, filename))


In [11]:
from PIL import Image
import os

# define the path to the two folders with images
folder1_path = "/home/msccomputer/Desktop/Water_dataset/train2/annotations"
folder2_path = "/home/msccomputer/Desktop/Water_dataset/train2/images"

# define the augmentations to be applied
rotations = [0, 90, 180, 270]  # in degrees
flips = ["left_right", "top_bottom"]

# loop through the images in folder1
for filename in os.listdir(folder1_path):
    if filename.endswith(".jpg") or filename.endswith(".png"):  # assuming the images are either JPG or PNG
        # open the image and apply augmentations
        with Image.open(os.path.join(folder1_path, filename)) as img:
          for rotation in rotations:
              rotated_img = img.rotate(rotation)
              rotated_img.convert("L").save(os.path.join(folder1_path, filename[:-4] + f"_aug_rotate_{rotation}.png"))  # add postfix to filename and save as PNG
          for flip in flips:
              flipped_img = img.transpose(getattr(Image, f"FLIP_{flip.upper()}"))
              flipped_img.convert("L").save(os.path.join(folder1_path, filename[:-4] + f"_aug_flip_{flip}.png"))  # add postfix to filename and save as PNG
    
        with Image.open(os.path.join(folder2_path, filename)) as img:
          for rotation in rotations:
              rotated_img = img.rotate(rotation)
              rotated_img.save(os.path.join(folder2_path, filename[:-4] + f"_aug_rotate_{rotation}.png"))  # add postfix to filename and save as PNG
          for flip in flips:
              flipped_img = img.transpose(getattr(Image, f"FLIP_{flip.upper()}"))
              flipped_img.save(os.path.join(folder2_path, filename[:-4] + f"_aug_flip_{flip}.png"))  # add postfix to filename and save as PNG



  flipped_img = img.transpose(getattr(Image, f"FLIP_{flip.upper()}"))
  flipped_img = img.transpose(getattr(Image, f"FLIP_{flip.upper()}"))
  flipped_img = img.transpose(getattr(Image, f"FLIP_{flip.upper()}"))
  flipped_img = img.transpose(getattr(Image, f"FLIP_{flip.upper()}"))
