# Mixed Dataset

This Jupyter notebook creates folders containing a mix of DL and CT images.

**Authors:** [Raphaël Achddou](https://people.epfl.ch/raphael.achddou) ([contact](mailto:raphael.achddou@epfl.ch)), [Paulo Ribeiro](https://people.epfl.ch/paulo.ribeirodecarvalho) ([contact](mailto:paulo.ribeirodecarvalho@epfl.ch))

In [None]:
!pip install cv2

In [1]:
import os
import cv2
import random
from tqdm.notebook import tqdm

# Seed given to random.seed() before the mixed datasets are sampled and shuffled
SEED = 42
# Fraction of DL images in each mixed dataset; the remaining images are CT
PERCENTAGES = [0.2, 0.4, 0.6, 0.8]

## Load DL and CT

First, load all the DL and CT images into memory.

In [2]:
# Directories (relative to the working directory) that contain the source .png images
dl_folder = "train/dl_images_texture"
ct_folder = "train/ct_images"

def load_images_in_folder(folder_path: str):
    """Load every readable ``.png`` image found directly inside a folder.

    File names are processed in sorted order because ``os.listdir`` returns
    entries in arbitrary order; a stable order keeps the seeded random
    sampling performed on the returned list reproducible across machines.

    Args:
        folder_path: Directory to scan. Only its direct entries are listed.

    Returns:
        A list with one entry per successfully decoded image, in sorted
        file-name order. Files that ``cv2.imread`` cannot decode are skipped.
    """
    # Keep only the .png files, in a deterministic order
    png_filenames = sorted(
        filename for filename in os.listdir(folder_path) if filename.endswith(".png")
    )

    images = []
    for filename in tqdm(png_filenames):
        image = cv2.imread(os.path.join(folder_path, filename))
        # cv2.imread signals an unreadable file by returning None instead of raising
        if image is not None:
            images.append(image)

    return images

# Read both image sets into memory and report how many images each one contains
dl_images = load_images_in_folder(folder_path=dl_folder)
print(f"Loaded {len(dl_images)} DL images.")
ct_images = load_images_in_folder(folder_path=ct_folder)
print(f"Loaded {len(ct_images)} CT images.")

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/opt/anaconda3/envs/research_sp/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code
  File "/var/folders/tn/31pmfl0n667ghvsfkxf6nxyh0000gn/T/ipykernel_82516/1435081786.py", line 22, in <module>
    dl_images = load_images_in_folder(folder_path=dl_folder)
  File "/var/folders/tn/31pmfl0n667ghvsfkxf6nxyh0000gn/T/ipykernel_82516/1435081786.py", line 16, in load_images_in_folder
    image = Image.open(file_path)
  File "/opt/anaconda3/envs/research_sp/lib/python3.10/site-packages/PIL/Image.py", line 3274, in open
  File "/opt/anaconda3/envs/research_sp/lib/python3.10/posixpath.py", line 397, in realpath
  File "/opt/anaconda3/envs/research_sp/lib/python3.10/posixpath.py", line 384, in abspath
OSError: [Errno 24] Too many open files

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/anaconda3/envs/research_sp/lib/python3.10/site-packages/IPython/co

## Mixed Dataset

Build the mixed datasets and store them on disk.

In [5]:
# Seed Python's `random` module so that the sampling and shuffling below are reproducible
random.seed(SEED)

def create_mixed_dataset(
    ct_images: list,
    dl_images: list,
    percentage: "float | list[float]",
    base_folder_name: str = "train/mixed_dl_ct_",
):
    """Write one folder of shuffled DL/CT images per requested DL fraction.

    Every dataset contains ``len(dl_images)`` images: ``int(total * p)`` of
    them are sampled from ``dl_images`` and the rest from ``ct_images``
    (both without replacement). The images are shuffled and written as
    ``image_00000.png``, ``image_00001.png``, ... inside
    ``f"{base_folder_name}{round(p * 100)}%"``.

    Args:
        ct_images: Pool of CT images to sample from.
        dl_images: Pool of DL images to sample from; its length also sets the
            size of every mixed dataset.
        percentage: DL fraction in ``[0, 1]``, or an iterable of such
            fractions (one output folder is produced per fraction).
        base_folder_name: Prefix of the output folder paths.

    Raises:
        ValueError: If a fraction lies outside ``[0, 1]`` or if ``ct_images``
            is too small to provide the required number of CT images.
        OSError: If an image cannot be written to disk.
    """
    # Use the argument itself: a single fraction or a collection of fractions
    if isinstance(percentage, (int, float)):
        percentages = [percentage]
    else:
        percentages = list(percentage)

    total_images = len(dl_images)

    for dl_fraction in percentages:
        if not 0 <= dl_fraction <= 1:
            raise ValueError(f"percentage must lie in [0, 1], got {dl_fraction}")

        # Choose how many DL and CT images go into this dataset
        dl_number = int(total_images * dl_fraction)
        ct_number = total_images - dl_number
        # Fail with a clear message before creating any folder on disk
        if ct_number > len(ct_images):
            raise ValueError(
                f"Need {ct_number} CT images for a dataset with {dl_number} DL images, "
                f"but only {len(ct_images)} CT images are available."
            )

        # round() rather than int(): e.g. 0.29 * 100 is 28.999... and would truncate to 28
        current_folder_name = base_folder_name + f'{round(dl_fraction * 100)}%'
        os.makedirs(current_folder_name, exist_ok=True)

        # Same order of random calls as before (CT sample, DL sample, shuffle)
        batch_ct_images = random.sample(ct_images, ct_number)
        batch_dl_images = random.sample(dl_images, dl_number)
        mixed_images = batch_ct_images + batch_dl_images
        random.shuffle(mixed_images)  # Shuffle the mixed images

        # Wrap the list (not the enumerate object) so tqdm knows the total
        for idx, image in enumerate(tqdm(mixed_images, desc=current_folder_name)):
            file_path = os.path.join(current_folder_name, f"image_{idx:05}.png")
            # cv2.imwrite reports failure through its return value, not an exception
            if not cv2.imwrite(file_path, image):
                raise OSError(f"Could not write image to {file_path}")

# Generate one mixed folder on disk for every DL fraction listed in PERCENTAGES
create_mixed_dataset(ct_images=ct_images, dl_images=dl_images, percentage=PERCENTAGES)

ValueError: Operation on closed image