# CNN for placton classification

In [4]:
from google.colab import drive
import os
drive.mount('/content/drive')

Mounted at /content/drive


# Data Augmentation


## Image rotation adding colored padding

In [8]:
from collections import Counter

def get_background_color(img):
    """
    Determines the background color of the image by analyzing the border pixels.

    Parameters:
    - img (PIL.Image.Image): Opened and converted image in RGB.

    Returns:
    - background_color (tuple): Determined background color (R, G, B).
    """
    pixels = []
    width, height = img.size

    # Get pixels from the top and bottom borders
    for x in range(width):
        pixels.append(img.getpixel((x, 0)))           # Top border
        pixels.append(img.getpixel((x, height - 1)))  # Bottom border

    # Get pixels from the left and right borders
    for y in range(height):
        pixels.append(img.getpixel((0, y)))           # Left border
        pixels.append(img.getpixel((width - 1, y)))   # Right border

    # Count the colors and find the most common one
    color_counts = Counter(pixels)
    background_color = color_counts.most_common(1)[0][0]

    return background_color


In [9]:
import os
from PIL import Image, ImageFile, ImageOps
from collections import Counter
from tqdm import tqdm

# To handle truncated images
ImageFile.LOAD_TRUNCATED_IMAGES = True

def get_resampling_methods():
    """
    Determines the available interpolation methods based on the Pillow version.

    Returns:
    - A list of interpolation methods in order of preference, excluding LANCZOS.
    """
    try:
        # Pillow 10.x and later
        from PIL import Image
        if hasattr(Image, 'Resampling'):
            return [Image.Resampling.BICUBIC, Image.Resampling.BILINEAR, Image.Resampling.NEAREST]
        else:
            return [Image.BICUBIC, Image.BILINEAR, Image.NEAREST]
    except AttributeError:
        # Pillow 9.x and earlier
        return [Image.BICUBIC, Image.BILINEAR, Image.NEAREST]



def augment_dataset_with_high_quality_rotations_colored(dataset_path, n, image_extensions=None, max_rotation_degree=360):
    """
    Augments images in each subfolder of the dataset by rotating them with smaller angles to ensure
    at least 'n' images per subfolder, using high-quality rotation to minimize artifacts.

    Parameters:
    - dataset_path (str): Path to the dataset directory containing subfolders with images.
    - n (int): Minimum number of images required per subfolder after augmentation.
    - image_extensions (set, optional): Set of image file extensions to consider.
                                         Defaults to common image formats.
    - max_rotation_degree (int, optional): Maximum degree for rotation (default is 360).
    """
    if image_extensions is None:
        # Define common image extensions
        image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.gif'}

    # Get the list of interpolation methods in order of preference
    resampling_methods = get_resampling_methods()

    # Iterate through each subfolder in the dataset path
    for subfolder in tqdm(os.listdir(dataset_path), desc="Subfolders"):
        subfolder_path = os.path.join(dataset_path, subfolder)

        # Ensure it is a directory
        if not os.path.isdir(subfolder_path):
            continue

        # List all image files in the subfolder
        images = [f for f in os.listdir(subfolder_path)
                  if os.path.isfile(os.path.join(subfolder_path, f)) and
                  os.path.splitext(f.lower())[1] in image_extensions]

        m = len(images)  # Current number of images

        print(f"\nProcessing folder: {subfolder} | Current images: {m}")

        if m == 0:
            print(f"Warning: No images found in folder '{subfolder}'. Skipping augmentation.")
            continue

        if m >= n:
            print(f"Folder '{subfolder}' already has {m} images. Skipping augmentation.")
            continue

        # Calculate the total number of additional images needed
        delta = n - m

        # Calculate the number of rotations per image
        # Distribute rotations as evenly as possible
        rotations_per_image = delta // m
        extra_rotations = delta % m  # Some images will have an additional rotation

        print(f"Each image will be rotated {rotations_per_image} times.")
        if extra_rotations > 0:
            print(f"{extra_rotations} images will be rotated an additional time.")

        for idx, image_name in enumerate(images):
            image_path = os.path.join(subfolder_path, image_name)
            try:
                with Image.open(image_path).convert('RGB') as img:
                    # Determine the background color
                    background_color = get_background_color(img)

                    # Determine the number of rotations for this image
                    num_rotations = rotations_per_image + (1 if idx < extra_rotations else 0)

                    if num_rotations == 0:
                        continue  # No rotation needed for this image

                    # Calculate the rotation step angle
                    # To ensure variety, divide max_rotation_degree by the number of rotations +1
                    step_angle = max_rotation_degree / (num_rotations + 1)

                    for rot in range(1, num_rotations + 1):
                        # Calculate the rotation angle
                        angle = (rot * step_angle) % max_rotation_degree

                        # Try rotating with available interpolation methods, using fillcolor
                        rotated_img = None
                        for method in resampling_methods:
                            try:
                                # Check if the rotation method supports 'fillcolor'
                                rotated_img = img.rotate(angle, resample=method, expand=True, fillcolor=background_color)
                                print(f"Rotated with method {method} and fillcolor.")
                                break  # If rotation succeeds, exit the loop
                            except TypeError:
                                # If 'fillcolor' is not supported, rotate without fillcolor and add the fill manually
                                rotated_img = img.rotate(angle, resample=method, expand=True)
                                # Create a new image with the background color
                                background = Image.new(rotated_img.mode, rotated_img.size, background_color)
                                # Paste the rotated image onto the new background image
                                if rotated_img.mode == 'RGBA':
                                    background.paste(rotated_img, mask=rotated_img.split()[3])  # Use the alpha channel as mask
                                else:
                                    background.paste(rotated_img)
                                rotated_img = background
                                print(f"Rotated with method {method} without fillcolor, added background manually.")
                                break  # Exit the loop after manually adding the fill
                            except Exception as e:
                                print(f"Unexpected error with method {method}: {e}")
                                continue  # Try the next method

                        if rotated_img is None:
                            print(f"Failed to rotate image '{image_name}' with angle {angle}. Skipping.")
                            continue

                        # Create a new name for the rotated image
                        base, ext = os.path.splitext(image_name)
                        new_image_name = f"{base}_rot{int(angle)}{ext}"
                        new_image_path = os.path.join(subfolder_path, new_image_name)

                        # Avoid overwriting existing images
                        counter = 1
                        while os.path.exists(new_image_path):
                            new_image_name = f"{base}_rot{int(angle)}_{counter}{ext}"
                            new_image_path = os.path.join(subfolder_path, new_image_name)
                            counter += 1
                            if counter > 100:
                                print(f"Too many duplicates for image '{image_name}'. Skipping this rotation.")
                                rotated_img = None
                                break

                        if rotated_img is None:
                            continue

                        # Save the rotated image with high quality
                        if ext.lower() in ['.jpg', '.jpeg']:
                            rotated_img.save(new_image_path, quality=95)
                        else:
                            rotated_img.save(new_image_path)

                        print(f"Saved rotated image: {new_image_name}")

            except Exception as e:
                print(f"Error processing image '{image_name}': {e}")

        # Optional: Verify the number of images after augmentation
        new_images = [f for f in os.listdir(subfolder_path)
                     if os.path.isfile(os.path.join(subfolder_path, f)) and
                     os.path.splitext(f.lower())[1] in image_extensions]
        new_m = len(new_images)
        print(f"After augmentation, folder '{subfolder}' has {new_m} images.\n")




In [10]:
import os
from PIL import Image, ImageFile, ImageOps
from collections import Counter
from tqdm import tqdm
from multiprocessing import Pool, cpu_count
import traceback

# Per gestire immagini troncate
ImageFile.LOAD_TRUNCATED_IMAGES = True



def process_image(args):
    """
    Elabora una singola immagine: ridimensiona mantenendo l'aspect ratio e aggiunge padding dinamico.

    Parametri:
    - args (tuple): Tuple contenente (input_image_path, target_size).

    Ritorna:
    - None o messaggio di errore.
    """
    input_image_path, target_size = args
    try:
        with Image.open(input_image_path).convert('RGB') as img:
            # Determina il colore di sfondo
            background_color = get_background_color(img)

            # Determina il metodo di resampling
            try:
                resample_method = Image.Resampling.LANCZOS
            except AttributeError:
                # Per versioni più vecchie di Pillow
                resample_method = Image.ANTIALIAS

            # Calcola il rapporto per mantenere l'aspect ratio
            ratio = min(target_size[0] / img.width, target_size[1] / img.height)
            new_size = (int(img.width * ratio), int(img.height * ratio))

            # Ridimensiona l'immagine
            img_resized = img.resize(new_size, resample=resample_method)

            # Calcola il padding necessario
            delta_w = target_size[0] - new_size[0]
            delta_h = target_size[1] - new_size[1]
            padding = (
                delta_w // 2,             # Padding sinistro
                delta_h // 2,             # Padding superiore
                delta_w - (delta_w // 2), # Padding destro
                delta_h - (delta_h // 2)  # Padding inferiore
            )

            # Aggiungi il padding con il colore di sfondo
            img_padded = ImageOps.expand(img_resized, padding, fill=background_color)

            # Sovrascrivi l'immagine originale con quella preelaborata
            img_padded.save(input_image_path)

        return None  # Nessun errore

    except Exception as e:
        error_message = f"Error processing {input_image_path}: {e}"
        # Opzionale: stampa lo stack trace per debug
        traceback_str = traceback.format_exc()
        return f"{error_message}\n{traceback_str}"


def preprocess_dataset_with_dynamic_padding_colored(input_dir, target_size=(224, 224)):
    """
    Preprocessa un dataset di immagini ridimensionando ciascuna immagine mantenendo l'aspect ratio
    e aggiungendo padding con il colore di sfondo dell'immagine. Le immagini preelaborate sovrascrivono
    le immagini originali, mantenendo la struttura delle sottocartelle.

    Parametri:
    - input_dir (str): Percorso alla directory del dataset originale.
    - target_size (tuple): Dimensione desiderata per le immagini (larghezza, altezza).

    Ritorna:
    - None
    """
    # Raccogli tutti i percorsi delle immagini
    image_paths = []
    classes = [d for d in os.listdir(input_dir) if os.path.isdir(os.path.join(input_dir, d))]

    for class_name in classes:
        class_input_path = os.path.join(input_dir, class_name)
        # Filtra solo i file con estensioni comuni di immagini
        valid_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.gif')
        image_files = [
            os.path.join(class_input_path, f)
            for f in os.listdir(class_input_path)
            if os.path.isfile(os.path.join(class_input_path, f)) and f.lower().endswith(valid_extensions)
        ]
        image_paths.extend([(path, target_size) for path in image_files])

    print(f"Totale immagini da processare: {len(image_paths)}")

    # Determina il numero di processi da utilizzare
    num_processes = cpu_count() - 1 if cpu_count() > 1 else 1  # Lascia un CPU libero
    print(f"Utilizzo di {num_processes} processi per la preelaborazione delle immagini.")

    # Utilizza multiprocessing Pool per elaborare le immagini in parallelo
    with Pool(processes=num_processes) as pool:
        # Utilizza tqdm per mostrare la barra di avanzamento
        for result in tqdm(pool.imap_unordered(process_image, image_paths), total=len(image_paths), desc="Elaborazione Immagini"):
            if result is not None:
                print(result)  # Stampa eventuali errori

    print("Preelaborazione in loco con padding dinamico completata!")




In [11]:
dataset_directory = '/content/drive/MyDrive/CNN-CNR/Datasets/final-200'
minimum_images = 200

augment_dataset_with_high_quality_rotations_colored(dataset_directory, minimum_images)



Subfolders:   0%|          | 0/25 [00:00<?, ?it/s]


Processing folder: Chaetoceros spp. | Current images: 200
Folder 'Chaetoceros spp.' already has 200 images. Skipping augmentation.

Processing folder: Leptocylindrus danicus | Current images: 51
Each image will be rotated 2 times.
47 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2013_001_083957_00360_rot90.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2013_001_083957_00360_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2013_001_083957_00360_rot270.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2013_011_174553_01017_rot90.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2013_011_174553_01017_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2013_011_174553_01017_rot270.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2013_011_174553

Subfolders:   8%|▊         | 2/25 [00:02<00:34,  1.49s/it]

Saved rotated image: D20240912T092716_IFCB182_00074_rot120.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T092716_IFCB182_00074_rot240.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T094948_IFCB182_00045_rot120.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T094948_IFCB182_00045_rot240.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T094948_IFCB182_00357_rot120.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T094948_IFCB182_00357_rot240.png
After augmentation, folder 'Leptocylindrus danicus' has 200 images.


Processing folder: Und. pennate diatoms >20 | Current images: 24
Each image will be rotated 7 times.
8 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T094948_IFCB182_00175_rot40.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T094948_IFCB182_00175_rot80.png
Rotated with method 3 a

Subfolders:  12%|█▏        | 3/25 [00:06<00:51,  2.34s/it]

After augmentation, folder 'Und. pennate diatoms >20' has 200 images.


Processing folder: Gymnodinium spp. <20 | Current images: 104
Each image will be rotated 0 times.
96 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: D20210713T120326_IFCB114_00495_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20210706T120205_IFCB114_00430_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20210608T115904_IFCB114_00551_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20210519T120718_IFCB114_01683_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20210810T120255_IFCB114_04236_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20210706T120205_IFCB114_02165_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20210519T120718_IFCB114_02120_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20210810T051824_IFCB114_0

Subfolders:  16%|█▌        | 4/25 [00:08<00:46,  2.21s/it]

After augmentation, folder 'Gymnodinium spp. <20' has 200 images.


Processing folder: Chaetoceros tortissimius | Current images: 111
Each image will be rotated 0 times.
89 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00357_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00341_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00140_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00173_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00034_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00085_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00102_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_0

Subfolders:  20%|██        | 5/25 [00:11<00:51,  2.57s/it]

After augmentation, folder 'Chaetoceros tortissimius' has 200 images.


Processing folder: Bacteriastrum jadranum | Current images: 22
Each image will be rotated 8 times.
2 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00006_rot36.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00006_rot72.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00006_rot108.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00006_rot144.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00006_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00006_rot216.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00006_rot252.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_000

Subfolders:  24%|██▍       | 6/25 [00:18<01:14,  3.94s/it]

Saved rotated image: D20240912T090443_IFCB182_00225_rot240.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T090443_IFCB182_00225_rot280.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T090443_IFCB182_00225_rot320.png
After augmentation, folder 'Bacteriastrum jadranum' has 200 images.


Processing folder: Thalassiosira spp. >10 | Current images: 16
Each image will be rotated 11 times.
8 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_150_203337_01094_rot27.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_150_203337_01094_rot55.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_150_203337_01094_rot83.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_150_203337_01094_rot110.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_150_203337_01094_rot138

Subfolders:  28%|██▊       | 7/25 [00:22<01:10,  3.92s/it]

Saved rotated image: Copia di IFCB1_2014_156_180619_03516_rot150.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_156_180619_03516_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_156_180619_03516_rot210.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_156_180619_03516_rot240.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_156_180619_03516_rot270.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_156_180619_03516_rot300.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_156_180619_03516_rot330.png
After augmentation, folder 'Thalassiosira spp. >10' has 200 images.


Processing folder: Pleurosigma spp. | Current images: 200
Folder 'Pleurosigma spp.' already has 200 images. Skipping augmentation.

Processing folder: Bacteriastrum furcatum | Current images: 15
Each image will be rotated 12 

Subfolders:  36%|███▌      | 9/25 [00:33<01:16,  4.77s/it]

Saved rotated image: D20221018T081629_IFCB182_00474_rot332.png
After augmentation, folder 'Bacteriastrum furcatum' has 200 images.


Processing folder: Syracosphaera pulcra | Current images: 28
Each image will be rotated 6 times.
4 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_01284_rot45.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_01284_rot90.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_01284_rot135.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_01284_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_01284_rot225.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_01284_rot270.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_01284_rot315.png
Rotated with method 3 and f

Subfolders:  40%|████      | 10/25 [00:36<01:03,  4.20s/it]

Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00672_rot154.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00672_rot205.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00672_rot257.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00672_rot308.png
After augmentation, folder 'Syracosphaera pulcra' has 200 images.


Processing folder: Chaetoceros socialis | Current images: 118
Each image will be rotated 0 times.
82 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00080_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00314_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00129_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00488

Subfolders:  44%|████▍     | 11/25 [00:38<00:52,  3.75s/it]

After augmentation, folder 'Chaetoceros socialis' has 200 images.


Processing folder: Cerataulina pelagica | Current images: 32
Each image will be rotated 5 times.
8 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00110_rot51.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00110_rot102.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00110_rot154.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00110_rot205.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00110_rot257.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00110_rot308.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00715_rot51.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_00715_rot

Subfolders:  48%|████▊     | 12/25 [00:43<00:52,  4.01s/it]

After augmentation, folder 'Cerataulina pelagica' has 200 images.


Processing folder: Guinardia striata | Current images: 42
Each image will be rotated 3 times.
32 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_002_201555_00734_rot72.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_002_201555_00734_rot144.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_002_201555_00734_rot216.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_002_201555_00734_rot288.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_002_203908_01571_rot72.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_002_203908_01571_rot144.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_002_203908_01571_rot216.png
Rotated with method 3 and fillcolor.
Saved rotated i

Subfolders:  52%|█████▏    | 13/25 [00:49<00:55,  4.61s/it]

After augmentation, folder 'Guinardia striata' has 200 images.


Processing folder: Dactyliosolen fragilissimus | Current images: 46
Each image will be rotated 3 times.
16 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_150_192358_02647_rot72.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_150_192358_02647_rot144.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_150_192358_02647_rot216.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_150_192358_02647_rot288.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_150_192358_01914_rot72.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_150_192358_01914_rot144.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_150_192358_01914_rot216.png
Rotated with method 3 and fillcolor.
Saved ro

Subfolders:  56%|█████▌    | 14/25 [00:56<00:57,  5.21s/it]

Saved rotated image: Copia di IFCB1_2014_150_203337_00029_rot270.png
After augmentation, folder 'Dactyliosolen fragilissimus' has 200 images.


Processing folder: Eucampia cornuta | Current images: 62
Each image will be rotated 2 times.
14 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_120213_04132_rot90.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_120213_04132_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_120213_04132_rot270.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_120213_00563_rot90.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_120213_00563_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_120213_00563_rot270.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_1

Subfolders:  60%|██████    | 15/25 [01:02<00:55,  5.50s/it]

Saved rotated image: Copia di IFCB1_2014_304_182116_01467_rot120.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB1_2014_304_182116_01467_rot240.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_113906_04898_rot120.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_113906_04898_rot240.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_248_010423_05722_rot120.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_248_010423_05722_rot240.png
After augmentation, folder 'Eucampia cornuta' has 200 images.


Processing folder: Chaetoceros diversus | Current images: 83
Each image will be rotated 1 times.
34 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T092716_IFCB182_00043_rot120.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T092716_IFCB182_00043_rot24

Subfolders:  64%|██████▍   | 16/25 [01:04<00:40,  4.53s/it]

After augmentation, folder 'Chaetoceros diversus' has 200 images.


Processing folder: Leptocylindrus mediterraneus | Current images: 33
Each image will be rotated 5 times.
2 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_122510_05753_rot51.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_122510_05753_rot102.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_122510_05753_rot154.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_122510_05753_rot205.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_122510_05753_rot257.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_122510_05753_rot308.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_122510_01836_rot51.png
Rotated with method 3 and fillcolor.
Saved

Subfolders:  68%|██████▊   | 17/25 [01:14<00:48,  6.06s/it]

Saved rotated image: Copia di IFCB5_2014_259_113906_02823_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_113906_02823_rot240.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_113906_02823_rot300.png
After augmentation, folder 'Leptocylindrus mediterraneus' has 200 images.


Processing folder: Asterionellopsis glacialis | Current images: 31
Each image will be rotated 5 times.
14 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: IFCB5_2013_167_152816_04118_rot51.png
Rotated with method 3 and fillcolor.
Saved rotated image: IFCB5_2013_167_152816_04118_rot102.png
Rotated with method 3 and fillcolor.
Saved rotated image: IFCB5_2013_167_152816_04118_rot154.png
Rotated with method 3 and fillcolor.
Saved rotated image: IFCB5_2013_167_152816_04118_rot205.png
Rotated with method 3 and fillcolor.
Saved rotated image: IFCB5_2013_167_152816_04118_rot257.png
Rotated wi

Subfolders:  72%|███████▏  | 18/25 [01:22<00:46,  6.69s/it]

Saved rotated image: D20221018T081629_IFCB182_00937_rot300.png
Rotated with method 3 and fillcolor.
Saved rotated image: IFCB5_2013_172_193903_00899_rot60.png
Rotated with method 3 and fillcolor.
Saved rotated image: IFCB5_2013_172_193903_00899_rot120.png
Rotated with method 3 and fillcolor.
Saved rotated image: IFCB5_2013_172_193903_00899_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: IFCB5_2013_172_193903_00899_rot240.png
Rotated with method 3 and fillcolor.
Saved rotated image: IFCB5_2013_172_193903_00899_rot300.png
After augmentation, folder 'Asterionellopsis glacialis' has 200 images.


Processing folder: Chaetoceros diadema | Current images: 21
Each image will be rotated 8 times.
11 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T090443_IFCB182_00459_rot36.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T090443_IFCB182_00459_rot72.png
Rotated with method 3 and fillcolor.
Sa

Subfolders:  76%|███████▌  | 19/25 [01:35<00:50,  8.44s/it]


Saved rotated image: D20221018T081629_IFCB182_01094_rot280.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20221018T081629_IFCB182_01094_rot320.png
After augmentation, folder 'Chaetoceros diadema' has 200 images.


Processing folder: Guinardia flaccida | Current images: 45
Each image will be rotated 3 times.
20 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_315_135823_04146_rot72.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_315_135823_04146_rot144.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_315_135823_04146_rot216.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_315_135823_04146_rot288.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_120213_04738_rot72.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_259_120213_04738_rot14

Subfolders:  80%|████████  | 20/25 [01:42<00:40,  8.14s/it]

Saved rotated image: D20221018T081629_IFCB182_00764_rot270.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_016_195725_01866_rot90.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_016_195725_01866_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di IFCB5_2014_016_195725_01866_rot270.png
After augmentation, folder 'Guinardia flaccida' has 200 images.


Processing folder: Und. nanoflagellates | Current images: 200
Folder 'Und. nanoflagellates' already has 200 images. Skipping augmentation.

Processing folder: Und. naked dinoflagellates <20 | Current images: 71
Each image will be rotated 1 times.
58 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: D20220914T091954_IFCB182_00026_rot120.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20220914T091954_IFCB182_00026_rot240.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20

Subfolders:  88%|████████▊ | 22/25 [01:44<00:14,  4.83s/it]

Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T094948_IFCB182_00088_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T094948_IFCB182_00195_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T092716_IFCB182_00472_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T092716_IFCB182_00317_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T094948_IFCB182_00173_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T094948_IFCB182_00393_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T094948_IFCB182_00341_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T094948_IFCB182_00295_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T094948_IFCB182_00307_rot180.png
After augmentation, folder 'Und. naked dinoflagellates <20' has 200 images.


Processing folder: Mer

Subfolders:  92%|█████████▏| 23/25 [01:46<00:08,  4.11s/it]

Saved rotated image: Copia di Merismopedia_sp_33_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di Merismopedia_sp_83_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di Merismopedia_sp_78_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di Merismopedia_sp_97_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di Merismopedia_sp_35_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di Merismopedia_sp_68_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: Copia di Merismopedia_sp_72_rot180.png
After augmentation, folder 'Merismopedia spp.' has 200 images.


Processing folder: Thalassiosira pseudonana | Current images: 23
Each image will be rotated 7 times.
16 images will be rotated an additional time.
Rotated with method 3 and fillcolor.
Saved rotated image: D20240628T081006_IFCB182_00257_rot40.png
Rotated with method 3 and fillcolor.
Saved rota

Subfolders:  96%|█████████▌| 24/25 [01:48<00:03,  3.58s/it]

Saved rotated image: D20240628T081006_IFCB182_00408_rot315.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T090443_IFCB182_00085_rot45.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T090443_IFCB182_00085_rot90.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T090443_IFCB182_00085_rot135.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T090443_IFCB182_00085_rot180.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T090443_IFCB182_00085_rot225.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T090443_IFCB182_00085_rot270.png
Rotated with method 3 and fillcolor.
Saved rotated image: D20240912T090443_IFCB182_00085_rot315.png
After augmentation, folder 'Thalassiosira pseudonana' has 200 images.


Processing folder: Cylindrotheca closterium | Current images: 39
Each image will be rotated 4 times.
5 images will be rotated an additional time.
Rotated with method 3

Subfolders: 100%|██████████| 25/25 [01:53<00:00,  4.53s/it]

Saved rotated image: IFCB1_2014_150_194713_01849_rot288.png
After augmentation, folder 'Cylindrotheca closterium' has 200 images.






In [12]:
preprocess_dataset_with_dynamic_padding_colored(dataset_directory)

Totale immagini da processare: 5000
Utilizzo di 1 processi per la preelaborazione delle immagini.


Elaborazione Immagini: 100%|██████████| 5000/5000 [02:40<00:00, 31.18it/s]

Preelaborazione in loco con padding dinamico completata!





## Take the class the have at least n images

In [None]:
import os
import shutil



def create_filtered_dataset(input_dir, output_dir, min_count):
    """
    Create a new dataset containing only classes (subdirectories) that have at least a minimum number of elements (images).

    :param input_dir: The directory of the original dataset. It should contain subdirectories corresponding to classes.
    :param output_dir: The directory where the filtered dataset will be created.
    :param min_count: The minimum number of elements (images) that a class must contain to be included.
    """
    # Ensure the output directory exists, if not create it
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Iterate over each subdirectory in the input dataset
    for class_name in os.listdir(input_dir):
        class_path = os.path.join(input_dir, class_name)

        # Check if it's a directory (class folder)
        if os.path.isdir(class_path):
            # Get the list of elements in the class directory
            elements = [f for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))]

            # Check if the number of elements meets the minimum count requirement
            if len(elements) >= min_count:
                # Create the class directory in the output dataset
                target_class_path = os.path.join(output_dir, class_name)
                if not os.path.exists(target_class_path):
                    os.makedirs(target_class_path)

                # Copy all elements from the original class directory to the filtered dataset
                for element in elements:
                    src_path = os.path.join(class_path, element)
                    dst_path = os.path.join(target_class_path, element)
                    shutil.copy(src_path, dst_path)

    print(f"Filtered dataset created successfully at {output_dir}!")



input_dir ='/content/drive/MyDrive/CNN-CNR/backup/Dataset-plancton-cnr-original-final-version-AUGMENTED'

output_file = "/content/drive/MyDrive/CNN-CNR/Datasets/final-15"


#create_filtered_dataset(input_dir, output_file, 15)

## Creation of a subset dataset

In [7]:
import os
import shutil
import random
from tqdm import tqdm

def create_subset_dataset(original_path, new_path, n, seed=None, extensions=None):
    """
    Creates a new dataset containing up to 'n' images per class from the original dataset.

    :param original_path: Path to the original dataset.
    :param new_path: Path where the new subset dataset will be saved.
    :param n: Number of images to select per class.
    :param seed: (Optional) Seed for reproducibility of random selection.
    :param extensions: (Optional) List of file extensions to consider as images (e.g., ['.jpg', '.png']).
    """
    # Set the seed for reproducibility if provided
    if seed is not None:
        random.seed(seed)

    # Define default image extensions if none are provided
    if extensions is None:
        extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.gif']

    # Check if the original path exists
    if not os.path.exists(original_path):
        raise FileNotFoundError(f"The original path '{original_path}' does not exist.")

    # Create the new dataset directory if it doesn't exist
    os.makedirs(new_path, exist_ok=True)

    # List all classes (subdirectories) in the original dataset
    classes = [d for d in os.listdir(original_path) if os.path.isdir(os.path.join(original_path, d))]

    # Iterate over each class with a progress bar
    for class_name in tqdm(classes, desc="Processing classes"):
        original_class_path = os.path.join(original_path, class_name)

        # List all files in the class directory and filter by the specified extensions
        images = [
            img for img in os.listdir(original_class_path)
            if os.path.isfile(os.path.join(original_class_path, img)) and
               os.path.splitext(img)[1].lower() in extensions
        ]

        # Check if there are enough images; if not, use all available images
        if len(images) < n:
            print(f"Warning: Class '{class_name}' has only {len(images)} images. All will be copied.")
            selected_images = images
        else:
            selected_images = random.sample(images, n)

        # Create the class directory in the new dataset
        new_class_path = os.path.join(new_path, class_name)
        os.makedirs(new_class_path, exist_ok=True)

        # Copy the selected images to the new class directory
        for img in selected_images:
            src = os.path.join(original_class_path, img)
            dst = os.path.join(new_class_path, img)
            shutil.copy2(src, dst)

    print(f"New dataset created at '{new_path}' with up to {n} images per class.")


original_path = '/content/drive/MyDrive/CNN-CNR/Datasets/final-200-v2'
new_path = '/content/drive/MyDrive/CNN-CNR/Datasets/final-200'
n = 200
seed = 42

create_subset_dataset(original_path, new_path, n, seed=None, extensions=None)

Processing classes:   4%|▍         | 1/25 [00:10<04:11, 10.46s/it]



Processing classes:   8%|▊         | 2/25 [00:27<05:31, 14.42s/it]



Processing classes:  12%|█▏        | 3/25 [00:34<03:59, 10.89s/it]



Processing classes:  16%|█▌        | 4/25 [00:36<02:38,  7.54s/it]



Processing classes:  20%|██        | 5/25 [00:39<01:56,  5.83s/it]



Processing classes:  24%|██▍       | 6/25 [00:46<01:55,  6.08s/it]



Processing classes:  32%|███▏      | 8/25 [00:56<01:35,  5.59s/it]



Processing classes:  36%|███▌      | 9/25 [01:01<01:25,  5.36s/it]



Processing classes:  40%|████      | 10/25 [01:08<01:32,  6.16s/it]



Processing classes:  44%|████▍     | 11/25 [01:12<01:13,  5.24s/it]



Processing classes:  48%|████▊     | 12/25 [01:20<01:20,  6.19s/it]



Processing classes:  52%|█████▏    | 13/25 [01:31<01:32,  7.71s/it]



Processing classes:  56%|█████▌    | 14/25 [01:43<01:39,  9.04s/it]



Processing classes:  60%|██████    | 15/25 [02:00<01:54, 11.48s/it]



Processing classes:  64%|██████▍   | 16/25 [02:24<02:17, 15.26s/it]



Processing classes:  68%|██████▊   | 17/25 [02:33<01:45, 13.25s/it]



Processing classes:  72%|███████▏  | 18/25 [02:43<01:25, 12.16s/it]



Processing classes:  76%|███████▌  | 19/25 [02:50<01:04, 10.75s/it]



Processing classes:  84%|████████▍ | 21/25 [03:08<00:38,  9.66s/it]



Processing classes:  88%|████████▊ | 22/25 [03:27<00:37, 12.44s/it]



Processing classes:  92%|█████████▏| 23/25 [03:30<00:18,  9.44s/it]



Processing classes:  96%|█████████▌| 24/25 [03:36<00:08,  8.56s/it]



Processing classes: 100%|██████████| 25/25 [03:46<00:00,  9.06s/it]

New dataset created at '/content/drive/MyDrive/CNN-CNR/Datasets/final-200' with up to 200 images per class.





## SPLIT DATASET

In [14]:
import os
import shutil

def split_dataset_train_test(data_dir, output_dir, train_ratio=0.8, test_ratio=0.2):
    for split in ['train', 'test']:
        split_dir = os.path.join(output_dir, split)
        if not os.path.exists(split_dir):
            os.makedirs(split_dir)

    for class_name in os.listdir(data_dir):
        class_dir = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_dir):
            continue

        for split in ['train', 'test']:
            split_class_dir = os.path.join(output_dir, split, class_name)
            if not os.path.exists(split_class_dir):
                os.makedirs(split_class_dir)

        images = os.listdir(class_dir)
        images = [img for img in images if os.path.isfile(os.path.join(class_dir, img))]

        total_images = len(images)
        train_size = int(train_ratio * total_images)

        train_images = images[:train_size]
        test_images = images[train_size:]

        for img in train_images:
            src = os.path.join(class_dir, img)
            dst = os.path.join(output_dir, 'train', class_name, img)
            shutil.copyfile(src, dst)

        for img in test_images:
            src = os.path.join(class_dir, img)
            dst = os.path.join(output_dir, 'test', class_name, img)
            shutil.copyfile(src, dst)

    print(f"Dataset divided into training and test sets. Saved in {output_dir}")


data_dir ='/content/drive/MyDrive/CNN-CNR/Datasets/final-200'
output_dir = '/content/drive/MyDrive/CNN-CNR/Datasets/final-200-train-test'

split_dataset_train_test(data_dir, output_dir)


Dataset divided into training and test sets. Saved in /content/drive/MyDrive/CNN-CNR/Datasets/final-200-train-test


# Statistics

In [1]:
import os
import plotly.graph_objects as go
from torchvision import datasets

def plot_classes(dir):
    dataset = datasets.ImageFolder(dir)

    class_names = dataset.classes
    class_to_idx = dataset.class_to_idx

    class_counts = {class_name: 0 for class_name in class_names}

    for _, label in dataset.samples:
        class_name = class_names[label]
        class_counts[class_name] += 1

    total_images = sum(class_counts.values())
    total_classes = len(class_names)

    print(f"Number of images: {total_images}")
    print(f"Number of classes: {total_classes}")

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=list(class_counts.keys()),
        y=list(class_counts.values()),
        marker_color='skyblue',
        hovertext=list(class_counts.keys()),
        hoverinfo="text+y"
    ))


    fig.update_layout(
        title='Number of Images per Class in Set',
        xaxis_title='Class Names',
        yaxis_title='Number of Images',
        xaxis_tickangle=-45,
        height=600,
        margin=dict(l=50, r=50, t=80, b=150),
    )

    fig.show()




In [None]:
plot_classes('/content/drive/MyDrive/CNN-CNR/Datasets/ORIGINAL/original')

Number of images: 5854
Number of classes: 70


In [5]:
plot_classes('/content/drive/MyDrive/CNN-CNR/backup/final-15-backup')

Number of images: 2216
Number of classes: 25


In [None]:
plot_classes('/content/drive/MyDrive/CNN-CNR/Datasets/ORIGINAL/original-augmented')

Number of images: 6293
Number of classes: 70


In [19]:
plot_classes('/content/drive/MyDrive/CNN-CNR/Datasets/final-200-train-test')

Number of images: 5000
Number of classes: 2


In [None]:
plot_classes('/content/drive/MyDrive/CNN-CNR/Datasets/final-150-train-test-v2/train')

Number of images: 3000
Number of classes: 25


In [18]:
plot_classes('/content/drive/MyDrive/CNN-CNR/Datasets/final-200-train-test/train')

Number of images: 11200
Number of classes: 70


## Class Distribution


In [None]:
import os

def get_class_distribution(dataset_path, reverse=False, output_file=None):
    class_distribution = []

    for class_name in os.listdir(dataset_path):
        class_path = os.path.join(dataset_path, class_name)

        if os.path.isdir(class_path):
            num_elements = len([f for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))])
            class_distribution.append((class_name, num_elements))

    sorted_distribution = sorted(class_distribution, key=lambda x: x[1], reverse=reverse)

    # Save to file if output_file path is provided
    if output_file:
        with open(output_file, 'w') as f:
            for class_name, num_elements in sorted_distribution:
                f.write(f"{class_name}: {num_elements}\n")

    return sorted_distribution

dataset_path ='/content/drive/MyDrive/CNN-CNR/backup/min-15-copied'
output_file = "/content/drive/MyDrive/CNN-CNR/Utili/distribution-original-dataset-min-15.txt"
#distribution = get_class_distribution(dataset_path, reverse=True, output_file=output_file)

for class_name, count in distribution:
    print(f"Class: {class_name}, Number of images: {count}")
