In [14]:
from torchvision import transforms
from PIL import Image
import os
import torchvision.transforms.functional as TF
import random
import shutil
from sklearn.model_selection import train_test_split
from importlib import reload
import utility
reload(utility)
from utility import load_data, plot_confusion_matrix, plot_average_f1_scores, train_model, get_classification_details, get_hard_disk_path, show_samples, plot_features_importance, visualize_correlation, get_shap

# Define your custom transform
def augment_image(image, augmentation_count):
    """
    Build up to ``augmentation_count`` distinct flipped/rotated variants of ``image``.

    Combining horizontal flip, vertical flip and the four right-angle
    rotations gives 16 combinations but only 8 distinct results (a horizontal
    plus a vertical flip equals a 180-degree rotation). One of those 8 is the
    unchanged input, so at most 7 useful variants exist. They are enumerated
    here as "optional horizontal flip, then rotation", skipping the identity,
    so no returned image duplicates the input or another variant.

    Args:
        image: Image accepted by ``TF.hflip`` and ``TF.rotate``.
        augmentation_count: Maximum number of variants to return. Values
            <= 0 yield an empty list; values above 7 are capped at 7.

    Returns:
        list: The augmented images, rotations of the plain image first,
        then rotations of the horizontally flipped image.
    """
    augmented_images = []
    if augmentation_count <= 0:
        return augmented_images

    for flip_h in (False, True):
        for rotation in (0, 90, 180, 270):
            if not flip_h and rotation == 0:
                # No flip and no rotation is the original image itself.
                continue
            aug_image = TF.hflip(image) if flip_h else image
            if rotation:
                # expand=True lets the canvas follow the rotated content, so
                # non-square images are not cropped/padded at 90 and 270 degrees.
                aug_image = TF.rotate(aug_image, rotation, expand=True)
            augmented_images.append(aug_image)
            if len(augmented_images) >= augmentation_count:
                return augmented_images
    return augmented_images


def split_and_augment(input_data_dir, class_dir, train_dir, test_dir, augment_factor=5, test_size=0.2):
    """
    Split the ``.jpg`` files of one folder into train/test sets and augment the train set.

    Test images are copied unchanged into ``test_dir``. Every train image is
    copied unchanged into ``train_dir`` and additionally saved as augmented
    variants named ``"<index>_<original name>"``.

    Args:
        input_data_dir: Folder containing the source ``.jpg`` images.
        class_dir: Unused; kept so existing callers keep working.
        train_dir: Existing folder that receives train images and their variants.
        test_dir: Existing folder that receives the test images.
        augment_factor: Number of variants requested from ``augment_image``
            for each train image.
        test_size: Forwarded to ``train_test_split``.
    """
    # Sorted listing + fixed random_state keeps the split reproducible.
    image_files = [f for f in sorted(os.listdir(input_data_dir)) if f.endswith('.jpg')]

    if len(image_files) < 2:
        # train_test_split raises ValueError when a side of the split would be
        # empty, which happens for 0 or 1 images; keep whatever exists for training.
        train_files, test_files = image_files, []
    else:
        train_files, test_files = train_test_split(image_files, test_size=test_size, random_state=42)

    # Test files are copied (not moved) so the source folder stays intact.
    for f in test_files:
        shutil.copy(os.path.join(input_data_dir, f), os.path.join(test_dir, f))

    for f in train_files:
        img_path = os.path.join(input_data_dir, f)
        shutil.copy(img_path, os.path.join(train_dir, f))  # keep the untouched original too

        # The context manager releases the file handle even when no variant
        # is produced or saving fails.
        with Image.open(img_path) as image:
            for i, aug_image in enumerate(augment_image(image, augment_factor)):
                aug_image.save(os.path.join(train_dir, f"{i}_{f}"))

# Example usage:

data_dir = os.path.join(get_hard_disk_path("DL"), "z_projection")
output_dir = "D:/data_for_DL_augmented/"
# Every sub-directory of data_dir is treated as one class.
classes = [d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))]
print(classes)

for class_name in classes:
    class_dir = os.path.join(output_dir, class_name)
    train_class_dir = os.path.join(class_dir, 'train')
    test_class_dir = os.path.join(class_dir, 'test')
    input_data_dir = os.path.join(data_dir, class_name)

    # Creating the train/test leaves also creates class_dir itself; exist_ok=True
    # keeps the cell re-runnable instead of failing with FileExistsError on the
    # second execution.
    os.makedirs(train_class_dir, exist_ok=True)
    os.makedirs(test_class_dir, exist_ok=True)

    split_and_augment(input_data_dir, class_dir, train_class_dir, test_class_dir)

Successfully loaded data from D:/data_for_DL/
['keep0', 'keep1', 'keep2', 'reseed0', 'reseed1', 'split', 'dead', 'empty']


In [17]:
import os
from PIL import Image
import hashlib

def file_hash(filepath):
    """Return the hexadecimal MD5 digest of the raw bytes stored at ``filepath``."""
    digest = hashlib.md5()
    with open(filepath, mode='rb') as handle:
        # The whole file is read in one go and fed to the hash.
        digest.update(handle.read())
    return digest.hexdigest()

def all_images_unique(directory):
    """
    Report whether every ``.jpg`` file directly inside ``directory`` has a distinct hash.

    Files are compared through ``file_hash``, i.e. on their stored bytes, so
    two files showing the same picture but encoded differently count as
    distinct. Scanning stops at the first duplicate, whose filename is printed.

    Returns:
        bool: ``True`` when no two ``.jpg`` files share a hash, else ``False``.
    """
    seen_digests = set()
    jpeg_names = (name for name in os.listdir(directory) if name.endswith('.jpg'))
    for filename in jpeg_names:
        digest = file_hash(os.path.join(directory, filename))
        if digest not in seen_digests:
            seen_digests.add(digest)
            continue
        # Same digest seen before: report and stop at the first collision.
        print(f"Duplicate found: {filename}")
        return False
    return True

# Usage
dir_path = 'D:/data_for_DL_augmented/reseed0/train/'
# Run the byte-level duplicate scan on one train folder and report the verdict.
print(
    "All images in the folder are unique."
    if all_images_unique(dir_path)
    else "There are duplicate images in the folder."
)

All images in the folder are unique.
