In [1]:
import os
import shutil

from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img

from libs.location import *

# Per-class image counts; starts empty and is filled in by a later cell.
target_num_samples = dict()

# NOTE(review): not referenced by any visible cell — presumably the desired
# minority/majority ratio after balancing; confirm or remove.
target_class_balance = 1.0

# Input root that is scanned for class sub-directories, and the root under
# which the balanced output is written. Both come from libs.location.
output_dir = BALANCED_TRAINING_IMG_JPG_LOCATION_PATH
dataset_dir = CLASSIFIED_TRAINING_IMG_JPG_LOCATION_PATH


2023-09-28 13:21:36.013449: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Count the files available for each class. Every sub-directory of
# `dataset_dir` is treated as one class; stray files at the top level
# (e.g. .DS_Store) are skipped so os.listdir() is never called on a file.
for class_name in os.listdir(dataset_dir):
    class_dir = os.path.join(dataset_dir, class_name)
    if not os.path.isdir(class_dir):
        continue
    num_samples = len(os.listdir(class_dir))
    target_num_samples[class_name] = num_samples

if not target_num_samples:
    # max()/min() below would raise a less helpful ValueError on an empty dict.
    raise ValueError(f"No class directories found in {dataset_dir!r}")

# Identify the largest (majority) and smallest (minority) classes.
majority_class = max(target_num_samples, key=target_num_samples.get)
minority_class = min(target_num_samples, key=target_num_samples.get)

no_of_majority_samples = target_num_samples[majority_class]

# From here on, `class_name` / `class_dir` refer to the minority class,
# which is the one that gets augmented.
class_name = minority_class
class_dir = os.path.join(dataset_dir, class_name)
num_original_samples = len(os.listdir(class_dir))
# Target size for the minority class: match the majority-class count.
num_samples_needed = no_of_majority_samples

datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
    # Brightness values are multiplicative factors (1.0 = unchanged,
    # 0.0 = black). The previous range [-1, 1] sampled zero/negative factors,
    # which produce black images; keep the range strictly positive.
    brightness_range=[0.5, 1.5],
)
output_class_dir = os.path.join(output_dir, class_name)
os.makedirs(output_class_dir, exist_ok=True)

In [19]:
# Build a directory iterator that yields augmented batches for a single
# class only. `save_to_dir` is None, so the iterator itself writes nothing to
# disk and `save_prefix` / `save_format` have no effect here.
print(f"Augmenting images for class: {class_name}")
class_datagen = datagen.flow_from_directory(
    directory=dataset_dir,
    classes=[class_name],  # restrict the iterator to this one class
    target_size=(224, 224),
    batch_size=32,
    save_to_dir=None,
    save_prefix=f"{class_name}_augmented",
    save_format="jpg",
)


Augmenting images for class: class-1
Found 6012 images belonging to 1 classes.


In [20]:
# Target sample count for the minority class (set to the majority-class count).
num_samples_needed

20672

In [21]:
# Number of full batches of 32 (the batch_size given to flow_from_directory)
# that fit into the target sample count.
num_samples_needed // 32

646

In [12]:
# Peek at the shape of the label array of one batch.
# Note: this pulls (and discards) a batch from the iterator.
next(class_datagen)[1].shape

(32, 1)

In [22]:
# Fetch one (images, labels) batch via the iterator protocol.
batch = next(class_datagen)

In [28]:
# Shapes of the image array and the label array in the fetched batch.
tuple(part.shape for part in batch)

((32, 224, 224, 3), (32, 1))

In [31]:
# Entries in `filenames` are paths relative to the dataset directory and
# include the class sub-folder as their first component.
class_datagen.filenames[0]

'class-1/class-1_000db696-cf54-4385-b10b-6b16fbb3f985.jpg'

In [None]:
# Write `num_samples_needed` augmented images of the minority class into
# `output_class_dir`.
#
# The previous version enumerated the (images, labels) tuple itself, so the
# index was only ever 0 or 1, and it copied the first two *original* files
# over and over instead of saving any augmented image. It also counted whole
# batches of 32, ignoring that the last batch of each pass over the data can
# be shorter. Here every augmented array is saved and images are counted
# individually.
num_saved = 0
while num_saved < num_samples_needed:
    # Builtin next() uses the iterator protocol rather than the
    # Keras-specific .next() method.
    batch = next(class_datagen)
    augmented_images = batch[0]  # batch[1] holds the labels, unused here
    if len(augmented_images) == 0:
        # Guard against spinning forever if the iterator has no images.
        raise RuntimeError(f"No images available to augment for class {class_name!r}")

    # Trim the final batch so exactly `num_samples_needed` files are written.
    for image_array in augmented_images[: num_samples_needed - num_saved]:
        new_image_path = os.path.join(
            output_class_dir, f"{class_name}_augmented_{num_saved:06d}.jpg"
        )
        # `datagen` is built without `rescale`, so pixel values are expected to
        # already be in 0-255; scale=False keeps them as-is instead of applying
        # a per-image min/max stretch.
        array_to_img(image_array, scale=False).save(new_image_path)
        num_saved += 1
