In [14]:
import numpy as np
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import collections

In [1]:
# Load all original images

import pickle
with open('../images_data.pkl', 'rb') as f:
    images_data = pickle.load(f)

In [17]:
# Create numpy array of the images and base directory

X = []
Y = []

for image, label in images_data:
    X.append(np.array(image))
    Y.append(label)

os.makedirs("../archive_balanced", exist_ok=True)

In [22]:
# Get all unique categories
unique_categories = set(Y) # names of the categories

# Create a mapping from category name to integer label
category_to_label = {category: label for label, category in enumerate(unique_categories)}

# Count the number of images in each category
category_counts = collections.Counter(Y)

categories = category_to_label.keys()  # Lista de categorías

# Print the mapping of category names to integer labels
print("Category to Label Mapping:")
for category, label in category_to_label.items():
    count = category_counts[category]
    print(f"{category} --> {label} --> {count} images")

Category to Label Mapping:
Loose Silky-bent --> 0 --> 762 images
Cleavers --> 1 --> 335 images
Black-grass --> 2 --> 309 images
Scentless Mayweed --> 3 --> 607 images
Maize --> 4 --> 257 images
Charlock --> 5 --> 452 images
Sugar beet --> 6 --> 463 images
Fat Hen --> 7 --> 538 images
Small-flowered Cranesbill --> 8 --> 576 images
Common wheat --> 9 --> 253 images
Common Chickweed --> 10 --> 713 images
Shepherd Purse --> 11 --> 274 images


In [24]:
# Create a list to store the integer labels corresponding to each category
Y = [category_to_label[category] for category in Y]
X = np.array(X)
Y = np.array(Y)

  X = np.array(X)


In [25]:
# Balance classes by supplementing them with rotated images, zoomed images, etc., of the same class in case they are missing.

# Ensure class labels in Y are encoded as integers
class_names = np.unique(Y)
label_to_int = {label: i for i, label in enumerate(class_names)}
Y_encoded = np.array([label_to_int[label] for label in Y])

# Define the data augmentation parameters
data_augmentation = ImageDataGenerator(
    rotation_range=30,        # Random rotation within +/- 30 degrees
    zoom_range=0.25,          # Maximum 25% zoom-in or zoom-out
    fill_mode='nearest',      # Fill newly created pixels with the nearest existing pixel
    horizontal_flip=True      # Random horizontal flipping
)

# Create a dictionary to keep track of the class indices
class_indices = {class_label: np.where(Y_encoded == label_to_int[class_label])[0] for class_label in class_names}

# Calculate the number of samples needed for each class to balance the dataset
max_samples = max(len(class_indices[class_label]) for class_label in class_names)
num_classes = len(class_indices)
samples_per_class = {class_label: max_samples - len(class_indices[class_label]) for class_label in class_names}

# Generate augmented samples for each underrepresented class
X_balanced = []
Y_balanced = []

for class_label, num_samples in samples_per_class.items():
    indices = class_indices[class_label]
    np.random.shuffle(indices)

    for i in range(num_samples):
        original_image = X[indices[i % len(indices)]]
        # Apply data augmentation to create new images
        augmented_image = data_augmentation.random_transform(original_image)
        X_balanced.append(augmented_image)
        Y_balanced.append(class_label)

# Concatenate the original data with the new balanced data
X = np.concatenate([X, np.array(X_balanced)])
Y = np.concatenate([Y, np.array(Y_balanced)])

# Now X_balanced and Y_balanced contain the balanced dataset with augmented images for underrepresented classes

  X = np.concatenate([X, np.array(X_balanced)])


In [27]:
# Count the number of images in each category
category_counts = collections.Counter(Y)

# Print the mapping of category names to integer labels
print("Category to Label Mapping:")
for category, label in category_to_label.items():
    count = category_counts[category]
    print(f"{category} --> {label} --> {count} images")

Category to Label Mapping:
Loose Silky-bent --> 0 --> 0 images
Cleavers --> 1 --> 0 images
Black-grass --> 2 --> 0 images
Scentless Mayweed --> 3 --> 0 images
Maize --> 4 --> 0 images
Charlock --> 5 --> 0 images
Sugar beet --> 6 --> 0 images
Fat Hen --> 7 --> 0 images
Small-flowered Cranesbill --> 8 --> 0 images
Common wheat --> 9 --> 0 images
Common Chickweed --> 10 --> 0 images
Shepherd Purse --> 11 --> 0 images


In [28]:
print(category_counts)

Counter({2: 762, 5: 762, 1: 762, 10: 762, 9: 762, 7: 762, 0: 762, 4: 762, 3: 762, 11: 762, 8: 762, 6: 762})


In [29]:
# Create a list to store the integer labels corresponding to each category
#Y_labels = [category_to_label[category] for category in Y]

from PIL import Image

for category in categories:
    dir = os.path.join("../archive_balanced", category)
    os.makedirs(dir, exist_ok=True)
    label = category_to_label[category]
    category_images = X[Y == label]
    print("Working on", category)
    # Contador para nombrar las imágenes
    counter = 1
    for image in category_images:
        # Transform image to image_array
        image_array = np.array(image)
        # Save image_array as an image
        dest_path = os.path.join("../archive_balanced", category, f"image{counter}.jpg")
        image_to_save = Image.fromarray(image_array)
        image_to_save.save(dest_path)
        counter += 1

Category to Label Mapping:
Loose Silky-bent --> 0 --> 0 images
Cleavers --> 1 --> 0 images
Black-grass --> 2 --> 0 images
Scentless Mayweed --> 3 --> 0 images
Maize --> 4 --> 0 images
Charlock --> 5 --> 0 images
Sugar beet --> 6 --> 0 images
Fat Hen --> 7 --> 0 images
Small-flowered Cranesbill --> 8 --> 0 images
Common wheat --> 9 --> 0 images
Common Chickweed --> 10 --> 0 images
Shepherd Purse --> 11 --> 0 images
Working on Loose Silky-bent
Working on Cleavers
Working on Black-grass
Working on Scentless Mayweed
Working on Maize
Working on Charlock
Working on Sugar beet
Working on Fat Hen
Working on Small-flowered Cranesbill
Working on Common wheat
Working on Common Chickweed
Working on Shepherd Purse
