<a href="https://colab.research.google.com/github/megmarv/PsychoAI-/blob/Emotion-Identification2/preprocessing_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm  # Progress bar

# Input/output locations on the mounted Drive, the class labels, and the
# spatial size every image is resized to.
data_dir = "/content/drive/MyDrive/CNN model/new_dataset_for_custom_model"
output_dir = "/content/drive/MyDrive/CNN model/preprocessed_dataset"
emotions = ["anger", "happy", "sad", "neutral", "surprise", "fear", "disgust"]
target_size = (224, 224)  # Match ResNet50V2 input size

# Create the output root, then one sub-folder per emotion label.
os.makedirs(output_dir, exist_ok=True)
for emotion in emotions:
    emotion_output_dir = os.path.join(output_dir, emotion)
    os.makedirs(emotion_output_dir, exist_ok=True)

# Random transformations used to synthesise extra samples.
augmentation_options = {
    "rotation_range": 20,
    "width_shift_range": 0.2,
    "height_shift_range": 0.2,
    "horizontal_flip": True,
    "zoom_range": 0.2,
    "brightness_range": [0.8, 1.2],
}
datagen = ImageDataGenerator(**augmentation_options)

# Step 1: Count the directory entries found for each class.
class_counts = {}
for emotion in emotions:
    class_counts[emotion] = len(os.listdir(os.path.join(data_dir, emotion)))

per_class_totals = list(class_counts.values())
max_count = max(per_class_totals)  # Size of the largest class
total_images = sum(per_class_totals)  # Number of images before balancing

# Step 2: Preprocess & Augment for balancing
def preprocess_and_augment(image_path, target_size, save_dir, image_name, augment_needed):
    """Resize one image, save it, and optionally save augmented variants.

    Args:
        image_path: Path of the source image file.
        target_size: ``(width, height)`` handed to ``cv2.resize``.
        save_dir: Directory that receives the output files.
        image_name: File name for the resized copy; augmented copies are
            written as ``aug_<i>_<image_name>``.
        augment_needed: Number of augmented variants to write; values
            ``<= 0`` disable augmentation.

    Files that OpenCV cannot decode are skipped with a printed notice
    instead of aborting the whole run.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure (non-image file, directory, corrupt
        # data) by returning None rather than raising.
        print(f"Skipping unreadable image: {image_path}")
        return

    # Keep OpenCV's native BGR channel order end to end: cv2.imwrite expects
    # BGR, so converting to RGB before saving would swap red and blue on disk.
    image = cv2.resize(image, target_size)

    # Save the resized image directly; a /255 then *255 round-trip adds
    # nothing and can truncate pixel values through float rounding.
    cv2.imwrite(os.path.join(save_dir, image_name), image)

    if augment_needed > 0:
        # Feed 0-255 values to the generator: its brightness shift goes
        # through a PIL image, which works on the 0-255 scale, so [0, 1]
        # floats would come back black or out of range.
        batch = np.expand_dims(image.astype(np.float32), axis=0)
        augmented_images = datagen.flow(batch, batch_size=1)

        for i in range(augment_needed):
            aug_image = next(augmented_images)[0]
            # Clip before the cast so out-of-range values cannot wrap around.
            aug_image = np.clip(aug_image, 0, 255).astype(np.uint8)
            aug_save_path = os.path.join(save_dir, f"aug_{i}_{image_name}")
            cv2.imwrite(aug_save_path, aug_image)

# Step 3: Process dataset with balancing and visualize progress
print("Starting preprocessing and augmentation...")

total_processed = 0
with tqdm(total=total_images, desc="Processing Images", unit="img") as pbar:
    for emotion in emotions:
        emotion_dir = os.path.join(data_dir, emotion)
        output_emotion_dir = os.path.join(output_dir, emotion)

        image_files = os.listdir(emotion_dir)
        if not image_files:
            # Nothing to augment from; also avoids dividing by zero below.
            continue

        # Number of extra samples this class needs to reach the largest class.
        deficit = max(0, max_count - len(image_files))
        # Spread the deficit over the images. Plain floor division would drop
        # the remainder, leaving any class with more than half of max_count
        # completely un-augmented; the first `extra_augmentations` images
        # therefore each produce one additional variant.
        augment_needed_per_image, extra_augmentations = divmod(deficit, len(image_files))

        for index, image_name in enumerate(image_files):
            image_path = os.path.join(emotion_dir, image_name)
            augment_needed = augment_needed_per_image + (1 if index < extra_augmentations else 0)
            preprocess_and_augment(image_path, target_size, output_emotion_dir, image_name, augment_needed)

            total_processed += 1
            pbar.update(1)  # Update progress bar

print("✅ Preprocessing & augmentation complete! Balanced dataset saved in", output_dir)

Starting preprocessing and augmentation...


Processing Images: 100%|██████████| 26171/26171 [17:15<00:00, 25.28img/s]

✅ Preprocessing & augmentation complete! Balanced dataset saved in /content/drive/MyDrive/CNN model/preprocessed_dataset



