# In [None]:
import os
import shutil

import cv2
import numpy as np
from imblearn.over_sampling import SMOTE

# Source training directory; each immediate subfolder is treated as one class.
TRAIN_DIR = '../../data/train'

# Output directory for the combined dataset, mirroring the class subfolders:
#   - the original images
#   - plus the newly generated SMOTE images
TRAIN_COMBINED_DIR = '../../data/train_combined'

# Target size handed to cv2.resize for every loaded image.
# cv2.resize interprets this tuple as (width, height).
IMG_SIZE = (224, 224)

def ensure_dir_exists(path):
    """
    Create directory `path`, including missing parents, if it does not exist.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If `path` exists but is not a directory.
    """
    # A single call with exist_ok=True avoids the window between a separate
    # existence check and the creation, during which another process could
    # create the directory and make os.makedirs fail.
    os.makedirs(path, exist_ok=True)

def _load_class_images(class_path):
    """
    Load every decodable image in `class_path`, resized to IMG_SIZE and flattened.

    Returns:
        (rows, paths): `rows` is a list of 1-D uint8 arrays, one per image;
        `paths` is the list of source file paths in the same order.
    """
    rows = []
    paths = []
    # Sorted so the sample order (and therefore the seeded SMOTE output)
    # does not depend on filesystem listing order.
    for img_name in sorted(os.listdir(class_path)):
        img_full_path = os.path.join(class_path, img_name)
        img = cv2.imread(img_full_path)
        if img is None:
            # cv2.imread returns None for anything it cannot decode.
            continue
        img = cv2.resize(img, IMG_SIZE)  # result shape: (height, width, 3)
        rows.append(img.reshape(-1))
        paths.append(img_full_path)
    return rows, paths


def offline_smote_images_per_class():
    """
    Build TRAIN_COMBINED_DIR from TRAIN_DIR: copy the original images of every
    class, then add SMOTE-generated images so that smaller classes are
    oversampled up to the size of the largest class.

    SMOTE is fitted once over all classes that have at least two readable
    images. It cannot be run on one class at a time, because imbalanced-learn
    rejects a target vector that contains a single class. Classes with fewer
    than two images are copied but excluded from SMOTE, since k_neighbors=1
    needs at least one neighbour within the class.

    Synthetic images are written as ``smote_<class>_<idx>.jpg`` in the class
    subfolder, with ``idx`` starting at that class's original image count.

    Note: all eligible images are held in memory at once as float32.

    Raises:
        FileNotFoundError: If TRAIN_DIR does not exist.
    """
    classes = [
        entry for entry in sorted(os.listdir(TRAIN_DIR))
        if os.path.isdir(os.path.join(TRAIN_DIR, entry))
    ]
    print("Classes found:", classes)

    smote_rows = []    # flattened images of all SMOTE-eligible classes
    smote_labels = []  # integer class index for each entry of smote_rows
    counts = {}        # class index -> number of original images

    # ---------------------------------------------------
    # 1) Load each class and copy its originals
    # ---------------------------------------------------
    for label, cls in enumerate(classes):
        class_path = os.path.join(TRAIN_DIR, cls)
        combined_class_path = os.path.join(TRAIN_COMBINED_DIR, cls)
        ensure_dir_exists(combined_class_path)

        print(f"\n[CLASS: {cls}] Loading images...")
        rows, img_paths = _load_class_images(class_path)
        n_original = len(rows)
        if n_original == 0:
            print(f"No images found for class {cls}, skipping.")
            continue

        # Byte-for-byte copy: decoding and re-encoding through OpenCV would
        # recompress the originals and degrade lossy formats such as JPEG.
        print(f"Copying original images to {combined_class_path}...")
        for img_path in img_paths:
            shutil.copy2(
                img_path,
                os.path.join(combined_class_path, os.path.basename(img_path)),
            )

        if n_original < 2:
            print(f"Class {cls} has <2 images, skipping SMOTE.")
            continue

        smote_rows.extend(rows)
        smote_labels.extend([label] * n_original)
        counts[label] = n_original

    # ---------------------------------------------------
    # 2) Apply SMOTE once across all eligible classes
    # ---------------------------------------------------
    if len(counts) < 2:
        print("\nSMOTE needs at least two classes with >=2 images each; "
              "no synthetic images were generated.")
    else:
        # Float input keeps the interpolation arithmetic out of uint8, where
        # subtracting a larger pixel value from a smaller one wraps around.
        X_arr = np.asarray(smote_rows, dtype=np.float32)
        y_arr = np.asarray(smote_labels, dtype=np.int64)
        n_total = len(X_arr)
        del smote_rows  # the uint8 copies are no longer needed

        print(f"\nApplying SMOTE across {len(counts)} classes "
              f"with {n_total} images...")
        # k_neighbors=1 lets classes with as few as two images take part,
        # at the cost of less varied synthetic samples.
        smote = SMOTE(random_state=42, k_neighbors=1)
        X_sm, y_sm = smote.fit_resample(X_arr, y_arr)

        # The resampled arrays start with the original rows; everything
        # after index n_total is newly generated.
        synth_X = X_sm[n_total:]
        synth_y = y_sm[n_total:]

        # ---------------------------------------------------
        # 3) Write out ONLY the newly generated images, per class
        # ---------------------------------------------------
        for label, n_original in counts.items():
            cls = classes[label]
            class_synth = synth_X[synth_y == label]
            generated_count = len(class_synth)
            if generated_count == 0:
                print(f"No new images were generated for class {cls}.")
                continue

            print(f"Generated {generated_count} new images for class {cls}.")
            combined_class_path = os.path.join(TRAIN_COMBINED_DIR, cls)

            # Round and clip before converting back so values stay in 0..255.
            pixels = np.clip(np.rint(class_synth), 0, 255).astype(np.uint8)
            for offset, flat in enumerate(pixels):
                idx = n_original + offset
                # cv2.resize was given (width, height), so rows == IMG_SIZE[1].
                sm_img = flat.reshape(IMG_SIZE[1], IMG_SIZE[0], 3)
                out_path = os.path.join(
                    combined_class_path, f"smote_{cls}_{idx}.jpg"
                )
                # cv2.imwrite signals failure through its return value
                # rather than raising, so check it explicitly.
                if not cv2.imwrite(out_path, sm_img):
                    print(f"Warning: failed to write {out_path}")

    print("\nAll classes processed. New SMOTE images are in:", TRAIN_COMBINED_DIR)

# ---------------------------------------------------
# Script entry point: run the offline SMOTE augmentation only when this
# file is executed directly, not when it is imported as a module.
# ---------------------------------------------------
if __name__ == "__main__":
    offline_smote_images_per_class()
