#/kaggle/input/dataset-of-30-words/30words

In [1]:
!pip install patool
!pip install dask[dataframe]

Collecting patool
  Downloading patool-3.1.0-py2.py3-none-any.whl.metadata (4.3 kB)
Downloading patool-3.1.0-py2.py3-none-any.whl (98 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.4/98.4 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: patool
Successfully installed patool-3.1.0


In [2]:
import os
import cv2
import patoolib

In [3]:
import cv2
import os
import numpy as np

# --- Configuration ---------------------------------------------------------
# Side length in pixels; every image is resized to IMG_SIZE x IMG_SIZE.
IMG_SIZE = 128

# Input root: each entry inside it is treated as one class by the loop below.
# NOTE(review): the folder is named "40words" while the outputs are labelled
# "30 words" -- confirm this is the intended input directory.
dataset_path = "/kaggle/input/dataset30words/40words"

# Output root: processed images are written to one sub-folder per class.
processed_path = "/kaggle/output/Processed-dataset-of-30-words"

# Create the output root up front; exist_ok keeps re-runs of this cell harmless.
os.makedirs(processed_path, exist_ok=True)

# Define augmentation functions
def _adjust_brightness(image, alpha, beta):
    """Return ``image * alpha + beta`` rounded and clamped to [0, 255] as uint8.

    Unlike ``cv2.convertScaleAbs`` this does not take the absolute value of the
    scaled result, so a negative ``beta`` drives dark pixels to 0 instead of
    folding them back to positive (brighter) values.
    """
    scaled = image.astype(np.float32) * alpha + beta
    return np.clip(np.rint(scaled), 0, 255).astype(np.uint8)


def apply_augmentations(image):
    """Build the augmented variants of one image.

    Args:
        image: Image array to augment. The caller in this notebook passes a
            resized grayscale image.

    Returns:
        A list of four images, in this order:
          0. the input image itself (same object, not a copy),
          1. a 5x5 Gaussian-blurred copy,
          2. a brighter copy (alpha=1.2, beta=30),
          3. a darker copy (alpha=0.8, beta=-30).
    """
    augmented_images = []

    # Original image, passed through unchanged
    augmented_images.append(image)

    # Slight blur
    blurred = cv2.GaussianBlur(image, (5, 5), 0)
    augmented_images.append(blurred)

    # Adjust brightness (increase and decrease). Values are clamped rather than
    # passed through abs(), so the darker variant never brightens dark pixels.
    bright_high = _adjust_brightness(image, alpha=1.2, beta=30)   # Brighter
    bright_low = _adjust_brightness(image, alpha=0.8, beta=-30)   # Darker
    augmented_images.append(bright_high)
    augmented_images.append(bright_low)

    return augmented_images

# Process and save images
# Walk every class folder, convert each image to a resized grayscale version,
# generate its augmented variants and write them under processed_path/<class>/.
for class_name in sorted(os.listdir(dataset_path)):
    class_input_path = os.path.join(dataset_path, class_name)

    # A stray file next to the class folders would make os.listdir() below
    # raise NotADirectoryError outside the try block and abort the whole run.
    if not os.path.isdir(class_input_path):
        print(f"Skipping non-directory entry: {class_input_path}")
        continue

    class_output_path = os.path.join(processed_path, class_name)
    os.makedirs(class_output_path, exist_ok=True)  # Create folder for each class

    for img_name in os.listdir(class_input_path):  # Iterate through images
        img_input_path = os.path.join(class_input_path, img_name)

        try:
            # cv2.imread signals failure by returning None instead of raising,
            # so turn that into a readable error for the handler below.
            img = cv2.imread(img_input_path)
            if img is None:
                raise ValueError("cv2.imread could not read the file as an image")

            # Convert to grayscale, then resize to IMG_SIZE x IMG_SIZE
            img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img_resized = cv2.resize(img_gray, (IMG_SIZE, IMG_SIZE))

            # Apply augmentations
            augmented_images = apply_augmentations(img_resized)

            # Save augmented images as <original stem>_aug_<index>.jpg
            stem = os.path.splitext(img_name)[0]
            for i, augmented_img in enumerate(augmented_images):
                new_img_name = f"{stem}_aug_{i}.jpg"
                img_output_path = os.path.join(class_output_path, new_img_name)
                # cv2.imwrite returns False on failure; do not report it as saved.
                if not cv2.imwrite(img_output_path, augmented_img):
                    raise OSError(f"cv2.imwrite failed for {img_output_path}")
                print(f"Saved: {img_output_path}")

        except Exception as e:
            # Best effort: report the problem and continue with the next image.
            print(f"Error processing {img_input_path}: {e}")

print(f"Processing and augmentation complete. All images saved to '{processed_path}'.")


Saved: /kaggle/output/Processed-dataset-of-30-words/Aaj/t (1)_aug_0.jpg
Saved: /kaggle/output/Processed-dataset-of-30-words/Aaj/t (1)_aug_1.jpg
Saved: /kaggle/output/Processed-dataset-of-30-words/Aaj/t (1)_aug_2.jpg
Saved: /kaggle/output/Processed-dataset-of-30-words/Aaj/t (1)_aug_3.jpg
Saved: /kaggle/output/Processed-dataset-of-30-words/Aaj/5_aug_0.jpg
Saved: /kaggle/output/Processed-dataset-of-30-words/Aaj/5_aug_1.jpg
Saved: /kaggle/output/Processed-dataset-of-30-words/Aaj/5_aug_2.jpg
Saved: /kaggle/output/Processed-dataset-of-30-words/Aaj/5_aug_3.jpg
Saved: /kaggle/output/Processed-dataset-of-30-words/Aaj/tar (1)_aug_0.jpg
Saved: /kaggle/output/Processed-dataset-of-30-words/Aaj/tar (1)_aug_1.jpg
Saved: /kaggle/output/Processed-dataset-of-30-words/Aaj/tar (1)_aug_2.jpg
Saved: /kaggle/output/Processed-dataset-of-30-words/Aaj/tar (1)_aug_3.jpg
Saved: /kaggle/output/Processed-dataset-of-30-words/Aaj/m (3)_aug_0.jpg
Saved: /kaggle/output/Processed-dataset-of-30-words/Aaj/m (3)_aug_1.jpg


In [4]:
# Pack the processed dataset into a zip archive in the current working directory.
archive_name = "Dataset_final30words.zip"

# patool refuses to overwrite an existing archive, so remove a stale one first;
# this keeps the cell re-runnable.
if os.path.exists(archive_name):
    os.remove(archive_name)

patoolib.create_archive(archive_name, ("/kaggle/output/Processed-dataset-of-30-words",))

INFO patool: Creating Dataset_final30words.zip ...
INFO patool: running /usr/bin/7z a -tzip -- Dataset_final30words.zip /kaggle/output/Processed-dataset-of-30-words
INFO patool: ... Dataset_final30words.zip created.
