In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from google.colab import drive

In [None]:
# Mount Google Drive at /content/drive so the dataset folders under MyDrive
# are reachable through the regular filesystem (Colab-only step).
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Dataset locations, both derived from the mounted Drive root:
#   input_directory  - raw images, one subfolder per category
#   output_directory - processed images, mirroring the same category layout
drive_root = "/content/drive/MyDrive"
input_directory = f"{drive_root}/new_data"
output_directory = f"{drive_root}/processed_new_data"

In [None]:
# Create the output directory (including any missing parents); exist_ok=True
# makes re-running this cell a no-op when the directory already exists.
os.makedirs(output_directory, exist_ok=True)

In [None]:
def remove_background(image_path, threshold_value=240):
    """
    Blank out the light background of the image stored at `image_path`.

    The image is loaded with OpenCV and converted to grayscale; every pixel
    whose gray level is above `threshold_value` is treated as background and
    masked out (left black), while all other pixels keep their original
    colour values.

    Args:
        image_path: Path of the image file to load.
        threshold_value: Gray level (0-255) above which a pixel counts as
            background. Defaults to 240, i.e. white or near-white.

    Returns:
        The masked colour image, or None when the file could not be read
        (an error message is printed in that case).

    NOTE(review): dark strokes on the now-black background may end up with
    little contrast - confirm whether an inversion step is intended.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        print(f"Error: Could not read {image_path}")
        return None

    # THRESH_BINARY_INV yields 255 for pixels at or below the threshold
    # (foreground) and 0 for brighter ones (background).
    foreground_mask = cv2.threshold(
        cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY),
        threshold_value,
        255,
        cv2.THRESH_BINARY_INV,
    )[1]
    return cv2.bitwise_and(bgr, bgr, mask=foreground_mask)

def resize_and_convert_to_grayscale(image, size=(28, 28)):
    """
    Converts the image to grayscale and resizes it (28x28 by default).

    Args:
        image: Image as a NumPy array, either a colour image in OpenCV's BGR
            channel order or an already single-channel (2-D) grayscale image.
        size: Target size as (width, height), the order cv2.resize expects.
            Defaults to (28, 28).

    Returns:
        The resized single-channel image.
    """
    # cvtColor(..., COLOR_BGR2GRAY) raises on 2-D input, so images that are
    # already grayscale are passed through unchanged.
    if image.ndim == 2:
        gray_image = image
    else:
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # INTER_AREA is kept from the original; it is OpenCV's recommended
    # interpolation for shrinking images.
    return cv2.resize(gray_image, size, interpolation=cv2.INTER_AREA)

def save_image(image, output_path):
    """
    Write the processed image array to `output_path`.

    The array is wrapped in a PIL image and saved; PIL chooses the file
    format from the extension of `output_path`.
    """
    Image.fromarray(image).save(output_path)

def process_sketch(image_path, output_path):
    """
    Run the full preprocessing pipeline on a single sketch.

    Steps: remove the light background, convert to grayscale and resize,
    then save the result to `output_path`.

    Returns:
        True when the image was processed and saved, False when the source
        file could not be read (nothing is written in that case).
    """
    foreground = remove_background(image_path)
    if foreground is not None:
        save_image(resize_and_convert_to_grayscale(foreground), output_path)
        return True
    return False


In [None]:
# Collect the categories: every immediate subfolder of the input directory
# is treated as one category; plain files at the top level are ignored.
categories = []
for entry in os.listdir(input_directory):
    if os.path.isdir(os.path.join(input_directory, entry)):
        categories.append(entry)

In [None]:
# Process images for each category (no train/test splitting at this point).
# The output directory mirrors the input layout: one subfolder per category,
# with each processed image keeping its original file name.
image_extensions = ('.jpg', '.png', '.jpeg')

for category in categories:
    category_path = os.path.join(input_directory, category)
    output_category_path = os.path.join(output_directory, category)

    # Create category folder in output directory
    os.makedirs(output_category_path, exist_ok=True)

    # Match extensions case-insensitively so files such as "IMG_01.JPG" are
    # not silently skipped, and sort because os.listdir returns entries in
    # arbitrary order - this keeps the processing order reproducible.
    images = sorted(
        f for f in os.listdir(category_path)
        if f.lower().endswith(image_extensions)
    )

    # Process and save images; unreadable files are reported by
    # process_sketch itself and simply skipped here.
    for file in images:
        input_path = os.path.join(category_path, file)
        output_path = os.path.join(output_category_path, file)
        if process_sketch(input_path, output_path):
            print(f"Processed and saved: {output_path}")

print("Data processing completed. Images are organized by category.")