# Data Preprocessing Notebook
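This notebook loads the raw images for the Sign Language Detection project, preprocesses each one (grayscale, Gaussian blur, inverted Otsu threshold, resize to 64×64), saves the results to a mirrored folder structure, and displays a before/after sample.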

In [None]:

import cv2
import os
import numpy as np
import matplotlib.pyplot as plt

# --- Configuration ---------------------------------------------------------
raw_path = "dataset"                  # input root; each sub-directory is one label
processed_path = "processed_dataset"  # output root; mirrors the label sub-directories
os.makedirs(processed_path, exist_ok=True)

IMG_SIZE = 64    # processed images are resized to IMG_SIZE x IMG_SIZE
# Upper bound on (original, processed) pairs kept in memory for previewing.
# Keeping every original image grows memory with the dataset size; set this
# to None to retain every pair.
MAX_SAMPLES = 8

# (original BGR image, processed image) pairs retained for visual inspection.
samples = []


def preprocess_image(img, size=IMG_SIZE):
    """Return the thresholded, resized version of a BGR image.

    Pipeline: grayscale -> 5x5 Gaussian blur -> inverted binary threshold with
    Otsu's automatic level -> resize to ``(size, size)``.

    NOTE(review): the resize runs after thresholding with cv2.resize's default
    interpolation, so the output may contain intermediate gray values rather
    than being strictly two-level -- confirm this is what the model expects.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(
        blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )
    return cv2.resize(thresh, (size, size))


processed_count = 0   # images read, processed and written successfully
unreadable_count = 0  # directory entries cv2.imread could not decode
failed_writes = 0     # processed images that could not be saved

# Sorted listings make the processing order (and therefore samples[0])
# reproducible; os.listdir returns entries in arbitrary order.
for label in sorted(os.listdir(raw_path)):
    label_path = os.path.join(raw_path, label)
    if not os.path.isdir(label_path):
        continue
    out_dir = os.path.join(processed_path, label)
    os.makedirs(out_dir, exist_ok=True)

    for img_name in sorted(os.listdir(label_path)):
        img_path = os.path.join(label_path, img_name)
        img = cv2.imread(img_path)

        # cv2.imread returns None instead of raising when it cannot load the path.
        if img is None:
            unreadable_count += 1
            continue

        resized = preprocess_image(img)

        save_path = os.path.join(out_dir, img_name)
        # cv2.imwrite reports failure through its return value, and raises
        # cv2.error when no encoder matches the file extension.
        try:
            written = cv2.imwrite(save_path, resized)
        except cv2.error:
            written = False
        if not written:
            failed_writes += 1
            continue

        processed_count += 1
        if MAX_SAMPLES is None or len(samples) < MAX_SAMPLES:
            samples.append((img, resized))

print("Total processed images:", processed_count)
if unreadable_count:
    print("Skipped unreadable entries:", unreadable_count)
if failed_writes:
    print("Failed to write images:", failed_writes)


In [None]:

# Preview the first retained pair: source image on the left, processed on the right.
if not samples:
    print("No samples found.")
else:
    original_bgr, processed = samples[0]
    # OpenCV loads images as BGR; matplotlib expects RGB.
    panels = (
        ("Original", cv2.cvtColor(original_bgr, cv2.COLOR_BGR2RGB), None),
        ("Processed", processed, "gray"),
    )

    plt.figure(figsize=(6, 3))
    for position, (title, image, cmap) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.title(title)
        plt.imshow(image, cmap=cmap)
        plt.axis("off")
    plt.show()
