In [None]:
import os

# Define the dataset folders (paths are relative to the notebook's working directory):
#   train_set_folder -> raw input images, one sub-folder per label (read by preprocess_and_save)
#   output_folder    -> resized / grayscale / histogram-equalized copies of the raw images
#   output_folder_2  -> augmented images; the training data is loaded from this folder
train_set_folder = 'own_dataset'
output_folder = 'preprocessed'
output_folder_2 = 'augmented'
def create_directory_if_not_exists(directory):
    """Create ``directory`` (including missing parents) unless it already exists.

    Args:
        directory: Path of the folder to create.

    Raises:
        FileExistsError: If ``directory`` exists but is not a directory, so the
            problem surfaces here instead of later when files are written into it.
    """
    # exist_ok=True replaces the separate exists() check, which could race with
    # another process creating the folder between the check and makedirs().
    os.makedirs(directory, exist_ok=True)

# Make sure both output folders are present before any later cell writes into them
for _folder in (output_folder, output_folder_2):
    create_directory_if_not_exists(_folder)

In [None]:
import os
import cv2
import numpy as np
from PIL import Image
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import DepthwiseConv2D
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from kerastuner.tuners import RandomSearch

# DepthwiseConv2D variant that tolerates a `groups` keyword
class CustomDepthwiseConv2D(DepthwiseConv2D):
    """DepthwiseConv2D subclass that discards a ``groups`` keyword argument.

    NOTE(review): presumably this exists so layer configs that carry a
    ``groups`` entry can still be instantiated -- confirm against the caller.
    """

    def __init__(self, *args, **kwargs):
        # Remove `groups` when present; everything else is forwarded untouched.
        kwargs.pop('groups', None)
        super().__init__(*args, **kwargs)

# Check if image is valid
def check_image_integrity(img_path):
    """Return True when Pillow can open and verify the file at ``img_path``.

    Args:
        img_path: Path of the image file to check.

    Returns:
        True if the file opens and passes ``Image.verify()``; False (after
        printing a message) when opening or verifying raises IOError or
        SyntaxError.
    """
    try:
        # The context manager closes the underlying file handle even when
        # verify() raises, so checking many files does not leak descriptors.
        with Image.open(img_path) as img:
            img.verify()  # Verify that it is an image
        return True
    except (IOError, SyntaxError) as e:
        print(f"Corrupted image: {img_path} - {e}")
        return False

# Image preprocessing function
def preprocess_image(img, img_size=(300, 300)):
    """Resize an image, convert it to grayscale and equalize its histogram.

    Args:
        img: Image array, e.g. the BGR array returned by ``cv2.imread``.
            An already single-channel (2-D) array is accepted as well.
        img_size: Target size passed to ``cv2.resize``. Defaults to
            (300, 300), the size used throughout this notebook.

    Returns:
        The resized, single-channel, histogram-equalized image.
    """
    img_resized = cv2.resize(img, img_size)
    if img_resized.ndim == 2:
        # Already grayscale: BGR2GRAY would raise on a single-channel input.
        gray = img_resized
    else:
        gray = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)
    return cv2.equalizeHist(gray)

# Function to process and save images
def preprocess_and_save(input_folder, output_folder):
    """Preprocess every image under ``input_folder`` into ``output_folder``.

    ``input_folder`` is expected to contain one sub-folder per label. Each
    readable image is run through ``preprocess_image`` and written under the
    same label and file name inside ``output_folder``. Entries that are not
    directories (label level) or not valid image files are skipped.

    Args:
        input_folder: Root folder with one sub-folder per label.
        output_folder: Root folder receiving the processed images; created
            when missing.
    """
    create_directory_if_not_exists(output_folder)

    # sorted() keeps traversal order (and the printed messages) stable across
    # runs and filesystems; os.listdir() returns entries in arbitrary order.
    for label in sorted(os.listdir(input_folder)):
        label_path = os.path.join(input_folder, label)
        if not os.path.isdir(label_path):
            continue

        output_label_path = os.path.join(output_folder, label)
        create_directory_if_not_exists(output_label_path)

        for img_name in sorted(os.listdir(label_path)):
            img_path = os.path.join(label_path, img_name)
            if not (os.path.isfile(img_path) and check_image_integrity(img_path)):
                continue

            img = cv2.imread(img_path)
            if img is None:
                print(f"Failed to load image: {img_path}")
                continue

            save_path = os.path.join(output_label_path, img_name)
            # cv2.imwrite signals failure through its return value, not an
            # exception, so an unchecked call can lose images silently.
            if not cv2.imwrite(save_path, preprocess_image(img)):
                print(f"Failed to save image: {save_path}")

# Run preprocessing: reads the raw images in train_set_folder and writes the
# processed copies, grouped by label, into output_folder
preprocess_and_save(train_set_folder, output_folder)
print("Preprocessing completed. Images saved to:", output_folder)



In [None]:
# Data augmentation
# Module-level generator configuration; augment_and_save reads this `datagen`
# directly rather than receiving it as an argument.
# NOTE(review): no seed is passed here or to datagen.flow(), so the augmented
# output presumably differs between runs -- confirm whether that is acceptable.
# NOTE(review): horizontal_flip=True mirrors images; confirm that a mirrored
# sign still belongs to the same label.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

def augment_and_save(input_folder, output_folder_2, augment_count=5):
    """Write ``augment_count`` augmented variants of every image in ``input_folder``.

    ``input_folder`` is expected to contain one sub-folder per label. Each
    image is read as grayscale, resized to 300x300 and passed through the
    module-level ``datagen``; the variants are saved as
    ``<original stem>_aug_<i>.jpg`` under the same label in ``output_folder_2``.

    Args:
        input_folder: Root folder with one sub-folder per label.
        output_folder_2: Root folder receiving the augmented images; created
            when missing.
        augment_count: Number of variants written per source image. Zero or
            a negative value writes nothing.
    """
    create_directory_if_not_exists(output_folder_2)

    # sorted() keeps traversal order stable; os.listdir() order is arbitrary.
    for label in sorted(os.listdir(input_folder)):
        label_path = os.path.join(input_folder, label)
        if not os.path.isdir(label_path):
            continue

        output_label_path = os.path.join(output_folder_2, label)
        create_directory_if_not_exists(output_label_path)

        for img_name in sorted(os.listdir(label_path)):
            img_path = os.path.join(label_path, img_name)
            if not os.path.isfile(img_path):
                continue

            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                print(f"Failed to load image: {img_path}")
                continue

            img = cv2.resize(img, (300, 300))  # Resize to 300x300
            # Add the batch and channel axes expected by datagen.flow.
            img = img.reshape((1,) + img.shape + (1,))

            stem = os.path.splitext(img_name)[0]
            # datagen.flow() never ends on its own; zipping with range() stops
            # after exactly augment_count batches and, unlike a break placed
            # after the save, writes nothing when augment_count <= 0.
            for i, batch in zip(range(augment_count), datagen.flow(img, batch_size=1)):
                augmented_img = batch[0].astype('uint8')
                save_path = os.path.join(output_label_path, f"{stem}_aug_{i}.jpg")
                # cv2.imwrite reports failure via its return value.
                if not cv2.imwrite(save_path, augmented_img):
                    print(f"Failed to save image: {save_path}")

# Run augmentation: reads the preprocessed images in output_folder and writes
# 5 augmented variants per image into output_folder_2
augment_and_save(output_folder, output_folder_2, augment_count=5)
print("Data augmentation completed. Augmented images saved to:", output_folder_2)



In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from kerastuner.tuners import RandomSearch

# Function to load image paths and labels
def load_images_and_labels(dataset_folder):
    """Collect file paths and folder-name labels from a label-per-folder dataset.

    Every regular file found directly inside a sub-folder of
    ``dataset_folder`` becomes one sample whose label is the sub-folder name.
    Top-level entries that are not directories, and nested entries that are
    not files, are reported and skipped.

    Args:
        dataset_folder: Root folder containing one sub-folder per label.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: file paths and their labels, in
        sorted (label, file name) order.
    """
    X, y = [], []
    # os.listdir() returns entries in arbitrary, filesystem-dependent order.
    # Sorting makes the sample order deterministic, so the seeded
    # train/validation/test split downstream is reproducible across machines.
    for label in sorted(os.listdir(dataset_folder)):
        label_path = os.path.join(dataset_folder, label)
        if not os.path.isdir(label_path):
            print(f"Skipping non-directory: {label_path}")
            continue

        for img_name in sorted(os.listdir(label_path)):
            img_path = os.path.join(label_path, img_name)
            if os.path.isfile(img_path):
                X.append(img_path)
                y.append(label)
            else:
                print(f"Skipping non-file: {img_path}")

    print(f"Loaded {len(X)} image paths and {len(y)} labels.")
    return np.array(X), np.array(y)

# Encode labels
# Shared encoder instance: it is fitted further down on the loaded labels, and
# its classes_ array is saved next to the trained model at the end of the notebook.
label_encoder = LabelEncoder()

# Generator function to load images in batches
def image_generator(X, y, batch_size=32):
    """Endlessly yield ``(images, labels)`` batches read from disk.

    Each image is loaded as grayscale, resized to 300x300 and scaled to
    [0, 1]. The samples are walked in their given order, in slices of
    ``batch_size``, and the walk restarts after the last slice.

    Args:
        X: Sequence of image file paths.
        y: Sequence of labels aligned with ``X``.
        batch_size: Maximum number of samples per yielded batch.

    Yields:
        Tuple ``(batch_images, batch_labels)``: an array of shape
        ``(k, 300, 300, 1)`` and an array with the ``k`` matching labels.
        ``k`` is smaller than ``batch_size`` for the last slice of a pass or
        when some files could not be read.

    Raises:
        ValueError: If ``X`` is empty, or if a complete pass over ``X``
            produced no readable image (either case would otherwise loop
            forever without yielding).
    """
    num_samples = len(X)
    if num_samples == 0:
        raise ValueError("image_generator received no samples.")

    while True:
        yielded_any = False
        for offset in range(0, num_samples, batch_size):
            batch_paths = X[offset:offset + batch_size]
            batch_targets = y[offset:offset + batch_size]

            batch_images = []
            batch_labels = []
            for img_path, label in zip(batch_paths, batch_targets):
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is None:
                    # Drop the label together with the image so the two
                    # arrays stay aligned.
                    print(f"Failed to load image: {img_path}")
                    continue
                img = cv2.resize(img, (300, 300))
                batch_images.append(img / 255.0)  # Normalize
                batch_labels.append(label)

            if not batch_images:
                # Nothing readable in this slice; an empty batch is useless.
                continue

            yielded_any = True
            yield (np.array(batch_images).reshape(-1, 300, 300, 1),
                   np.array(batch_labels))

        if not yielded_any:
            raise ValueError("image_generator could not read any of the given images.")

# Load image paths and labels
# output_folder_2 is populated by augment_and_save, which writes several
# `<stem>_aug_<i>.jpg` variants per source image.
X, y = load_images_and_labels(output_folder_2)

# Check if data was loaded correctly
if len(X) == 0 or len(y) == 0:
    raise ValueError("No images were loaded. Please check the dataset path and structure.")

# Encode all labels
# Folder names become integer class ids; the id -> name lookup stays available
# in label_encoder.classes_.
y_encoded = label_encoder.fit_transform(y)

# Split data into training (70%), validation (15%), and test (15%) sets
# Both splits are stratified by class and seeded with random_state=42.
# NOTE(review): the split is made per file, so augmented variants of the same
# source image can end up in different subsets; validation/test accuracy may
# therefore be optimistic -- consider splitting by source image instead.
X_train, X_temp, y_train, y_temp = train_test_split(X, y_encoded, test_size=0.3, stratify=y_encoded, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

print("Data splitting completed.")
print(f"Training set: {len(X_train)} samples")
print(f"Validation set: {len(X_val)} samples")
print(f"Test set: {len(X_test)} samples")

# Create generators
# Each generator loops forever with image_generator's default batch_size (32),
# so every consumer has to pass an explicit number of steps.
train_gen = image_generator(X_train, y_train)
val_gen = image_generator(X_val, y_val)
test_gen = image_generator(X_test, y_test)



In [None]:
import numpy as np
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from kerastuner.tuners import RandomSearch

# Define the model building function for Keras Tuner
def build_model(hp, num_classes=27, input_shape=(300, 300, 1)):
    """Build and compile the CNN for one Keras Tuner trial.

    The network is three Conv2D -> BatchNormalization -> MaxPooling2D ->
    Dropout blocks, followed by Flatten, one tunable Dense block and a
    softmax output layer.

    Tuned hyperparameters (names unchanged so existing tuner results stay
    compatible): ``conv_<n>_filter``, ``conv_<n>_kernel`` for n in 1..3,
    ``dense_1_units`` and ``lr``.

    Args:
        hp: HyperParameters object supplied by the tuner.
        num_classes: Size of the softmax output layer. Defaults to 27
            (26 alphabets + space); it has to cover every encoded label id.
        input_shape: Shape of one input image. Defaults to (300, 300, 1).

    Returns:
        The compiled Sequential model.
    """
    # (block index, min filters, max filters, step, dropout rate)
    conv_blocks = (
        (1, 32, 128, 32, 0.3),
        (2, 64, 256, 64, 0.3),
        (3, 128, 512, 128, 0.4),
    )

    model = models.Sequential()
    for index, min_filters, max_filters, step, dropout_rate in conv_blocks:
        # Filters are registered before the kernel size, in block order, so
        # the hyperparameter declaration order is stable between trials.
        filters = hp.Int(f'conv_{index}_filter',
                         min_value=min_filters, max_value=max_filters, step=step)
        kernel_size = hp.Choice(f'conv_{index}_kernel', values=[3, 5])
        # Only the first layer of a Sequential model declares the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if index == 1 else {}

        model.add(layers.Conv2D(filters=filters,
                                kernel_size=kernel_size,
                                activation='relu',
                                **first_layer_kwargs))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Dropout(dropout_rate))

    model.add(layers.Flatten())

    model.add(layers.Dense(units=hp.Int('dense_1_units', min_value=128, max_value=512, step=32),
                           activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.5))

    model.add(layers.Dense(num_classes, activation='softmax'))

    # Sparse loss: the labels are integer class ids, not one-hot vectors.
    model.compile(optimizer=optimizers.Adam(learning_rate=hp.Float('lr', min_value=1e-4, max_value=1e-2, sampling='LOG')),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# Set up the tuner
# Random search over the hyperparameters declared in build_model: up to 10
# trials, 2 executions per trial, ranked by validation accuracy.
# NOTE(review): tuner state is kept under output_dir/sign_language_model;
# presumably an existing folder is reloaded on re-run instead of starting a
# fresh search -- confirm, and clear it (or pass overwrite=True) if not wanted.
tuner = RandomSearch(build_model,
                     objective='val_accuracy',
                     max_trials=10,
                     executions_per_trial=2,
                     directory='output_dir',
                     project_name='sign_language_model')

# Run the tuner search
# image_generator yields ceil(n / batch_size) batches per pass (the last one
# may be partial). Rounding the step counts up makes one epoch / one validation
# run consume exactly one pass; floor division would leave the final batch
# unread, so every run would see a shifted window of the data, and it would
# request 0 steps for a set smaller than one batch.
search_batch_size = 32  # must match the batch_size train_gen / val_gen were created with
tuner.search(train_gen,
             steps_per_epoch=-(-len(X_train) // search_batch_size),  # ceiling division
             epochs=50,
             validation_data=val_gen,
             validation_steps=-(-len(X_val) // search_batch_size),  # ceiling division
             callbacks=[EarlyStopping(patience=5), ReduceLROnPlateau()])

# Get the best model
best_model = tuner.get_best_models(num_models=1)[0]

# Evaluate on the test set
# test_gen yields ceil(n / 32) batches per pass, the last one possibly partial.
# Rounding up evaluates every test sample exactly once; floor division would
# skip the final partial batch and request 0 steps for fewer than 32 samples.
test_steps = -(-len(X_test) // 32)  # ceiling division; 32 is test_gen's batch size
test_loss, test_acc = best_model.evaluate(test_gen, steps=test_steps)
print(f"Test accuracy: {test_acc * 100:.2f}%")

# Save the model
# Written to the notebook's working directory; the .h5 suffix presumably
# selects Keras' HDF5 format -- confirm for the installed Keras version.
best_model.save('sign_language_model.h5')
print("Model training and evaluation completed. Model saved.")

# Save the label encoder classes
# classes_ is the class-id -> label lookup produced by fit_transform; it is
# stored so predicted ids can be mapped back to label names later.
np.save('classes.npy', label_encoder.classes_)