In [1]:
# Extract the character from each grid cell and save it in its own folder

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os

def preprocess_sheet(image_path):
    """Load a scanned sheet and return a binarized version of it.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur and
    binarized with mean adaptive thresholding (block size 11, offset 8).

    Args:
        image_path: Path of the sheet image to read.

    Returns:
        A single-channel image with values 0 and 255 in which pixels darker
        than their local mean (minus the offset) are 0 and all others are 255.

    Raises:
        OSError: If the file is missing or cannot be decoded as an image.
    """
    # cv2.imread signals failure by returning None instead of raising, so
    # check it here rather than failing later with an opaque cvtColor error.
    image = cv2.imread(image_path)
    if image is None:
        raise OSError(f"Could not read image (missing file or unsupported format): {image_path}")

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian blur to smooth the image and reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # THRESH_BINARY with maxValue 255 is exactly THRESH_BINARY_INV followed by
    # bitwise_not, so the result is produced in a single pass.
    binary = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 8)

    return binary

def extract_characters(binary_image, rows, cols, crop_percent=0.1):
    """Split a 2-D image into a rows x cols grid and return the trimmed cells.

    Each cell is ``width // cols`` by ``height // rows`` pixels; a margin of
    ``crop_percent`` of the cell size is removed from every side of the cell.

    Args:
        binary_image: 2-D array (height, width) to cut up.
        rows: Number of grid rows.
        cols: Number of grid columns.
        crop_percent: Fraction of the cell width/height trimmed from each side.

    Returns:
        A list of ``rows * cols`` array slices in row-major order
        (left to right, then top to bottom).
    """
    img_h, img_w = binary_image.shape

    # Integer cell size; any remainder pixels at the right/bottom are unused.
    cell_w = img_w // cols
    cell_h = img_h // rows

    # Margin trimmed from each side of a cell.
    margin_x = int(cell_w * crop_percent)
    margin_y = int(cell_h * crop_percent)

    def _cell_at(r, c):
        # Cell bounds shrunk by the margin, then clamped to the image.
        left = max(c * cell_w + margin_x, 0)
        top = max(r * cell_h + margin_y, 0)
        right = min((c + 1) * cell_w - margin_x, img_w)
        bottom = min((r + 1) * cell_h - margin_y, img_h)
        return binary_image[top:bottom, left:right]

    return [_cell_at(r, c) for r in range(rows) for c in range(cols)]

def save_characters(characters, output_folder, sheet_index, rows, cols):
    """Write each extracted character image into its own per-character folder.

    The image at position ``i`` of ``characters`` is written to
    ``<output_folder>/character_<i>/<sheet_index + 1>.png``.

    NOTE(review): folder indices are 0-based (``character_0`` ...), while the
    Tulu-to-Kannada mapping in this notebook uses keys starting at
    ``character_1`` -- confirm which numbering is intended.

    Args:
        characters: Sequence of images to write, in grid order.
        output_folder: Root folder that receives the ``character_<i>`` folders.
        sheet_index: 0-based index of the source sheet; the file name uses
            ``sheet_index + 1``.
        rows: Unused; kept for interface compatibility.
        cols: Unused; kept for interface compatibility.
    """
    # Ensure the output folder exists even when there is nothing to save
    os.makedirs(output_folder, exist_ok=True)

    for index, character in enumerate(characters):
        character_folder = os.path.join(output_folder, f'character_{index}')
        os.makedirs(character_folder, exist_ok=True)

        character_path = os.path.join(character_folder, f'{sheet_index + 1}.png')

        # cv2.imwrite reports failure through its return value, not an
        # exception, so surface it instead of silently losing the image.
        if not cv2.imwrite(character_path, character):
            print(f"Failed to write character image: {character_path}")

def process_all_sheets(input_folder, output_folder, rows, cols):
    """Run preprocessing, cell extraction and saving for every .jpg sheet.

    Sheets are taken from ``input_folder`` in sorted file-name order; the
    position in that order is passed on as the sheet index. Returns ``None``;
    prints a message and returns early when the folder is missing or holds
    no ``.jpg`` files.

    Args:
        input_folder: Folder containing the sheet images.
        output_folder: Folder that receives the extracted characters.
        rows: Number of grid rows on each sheet.
        cols: Number of grid columns on each sheet.
    """
    if not os.path.exists(input_folder):
        print(f"Input folder does not exist: {input_folder}")
        return

    # Show the raw directory listing to help with debugging
    all_files = os.listdir(input_folder)
    print(f"Files in the input folder: {all_files}")

    # Keep only .jpg files (case-insensitive), in sorted order
    sheet_files = [name for name in all_files if name.lower().endswith('.jpg')]
    sheet_files.sort()

    if len(sheet_files) == 0:
        print(f"No .jpg files found in the input folder: {input_folder}")
        return

    print(f"Processing {len(sheet_files)} sheets from folder: {input_folder}")

    for sheet_index in range(len(sheet_files)):
        sheet_path = os.path.join(input_folder, sheet_files[sheet_index])
        print(f"Processing sheet: {sheet_path}")

        # binarize -> cut into cells -> write each cell to its folder
        binarized_image = preprocess_sheet(sheet_path)
        save_characters(
            extract_characters(binarized_image, rows, cols),
            output_folder,
            sheet_index,
            rows,
            cols,
        )

# Parameters
# Raw strings already keep backslashes literal, so each separator is written
# once; r'D:\\...' would put doubled backslashes into the path.
input_folder = r'D:\Tulu_lipi\PROJECT'  # Folder containing the 112 images
output_folder = r'D:\Tulu_lipi\Output'  # Folder where the extracted characters will be saved
rows = 6  # grid rows per sheet
cols = 9  # grid columns per sheet

# Process all sheets
process_all_sheets(input_folder, output_folder, rows, cols)

In [3]:
# Augmentation

In [None]:
import cv2
import os
import numpy as np

def augment_images_with_slant(input_folder, output_folder, target_size=(150, 150)):
    """Write a binarized copy and two rotated copies of every image found.

    ``input_folder`` is walked recursively and its sub-folder layout is
    mirrored under ``output_folder``. Each readable image is resized to
    ``target_size``, converted to grayscale, thresholded at 127 and inverted;
    that result and its +10 / -10 degree rotations about the image centre are
    saved as ``original_<idx>.png``, ``left_slanted_<idx>.png`` and
    ``right_slanted_<idx>.png``, where ``idx`` is the file's position within
    its folder listing. Files that cannot be read or processed are reported
    and skipped.

    Args:
        input_folder: Root folder with the source images.
        output_folder: Root folder that receives the augmented images.
        target_size: ``(width, height)`` passed to ``cv2.resize``.
    """
    # Slant angles in degrees for the "left" and "right" variants
    left_slant_angle = 10
    right_slant_angle = -10

    # Every image is resized to target_size, so the rotation centre and
    # matrices are the same for all files and are computed once up front.
    width, height = target_size
    center = (width // 2, height // 2)
    left_rotation_matrix = cv2.getRotationMatrix2D(center, left_slant_angle, 1)
    right_rotation_matrix = cv2.getRotationMatrix2D(center, right_slant_angle, 1)

    for root, dirs, files in os.walk(input_folder):
        if not files:
            continue

        # Mirror the input sub-folder structure in the output folder
        output_subfolder_path = os.path.join(output_folder, os.path.relpath(root, input_folder))
        os.makedirs(output_subfolder_path, exist_ok=True)

        for idx, file in enumerate(files):
            input_path = os.path.join(root, file)

            try:
                # cv2.imread returns None (no exception) for unreadable files
                original_image = cv2.imread(input_path)
                if original_image is None:
                    print(f"Skipping unreadable or non-image file: {input_path}")
                    continue

                # Resize, convert to grayscale, then binarize and invert
                resized_image = cv2.resize(original_image, target_size)
                gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
                _, binary_image = cv2.threshold(gray_image, 127, 255, cv2.THRESH_BINARY)
                binary_image = cv2.bitwise_not(binary_image)

                # Rotate about the centre; uncovered corners are filled with 0
                left_slanted_image = cv2.warpAffine(
                    binary_image, left_rotation_matrix, (width, height),
                    borderMode=cv2.BORDER_CONSTANT, borderValue=(0,))
                right_slanted_image = cv2.warpAffine(
                    binary_image, right_rotation_matrix, (width, height),
                    borderMode=cv2.BORDER_CONSTANT, borderValue=(0,))

                # Save the binarized original and both rotated variants
                variants = (
                    ("original", binary_image),
                    ("left_slanted", left_slanted_image),
                    ("right_slanted", right_slanted_image),
                )
                for prefix, variant_image in variants:
                    variant_path = os.path.join(output_subfolder_path, f"{prefix}_{idx}.png")
                    # imwrite reports failure via its return value
                    if not cv2.imwrite(variant_path, variant_image):
                        print(f"Failed to write image: {variant_path}")

            except Exception as e:
                # Deliberate best-effort: report the file and keep going
                print(f"Error processing image {input_path}: {e}")

# The module name lives in the dunder variable __name__ and equals
# "__main__" when run as a script or notebook cell; `_name_` is undefined
# and raised NameError.
if __name__ == "__main__":
    # Set your input folder and output folder for images
    input_folder = 'D:\\Tulu_lipi\\Output'  # Change this to your input images folder path
    output_folder = 'D:\\Tulu_lipi\\dataset'  # Change this to your output folder for augmented images

    # Augment the images with left and right slants, and save the results
    augment_images_with_slant(input_folder, output_folder)

In [4]:
# Tulu to Kannada mapping

In [None]:
import os

# Mapping from Tulu folder names to Kannada characters.
# Keys are 'character_1' ... 'character_49', generated from the position of
# each Kannada character in the ordered list below (numbering starts at 1).
tulu_to_kannada_mapping = {
    f'character_{position}': kannada_character
    for position, kannada_character in enumerate(
        [
            # 1-15
            'ಅ', 'ಆ', 'ಇ', 'ಈ', 'ಉ', 'ಊ', 'ಋ', 'ೠ',
            'ಎ', 'ಏ', 'ಐ', 'ಒ', 'ಔ', 'ಅಂ', 'ಅಃ',
            # 16-25
            'ಕ', 'ಖ', 'ಗ', 'ಘ', 'ಙ',
            'ಚ', 'ಛ', 'ಜ', 'ಝ', 'ಞ',
            # 26-35
            'ಟ', 'ಠ', 'ಡ', 'ಢ', 'ಣ',
            'ತ', 'ಥ', 'ದ', 'ಧ', 'ನ',
            # 36-40
            'ಪ', 'ಫ', 'ಬ', 'ಭ', 'ಮ',
            # 41-49
            'ಯ', 'ರ', 'ಲ', 'ವ', 'ಶ', 'ಷ', 'ಸ', 'ಹ', 'ಳ',
            # Append further characters here if there are more
        ],
        start=1,
    )
}

# Path to the dataset
tulu_characters_folder = "D:\\Tulu_lipi\\dataset"

# Rename directories and images based on the mapping
def rename_folders_and_images_to_kannada_characters(root_folder, mapping=None):
    """Rename mapped character folders, and the files inside them, to Kannada.

    Every direct sub-folder of ``root_folder`` whose name is a key of
    ``mapping`` is renamed to the mapped character, and the files inside it
    are renamed to ``<character>_<n><original extension>`` with ``n`` counting
    from 1 in sorted file-name order.

    Folders without a mapping entry are left untouched and reported. They are
    not renamed to a shared placeholder name, because several unmapped
    folders would then collide on the same target, and a second run would
    relabel the already-renamed folders. Existing folders or files are never
    overwritten; such entries are reported and skipped.

    Args:
        root_folder: Folder that contains the per-character sub-folders.
        mapping: Optional dict of folder name -> new name. Defaults to the
            module-level ``tulu_to_kannada_mapping``.
    """
    if mapping is None:
        mapping = tulu_to_kannada_mapping

    # Sorted so that processing order does not depend on the file system
    for folder_name in sorted(os.listdir(root_folder)):
        folder_path = os.path.join(root_folder, folder_name)
        if not os.path.isdir(folder_path):
            continue

        kannada_character = mapping.get(folder_name)
        if kannada_character is None:
            print(f"Skipping folder with no Kannada mapping: {folder_path}")
            continue

        new_folder_path = os.path.join(root_folder, kannada_character)
        if os.path.exists(new_folder_path):
            print(f"Skipping {folder_path}: target already exists: {new_folder_path}")
            continue
        os.rename(folder_path, new_folder_path)

        # Only regular files are renamed and numbered; sub-folders are ignored
        image_names = sorted(
            name for name in os.listdir(new_folder_path)
            if os.path.isfile(os.path.join(new_folder_path, name))
        )
        for idx, image_name in enumerate(image_names, start=1):
            extension = os.path.splitext(image_name)[1]
            new_image_name = f"{kannada_character}_{idx}{extension}"
            if new_image_name == image_name:
                continue  # already has its final name

            new_image_path = os.path.join(new_folder_path, new_image_name)
            if os.path.exists(new_image_path):
                print(f"Skipping {image_name}: target already exists: {new_image_path}")
                continue
            os.rename(os.path.join(new_folder_path, image_name), new_image_path)

# The module name lives in the dunder variable __name__ and equals
# "__main__" when run as a script or notebook cell; `_name_` is undefined
# and raised NameError.
if __name__ == "__main__":
    # Rename the folders and images to Kannada characters based on the mappings
    rename_folders_and_images_to_kannada_characters(tulu_characters_folder)
    print("Folder and image renaming to Kannada characters completed.")

In [5]:
# Code for training the model

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define image dimensions and paths
img_height, img_width = 150, 150
batch_size = 32
data_dir = 'D:\\Tulu_lipi\\dataset'

# Training data: normalization plus random shear/zoom augmentation.
# No horizontal flip: a mirrored character is not a valid sample of its class.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,  # 20% of data for validation
    shear_range=0.2,
    zoom_range=0.2
)

# Validation data: normalization only, so validation metrics are measured on
# undistorted images. The same validation_split keeps the training and
# validation subsets complementary.
validation_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

train_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

validation_generator = validation_datagen.flow_from_directory(
    data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
# CNN variant with batch normalization after every convolution.
# NOTE(review): the next cell assigns a different network to `model`, so this
# one is replaced before training when the notebook is run top to bottom.
num_classes = 49

model = Sequential()

# Convolution block 1: 32 filters on the (img_height, img_width, 3) input
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

# Convolution block 2: 64 filters
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

# Convolution block 3: 128 filters
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

# Classifier head: one softmax output per class
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Size the output layer from the classes the generator actually found, so
# the model cannot disagree with the one-hot labels if the dataset folder
# does not contain exactly 49 class folders.
num_classes = train_generator.num_classes

# CNN with dropout after every pooling stage
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')  # one output per character class
])

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
epochs = 100

# len(generator) is the number of batches per pass including the final
# partial batch; `samples // batch_size` dropped that batch and evaluates to
# 0 steps when a subset holds fewer than batch_size images.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    epochs=epochs
)
model.save('tulu_character_recognition_model2.h5')

# Evaluate on the full validation subset
val_loss, val_acc = model.evaluate(validation_generator, steps=len(validation_generator))
print(f"Validation loss: {val_loss:.4f}, Validation accuracy: {val_acc:.4f}")