In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import os
import random
from sklearn.metrics import accuracy_score, confusion_matrix

#import tensorflow_addons as tfa


In [6]:
class TamilHandwritingRecognizer:
    """CNN + bidirectional-LSTM recognizer for Tamil handwriting images.

    The instance owns the character vocabulary, the image geometry used for
    preprocessing, and (after ``build_model``) the compiled Keras model.

    Attributes:
        img_height, img_width: Size every input image is resized to.
        max_length: Number of label positions produced by ``encode_text``.
        tamil_chars: Vocabulary; the list index of a symbol is its class id.
        char_to_idx / idx_to_char: Lookup tables derived from ``tamil_chars``.
        vocab_size: ``len(tamil_chars)``.
        model: ``None`` until ``build_model`` is called.
    """

    def __init__(self, img_height=64, img_width=256, max_length=32):
        self.img_height = img_height
        self.img_width = img_width
        self.max_length = max_length

        # Tamil character set (simplified - expand as needed)
        self.tamil_chars = [
            'அ', 'ஆ', 'இ', 'ஈ', 'உ', 'ஊ', 'எ', 'ஏ', 'ஐ', 'ஒ', 'ஓ', 'ஔ',
            'க', 'ங', 'ச', 'ஞ', 'ட', 'ண', 'த', 'ந', 'ப', 'ம', 'ய', 'ர', 'ல', 'வ', 'ழ', 'ள', 'ற', 'ன',
            'ா', 'ி', 'ீ', 'ு', 'ூ', 'ெ', 'ே', 'ை', 'ொ', 'ோ', 'ௌ', '்',
            ' ', '<PAD>', '<START>', '<END>'
        ]

        self.char_to_idx = {char: idx for idx, char in enumerate(self.tamil_chars)}
        self.idx_to_char = {idx: char for idx, char in enumerate(self.tamil_chars)}
        self.vocab_size = len(self.tamil_chars)

        self.model = None

    def _require_model(self):
        """Return ``self.model`` or raise a clear error if it was never built."""
        if self.model is None:
            raise RuntimeError(
                "Model has not been built; call build_model() before train()/predict()."
            )
        return self.model

    def build_model(self):
        """Build and compile the CNN + BiLSTM network; store it in ``self.model``.

        Four conv/pool/batch-norm stages shrink the image by 16 in each
        direction. The feature map is then read column by column as a sequence
        of ``img_width // 16`` time steps for the recurrent layers, and a
        softmax over the vocabulary is emitted for every time step.

        NOTE(review): the output has ``img_width // 16`` time steps (16 with
        the defaults) whereas ``encode_text`` produces ``max_length`` rows
        (32 with the defaults). ``categorical_crossentropy`` needs both to
        agree - confirm the intended sequence length (or move to a CTC loss).

        Returns:
            The compiled ``tf.keras`` model.
        """
        input_img = layers.Input(shape=(self.img_height, self.img_width, 1), name='image_input')

        # CNN feature extraction: each stage halves height and width.
        x = input_img
        for filters in (32, 64, 128, 256):
            x = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
            x = layers.MaxPooling2D((2, 2))(x)
            x = layers.BatchNormalization()(x)

        # The feature map is (height, width, channels). Move width to the front
        # *before* flattening so that each time step holds exactly one image
        # column; a bare Reshape would interleave rows and columns.
        x = layers.Permute((2, 1, 3))(x)
        new_shape = ((self.img_width // 16), (self.img_height // 16) * 256)
        x = layers.Reshape(target_shape=new_shape)(x)
        x = layers.Dense(64, activation='relu')(x)

        # RNN layers
        x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.25))(x)
        x = layers.Bidirectional(layers.LSTM(64, return_sequences=True, dropout=0.25))(x)

        # Per-time-step distribution over the vocabulary.
        output = layers.Dense(self.vocab_size, activation='softmax', name='output')(x)

        self.model = models.Model(inputs=input_img, outputs=output)
        self.model.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )

        return self.model

    def preprocess_image(self, image_path):
        """Load an image as grayscale and convert it to a model-ready array.

        Args:
            image_path: Path of the image file.

        Returns:
            ``float32`` array of shape ``(img_height, img_width, 1)`` scaled
            to ``[0, 1]``.

        Raises:
            ValueError: If OpenCV cannot read/decode the file (``cv2.imread``
                signals this by returning ``None``).
        """
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise ValueError(f"Could not read image: {image_path}")

        # cv2.resize takes (width, height).
        img = cv2.resize(img, (self.img_width, self.img_height))
        img = img.astype(np.float32) / 255.0

        # Add channel dimension
        return np.expand_dims(img, axis=-1)

    def encode_text(self, text):
        """One-hot encode ``text`` as a fixed-length label matrix.

        Characters outside the vocabulary map to ``<PAD>``. Shorter texts are
        right-padded with ``<PAD>``; longer texts keep their first
        ``max_length`` characters.

        Args:
            text: The label string.

        Returns:
            Array of shape ``(max_length, vocab_size)``.
        """
        pad_idx = self.char_to_idx['<PAD>']
        encoded = [self.char_to_idx.get(char, pad_idx) for char in text]
        encoded = pad_sequences(
            [encoded],
            maxlen=self.max_length,
            padding='post',
            # Keep the beginning of over-long labels (the default drops it).
            truncating='post',
            # The default fill value 0 is a real character ('அ'), not <PAD>.
            value=pad_idx,
        )[0]
        return to_categorical(encoded, num_classes=self.vocab_size)

    def decode_prediction(self, prediction):
        """Turn per-step class scores into text.

        Args:
            prediction: Array of shape ``(steps, vocab_size)``.

        Returns:
            The arg-max character of every step joined together, with the
            ``<PAD>``/``<START>``/``<END>`` markers removed and surrounding
            whitespace stripped.
        """
        special_tokens = ('<PAD>', '<START>', '<END>')
        predicted_indices = np.argmax(prediction, axis=-1)
        chars = (self.idx_to_char[idx] for idx in predicted_indices)
        decoded_text = ''.join(char for char in chars if char not in special_tokens)
        return decoded_text.strip()

    def train(self, train_images, train_labels, validation_split=0.2, epochs=50, batch_size=32):
        """Fit the model, holding out part of the data for validation.

        Args:
            train_images: Array of preprocessed images.
            train_labels: Array of one-hot labels aligned with ``train_images``.
            validation_split: Fraction of samples used for validation.
            epochs: Maximum number of epochs.
            batch_size: Mini-batch size.

        Returns:
            The Keras ``History`` object returned by ``fit``.

        Raises:
            RuntimeError: If ``build_model`` has not been called.
        """
        model = self._require_model()

        # Fixed random_state keeps the split reproducible across runs.
        X_train, X_val, y_train, y_val = train_test_split(
            train_images, train_labels, test_size=validation_split, random_state=42
        )

        callbacks = [
            tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
            tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5),
            tf.keras.callbacks.ModelCheckpoint('tamil_handwriting_best.h5', save_best_only=True)
        ]

        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=1
        )

        return history

    def predict(self, image):
        """Predict the text shown in a single preprocessed image.

        Args:
            image: Array of shape ``(H, W)``, ``(H, W, 1)`` or ``(1, H, W, 1)``.
                Only the first element of a batch is decoded.

        Returns:
            ``(decoded_text, confidence)`` where ``confidence`` is the mean of
            the per-step maximum probabilities.

        Raises:
            RuntimeError: If ``build_model`` has not been called.
        """
        model = self._require_model()

        if len(image.shape) == 2:
            # Bare grayscale image: add the channel axis first.
            image = np.expand_dims(image, axis=-1)
        if len(image.shape) == 3:
            image = np.expand_dims(image, axis=0)

        prediction = model.predict(image)
        decoded_text = self.decode_prediction(prediction[0])

        # Calculate confidence
        confidence = np.mean(np.max(prediction[0], axis=-1))

        return decoded_text, confidence


# Data preparation functions
# These are module-level helpers: they take no ``self`` and are called by bare
# name elsewhere in the notebook, so they must not be nested in the class body.
def prepare_tamil_dataset(data_path, recognizer=None):
    """Load a ``labels.csv`` + ``images/`` dataset into arrays.

    ``labels.csv`` is read from ``data_path`` and must provide ``filename``
    and ``text`` columns. Rows whose image file does not exist are skipped.

    Args:
        data_path: Dataset root directory.
        recognizer: Optional ``TamilHandwritingRecognizer`` supplying image
            size and vocabulary. A default-configured one is created when
            omitted.

    Returns:
        ``(images, labels)`` as NumPy arrays, aligned by position.
    """
    labels_df = pd.read_csv(os.path.join(data_path, 'labels.csv'))

    if recognizer is None:
        recognizer = TamilHandwritingRecognizer()

    images = []
    labels = []

    for filename, text in zip(labels_df['filename'], labels_df['text']):
        image_path = os.path.join(data_path, 'images', filename)
        if not os.path.exists(image_path):
            continue

        # Preprocess the image and encode its label together so both lists
        # stay aligned.
        images.append(recognizer.preprocess_image(image_path))
        labels.append(recognizer.encode_text(text))

    return np.array(images), np.array(labels)


def augment_image(img):
    """Apply random augmentation: small rotation, brightness, contrast, flips.

    Args:
        img: Image of shape ``(H, W)`` or ``(H, W, 1)`` with float values.

    Returns:
        The augmented image as a ``(H, W, 1)`` float32 tensor.

    NOTE(review): mirroring handwriting usually produces shapes that no longer
    match their label; the flips are kept because the original requested them,
    but confirm they are really wanted.
    """
    img = np.asarray(img, dtype=np.float32)
    if img.ndim == 2:
        img = img[..., np.newaxis]

    # Random rotation of up to +/-0.15 rad about the image centre. A quarter
    # turn helper such as rot90 cannot express such small angles, so use an
    # affine warp (OpenCV expects the angle in degrees).
    angle = random.uniform(-0.15, 0.15)  # radians
    height, width = img.shape[:2]
    rotation = cv2.getRotationMatrix2D(
        (width / 2.0, height / 2.0), float(np.degrees(angle)), 1.0
    )
    rotated = cv2.warpAffine(
        img, rotation, (width, height),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REPLICATE,  # avoid black wedges in the corners
    )
    if rotated.ndim == 2:
        # OpenCV drops a singleton channel axis; restore it.
        rotated = rotated[..., np.newaxis]

    augmented = tf.image.random_brightness(rotated, max_delta=0.2)
    augmented = tf.image.random_contrast(augmented, lower=0.8, upper=1.2)
    augmented = tf.image.random_flip_left_right(augmented)
    augmented = tf.image.random_flip_up_down(augmented)
    return augmented


def evaluate_character_level(model, X, y_true, recognizer):
    """Print and return character-level accuracy and the confusion matrix.

    Predictions and targets are reduced to class indices with ``argmax`` and
    then flattened, so every sequence position counts as one sample (the
    scikit-learn metrics only accept 1-D label arrays).

    Args:
        model: Object with a Keras-style ``predict`` method.
        X: Model inputs.
        y_true: One-hot targets with the same leading shape as the predictions.
        recognizer: Currently unused; kept for interface compatibility.

    Returns:
        ``(accuracy, confusion_matrix)``.

    Raises:
        ValueError: If predictions and targets contain a different number of
            label positions.
    """
    y_pred = model.predict(X)
    y_pred_indices = np.argmax(y_pred, axis=-1).ravel()
    y_true_indices = np.argmax(y_true, axis=-1).ravel()
    if y_pred_indices.shape != y_true_indices.shape:
        raise ValueError(
            f"Prediction/target size mismatch: {y_pred_indices.size} predicted "
            f"positions vs {y_true_indices.size} target positions"
        )
    acc = accuracy_score(y_true_indices, y_pred_indices)
    cm = confusion_matrix(y_true_indices, y_pred_indices)
    print(f"Character-level accuracy: {acc:.4f}")
    return acc, cm

In [7]:
def prepare_tamil_dataset_kaggle(data_path, recognizer, augment=False,
                                 image_col='ImageId', label_col='Class Label'):
    """Load the Kaggle-style dataset (``train.csv`` + image folder) into arrays.

    Images are read from ``<data_path>/Train-Kaggle/Train-Kaggle``, converted
    to grayscale, resized to the recognizer's geometry and scaled to [0, 1].
    Each label is a single class index that is one-hot encoded.

    Rows are skipped (and counted) when the image file is missing or cannot be
    decoded, or when the label is not an integer in
    ``[0, len(recognizer.tamil_chars))``.

    NOTE(review): each label here is one ``(vocab_size,)`` vector, while the
    model in this notebook emits one distribution per time step - confirm the
    two are meant to be used together.

    Args:
        data_path: Dataset root directory.
        recognizer: Object providing ``img_width``, ``img_height``,
            ``tamil_chars`` and ``vocab_size``.
        augment: When true, every image is passed through ``augment_image``.
        image_col: Name of the CSV column holding the image file name.
        label_col: Name of the CSV column holding the integer class label.

    Returns:
        ``(images, labels)`` as NumPy arrays, aligned by position.

    Raises:
        KeyError: If ``image_col`` or ``label_col`` is not a column of
            ``train.csv``; the message lists the columns that are available.
    """
    train_csv = os.path.join(data_path, "train.csv")
    img_dir = os.path.join(data_path, "Train-Kaggle", "Train-Kaggle")
    df = pd.read_csv(train_csv)

    # Fail once, up front, with an actionable message instead of a bare
    # KeyError raised from inside the row loop.
    missing_cols = [col for col in (image_col, label_col) if col not in df.columns]
    if missing_cols:
        raise KeyError(
            f"{train_csv} has no column(s) {missing_cols}; "
            f"available columns: {list(df.columns)}"
        )

    num_classes = len(recognizer.tamil_chars)
    target_size = (recognizer.img_width, recognizer.img_height)  # cv2 order: (w, h)

    images = []
    labels = []
    skipped = 0
    for image_id, raw_label in zip(df[image_col], df[label_col]):
        # str() so that numeric ids cannot break os.path.join.
        img_path = os.path.join(img_dir, str(image_id))

        # Data cleaning: skip if file missing or label out of range
        if not os.path.exists(img_path):
            skipped += 1
            continue
        try:
            label_idx = int(raw_label)
        except (TypeError, ValueError):  # NaN, None, non-numeric text
            label_idx = -1
        if label_idx != raw_label or not (0 <= label_idx < num_classes):
            skipped += 1
            continue

        # Only image decoding problems count as "bad sample"; anything else is
        # a programming error and must not be swallowed.
        try:
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is not None:
                img = cv2.resize(img, target_size)
        except cv2.error:
            img = None
        if img is None:
            skipped += 1
            continue

        img = img.astype(np.float32) / 255.0
        img = np.expand_dims(img, axis=-1)
        if augment:
            img = np.asarray(augment_image(img), dtype=np.float32)
        images.append(img)

        # One-hot encode label (the class id is already the vocabulary index).
        labels.append(
            tf.keras.utils.to_categorical(label_idx, num_classes=recognizer.vocab_size)
        )

    print(f"Loaded {len(images)} samples, skipped {skipped} due to errors.")
    return np.array(images), np.array(labels)

In [9]:
# Training script
if __name__ == "__main__":
    # Build ONE recognizer and keep it: the instance whose model is summarised
    # here is the same one that is later used for loading data and training.
    recognizer = TamilHandwritingRecognizer(img_height=64, img_width=256, max_length=32)
    model = recognizer.build_model()

    print("Model Summary:")
    model.summary()

    # Load and prepare data from the Kaggle input directory.
    # NOTE(review): the recorded run of this cell ended with
    # "KeyError: 'ImageId'", i.e. train.csv does not contain that column -
    # check the real column names of the dataset.
    # NOTE(review): augment=True augments every loaded sample, including the
    # ones that train() later holds out for validation.
    data_path = "/kaggle/input/tamil-hwcr"
    X_train, y_train = prepare_tamil_dataset_kaggle(data_path, recognizer, augment=True)

    # Next steps (left disabled until the data loads correctly):
    #   history = recognizer.train(X_train, y_train, epochs=100)
    #   model.save('tamil_handwriting_model.h5')
    #
    # For a custom dataset laid out as <root>/labels.csv + <root>/images/:
    #   X_train, y_train = prepare_tamil_dataset("path/to/tamil_handwriting_dataset")
    # Tamil handwriting data can be downloaded from:
    #   https://www.kaggle.com/datasets/tamil-handwriting-recognition

    print("Tamil Handwriting Recognition Model Ready!")


Model Summary:
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 image_input (InputLayer)    [(None, 64, 256, 1)]      0         
                                                                 
 conv2d_4 (Conv2D)           (None, 64, 256, 32)       320       
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 32, 128, 32)      0         
 2D)                                                             
                                                                 
 batch_normalization_4 (Batc  (None, 32, 128, 32)      128       
 hNormalization)                                                 
                                                                 
 conv2d_5 (Conv2D)           (None, 32, 128, 64)       18496     
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 16, 64,

KeyError: 'ImageId'