In [2]:
# Notebook-wide imports.
# NOTE(review): several names are imported more than once below (random,
# LabelBinarizer, ImageDataGenerator, Dense, MaxPooling2D) and both `keras`
# and `tensorflow.keras` are used; the model cell later re-imports the layer
# classes from `tensorflow.keras.layers`, which is what the model code ends up
# using.
import numpy as np
# NOTE(review): the alias `pdZ` looks like a typo for `pd`; it is not
# referenced in the visible cells — confirm before renaming.
import pandas as pdZ
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import random 
import cv2
import random
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.utils import to_categorical  # Updated import
from tensorflow.keras.models import Sequential
from tensorflow.keras import optimizers
from sklearn.preprocessing import LabelBinarizer
from keras import backend as K
from keras.layers import Dense, Activation, Flatten, Dense,MaxPooling2D, Dropout
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf


In [4]:
import numpy as np
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Directory with the modern reference characters; it is passed to
# modern_generator_func later in this cell and read with flow_from_directory.
modern_dir = '/kaggle/input/8th-century-tamil-inscriptions/Modern characters'

def custom_preprocessing(image):
    """Binarise a single character image.

    Pipeline: resize to 32x32, convert to 8-bit, blur with a 5x5 Gaussian
    kernel, apply adaptive Gaussian thresholding (block size 11, constant 2)
    and scale the 0/255 result to 0.0/1.0.

    Parameters
    ----------
    image : numpy.ndarray
        Single-channel image. ``uint8`` input is used as-is; any other dtype
        is assumed to hold intensities scaled to [0, 1] (which is what a
        generator created with ``rescale=1./255`` delivers).

    Returns
    -------
    numpy.ndarray
        ``float64`` array containing only 0.0 and 1.0.
    """
    # Resize image to (32, 32)
    image = cv2.resize(image, (32, 32))

    # adaptiveThreshold needs 8-bit input. Round to the nearest level instead
    # of truncating (200/255 stored as float32 can come back as 199.99998),
    # and clip so values outside [0, 1] saturate rather than wrap around
    # during the uint8 cast.
    if image.dtype != np.uint8:
        image = np.clip(np.rint(image * 255.0), 0, 255).astype(np.uint8)

    # Apply Gaussian smoothing to suppress noise before thresholding
    smoothed_image = cv2.GaussianBlur(image, (5, 5), 0)

    # Apply adaptive thresholding (threshold computed per 11x11 neighbourhood)
    adaptive_thresholded = cv2.adaptiveThreshold(
        smoothed_image,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2
    )

    # Normalize the pixel values to the range [0, 1]
    return adaptive_thresholded / 255.0


# Shared ImageDataGenerator used for every directory read in this cell
# (augmented, categorised and modern images).
# NOTE(review): with `validation_split=0.2` here and `subset='training'` in
# custom_generator, the 'validation' 20% of each folder is never requested by
# any visible cell (the train/test split is done later with train_test_split).
# Confirm that holding those images back is intentional.
datagen = ImageDataGenerator(
    rescale=1./255,  # Multiply pixel values by 1/255 (8-bit input -> [0, 1])
    validation_split=0.2  # Reserve 20% of each directory as the 'validation' subset
)

# Custom generator to apply preprocessing
def custom_generator(directory, datagen, subset='training', shuffle=True, batch_size=32):
    """Yield preprocessed ``(images, labels)`` batches read from ``directory``.

    Images are loaded as 32x32 grayscale with sparse (integer) class labels,
    and every image of a batch is passed through ``custom_preprocessing``.

    Parameters
    ----------
    directory : str
        Folder handed to ``datagen.flow_from_directory``.
    datagen : ImageDataGenerator
        Generator object providing ``flow_from_directory``.
    subset : str or None, default 'training'
        Forwarded to ``flow_from_directory``; use ``'validation'`` to read the
        held-out part defined by the generator's ``validation_split``.
    shuffle : bool, default True
        Forwarded to ``flow_from_directory``.
    batch_size : int, default 32
        Forwarded to ``flow_from_directory``.

    Yields
    ------
    tuple
        ``(processed_batch, labels)`` where ``processed_batch`` stacks the
        preprocessed images of one batch and ``labels`` is passed through
        unchanged.

    Notes
    -----
    The directory is only opened on the first ``next()`` call. Keras directory
    iterators cycle indefinitely, so this generator does not stop on its own;
    the consumer decides how many batches to draw.
    """
    flow = datagen.flow_from_directory(
        directory,
        target_size=(32, 32),
        color_mode='grayscale',  # images are grayscale
        batch_size=batch_size,
        class_mode='sparse',
        shuffle=shuffle,
        subset=subset,
    )
    for batch in flow:
        # batch[0] holds the images, batch[1] the labels
        processed_batch = np.array([custom_preprocessing(image) for image in batch[0]])
        yield processed_batch, batch[1]

# Generators over the two inscription folders whose images are later combined
# and passed to create_pairs as `ancient_images`. custom_generator is a
# generator function, so nothing is read from disk until the first next().
augmented_generator = custom_generator('/kaggle/input/8th-century-tamil-inscriptions/augmented_images', datagen)
categorised_generator = custom_generator('/kaggle/input/8th-century-tamil-inscriptions/images_categorised', datagen)

def modern_generator_func(directory, datagen):
    """Yield preprocessed ``(images, labels)`` batches of modern characters.

    Reads ``directory`` as 32x32 grayscale images with sparse labels in
    batches of 32, in a fixed order (``shuffle=False``) and without selecting
    a subset, then applies ``custom_preprocessing`` to every image.

    Parameters
    ----------
    directory : str
        Folder handed to ``datagen.flow_from_directory``.
    datagen : ImageDataGenerator
        Generator object providing ``flow_from_directory``.

    Yields
    ------
    tuple
        ``(processed_batch, labels)`` for one batch.

    Notes
    -----
    Keras directory iterators cycle indefinitely, so drawing more batches
    than one pass over the folder returns the same images again.
    """
    flow = datagen.flow_from_directory(
        directory,
        target_size=(32, 32),
        color_mode='grayscale',
        batch_size=32,
        class_mode='sparse',
        shuffle=False,
    )
    for batch in flow:
        processed_batch = np.array([custom_preprocessing(image) for image in batch[0]])
        yield processed_batch, batch[1]

# Generator over the modern reference characters (unshuffled); the folder is
# only scanned when the first batch is requested.
modern_generator = modern_generator_func(modern_dir, datagen)


# Function to load data from a generator
def load_data(generator, num_batches=10):
    """Collect batches from ``generator`` into two concatenated arrays.

    Parameters
    ----------
    generator : iterable
        Yields ``(images, labels)`` batches (anything indexable with ``[0]``
        and ``[1]``).
    num_batches : int or None, default 10
        Maximum number of batches to draw. ``None`` drains the source and is
        only meaningful for sources that terminate.

    Returns
    -------
    tuple of numpy.ndarray
        ``(images, labels)``, each concatenated along the first axis.

    Raises
    ------
    ValueError
        If no batch was collected (empty source or ``num_batches <= 0``).

    Notes
    -----
    Exactly the number of batches returned is consumed from ``generator``.
    A source that ends early simply yields fewer batches instead of leaking
    ``StopIteration`` to the caller. For sources that cycle forever, asking
    for more batches than one pass contains returns repeated samples.
    """
    images, labels = [], []
    batches = iter(generator)
    # Check the budget before pulling, so no extra batch is consumed.
    while num_batches is None or len(images) < num_batches:
        try:
            batch = next(batches)
        except StopIteration:
            break
        images.append(batch[0])
        labels.append(batch[1])

    if not images:
        raise ValueError("load_data: no batches were collected from the generator")
    return np.concatenate(images), np.concatenate(labels)

# Load the augmented and categorised data (load_data default: 10 batches,
# each of up to 32 images).
# NOTE(review): the cell output reports 1369 / 136 / 27 images for the three
# folders. Keras directory iterators restart when exhausted, so 10 batches of
# 32 presumably read only 320 of the 1369 augmented images, pass over the 136
# categorised images twice (272 rows) and return the 27 modern images ten
# times (270 rows). Those sizes agree with the training log:
# 592 * 270 pairs * 0.8 / 32 = 3996 steps per epoch. Confirm and size
# num_batches per folder.
augmented_images, augmented_labels = load_data(augmented_generator)
categorised_images, categorised_labels = load_data(categorised_generator)

# Combine the images and labels from augmented and categorised folders
# NOTE(review): each folder gets its class indices from its own
# flow_from_directory call; this assumes the same index means the same
# character in all three folders — TODO confirm the sub-folder names match.
combined_images = np.concatenate([augmented_images, categorised_images], axis=0)
combined_labels = np.concatenate([augmented_labels, categorised_labels], axis=0)
modern_images, modern_labels = load_data(modern_generator)


# Split the combined dataset into training and validation sets
# NOTE(review): X_train, X_val and y_val are not used by any visible cell, and
# y_train is reassigned by the pair-level split in the next cell.
X_train, X_val, y_train, y_val = train_test_split(
    combined_images, combined_labels, test_size=0.2, random_state=42
)

Found 1369 images belonging to 27 classes.
Found 136 images belonging to 27 classes.
Found 27 images belonging to 27 classes.


In [5]:

# Create pairs of images for the Siamese network
def create_pairs(ancient_images, modern_images, labels, modern_labels=None):
    """Pair every ancient image with every modern image.

    Pairs are ordered ancient-major: all modern images for ancient image 0,
    then all modern images for ancient image 1, and so on.

    Parameters
    ----------
    ancient_images : array-like
        Ancient images, one per entry of ``labels``.
    modern_images : array-like
        Modern images, one per entry of ``modern_labels``.
    labels : array-like, 1-D
        Class label of each ancient image.
    modern_labels : array-like, 1-D, optional
        Class label of each modern image. When omitted, the notebook-level
        variable ``modern_labels`` is used, which is what this function read
        implicitly before the parameter existed.

    Returns
    -------
    tuple of numpy.ndarray
        ``(pairs_ancient, pairs_modern, pair_labels)``, each with
        ``len(labels) * len(modern_labels)`` rows. ``pair_labels`` is 1 where
        the two labels are equal and 0 otherwise.

    Raises
    ------
    NameError
        If ``modern_labels`` is neither passed nor defined at notebook level.
    ValueError
        If a label array is not 1-D or its length differs from the number of
        images it describes.
    """
    if modern_labels is None:
        # Legacy call style: fall back to the global the function used to
        # depend on, and fail with the same exception type if it is missing.
        try:
            modern_labels = globals()["modern_labels"]
        except KeyError:
            raise NameError(
                "name 'modern_labels' is not defined; pass it as the fourth argument"
            ) from None

    ancient_images = np.asarray(ancient_images)
    modern_images = np.asarray(modern_images)
    labels = np.asarray(labels)
    modern_labels = np.asarray(modern_labels)

    if labels.ndim != 1 or modern_labels.ndim != 1:
        raise ValueError("labels and modern_labels must be 1-D arrays")
    if len(ancient_images) != len(labels):
        raise ValueError("ancient_images and labels must have the same length")
    if len(modern_images) != len(modern_labels):
        raise ValueError("modern_images and modern_labels must have the same length")

    n_ancient, n_modern = len(labels), len(modern_labels)
    # Row k of the result pairs ancient k // n_modern with modern k % n_modern.
    ancient_idx = np.repeat(np.arange(n_ancient), n_modern)
    modern_idx = np.tile(np.arange(n_modern), n_ancient)

    pair_labels = (labels[ancient_idx] == modern_labels[modern_idx]).astype(int)
    return ancient_images[ancient_idx], modern_images[modern_idx], pair_labels

# Create the pairs
# Every combined image is paired with every modern image, so the result has
# len(combined_labels) * len(modern_labels) rows; y_pairs is 1 for matching
# classes and 0 otherwise.
X_ancient, X_modern, y_pairs = create_pairs(combined_images, modern_images, combined_labels)

# Split into train and test sets
# y_train is reassigned here and replaces the image-level y_train from the
# previous cell.
# NOTE(review): the split is over pairs, not over images, so the same image
# can contribute pairs to both the train and the test side — confirm this is
# acceptable for the reported test metrics.
X_ancient_train, X_ancient_test, X_modern_train, X_modern_test, y_train, y_test = train_test_split(
    X_ancient, X_modern, y_pairs, test_size=0.2, random_state=42
)


In [6]:
import numpy as np
import cv2
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout, MaxPooling2D, Conv2D, Lambda, Reshape, LSTM, Bidirectional
import tensorflow as tf  # Ensure TensorFlow is imported

# Define the base network
def create_base_network(input_shape):
    """Build the convolutional encoder shared by both Siamese branches.

    Three Conv2D + MaxPooling2D stages (32, 64 and 128 filters, 3x3 kernels),
    followed by dropout, flattening, a 128-unit dense layer and a final
    dropout. The output is therefore a 128-dimensional vector per image.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input image, without the batch axis.

    Returns
    -------
    Sequential
        The uncompiled encoder model.
    """
    model = Sequential()
    # Declare the input with an explicit Input object rather than passing
    # `input_shape=` to the first layer; Keras warns about the latter.
    model.add(Input(shape=input_shape))
    model.add(Conv2D(32, (3, 3), padding="same", activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.2))
    return model

# Define the Siamese network
def create_siamese_network(input_shape=(32, 32, 1), num_classes=28):
    """Assemble the two-input Siamese model.

    Both inputs go through one shared encoder from ``create_base_network``.
    The element-wise absolute difference of the two embeddings is reshaped to
    a (4, 32) sequence, fed to a bidirectional LSTM and classified by a
    softmax layer with ``num_classes`` units.

    Parameters
    ----------
    input_shape : tuple, default (32, 32, 1)
        Shape of one image for either input.
    num_classes : int, default 28
        Number of softmax output units.

    Returns
    -------
    Model
        Uncompiled model taking ``[ancient, modern]`` inputs.
    """
    # One encoder instance, so both branches share weights.
    shared_encoder = create_base_network(input_shape)

    input_ancient = Input(shape=input_shape)
    input_modern = Input(shape=input_shape)
    embedding_ancient = shared_encoder(input_ancient)
    embedding_modern = shared_encoder(input_modern)

    # Element-wise |a - b| between the two embeddings.
    embedding_gap = Lambda(lambda tensors: tf.abs(tensors[0] - tensors[1]))(
        [embedding_ancient, embedding_modern]
    )

    # The (4, 32) target relies on the encoder emitting 128 values per image.
    as_sequence = Reshape((4, 32))(embedding_gap)

    # Bidirectional LSTM that returns only its final output.
    recurrent_features = Bidirectional(
        LSTM(64, return_sequences=False, dropout=0.25)
    )(as_sequence)

    class_scores = Dense(num_classes, activation='softmax')(recurrent_features)

    return Model(inputs=[input_ancient, input_modern], outputs=class_scores)

# Create the Siamese model with the function defaults
# (32x32x1 inputs, 28 softmax outputs).
siamese_model = create_siamese_network()

# Display the model summary
siamese_model.summary()

# Compile the model
# NOTE(review): the targets used later come from create_pairs, which only
# produces 0/1 match labels, so a 28-way softmax with categorical
# cross-entropy uses just two of its outputs. A single sigmoid unit with
# binary cross-entropy is the usual choice for pair matching — confirm intent.
siamese_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:

# Compile the model for multi-class classification
siamese_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Convert labels to one-hot encoding for multi-class classification.
# Only 1-D label vectors are encoded, so running this cell a second time
# leaves the already-encoded (n, 28) arrays untouched instead of encoding
# them again. to_categorical on a 1-D vector already returns shape (n, 28),
# so no reshape is needed afterwards.
# NOTE(review): y_train / y_test hold the 0/1 match labels from create_pairs,
# so only the first two of the 28 columns are ever set.
if np.ndim(y_train) == 1:
    y_train = to_categorical(y_train, num_classes=28)
if np.ndim(y_test) == 1:
    y_test = to_categorical(y_test, num_classes=28)



In [None]:
from tensorflow.keras.callbacks import EarlyStopping 
# Stop training once val_loss has not improved for 2 consecutive epochs and
# restore the weights from the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True
)

# Train the model with EarlyStopping
# NOTE(review): validation_data is the held-out "test" split, so early
# stopping is steered by the same pairs that are later reported as test
# results. Pairs match only when the classes agree, so presumably about 1 in
# 27 pairs is positive; an accuracy near 0.96 is then what an all-negative
# predictor would score — verify with precision/recall on the positive class.
history = siamese_model.fit(
    [X_ancient_train, X_modern_train],
    y_train,
    validation_data=([X_ancient_test, X_modern_test], y_test),
    epochs=20,
    batch_size=32,
    callbacks=[early_stopping]
)




Epoch 1/20
[1m3996/3996[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 52ms/step - accuracy: 0.9609 - loss: 0.1963 - val_accuracy: 0.9540 - val_loss: 0.1172
Epoch 2/20
[1m3623/3996[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m17s[0m 47ms/step - accuracy: 0.9711 - loss: 0.0953

In [None]:
def _plot_history_curves(train_key, val_key, title, ylabel):
    """Draw the train and validation curves of one metric from `history`."""
    for key in (train_key, val_key):
        plt.plot(history.history[key])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()


# Accuracy per epoch (training vs. validation)
_plot_history_curves('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy')

# Loss per epoch (training vs. validation)
_plot_history_curves('loss', 'val_loss', 'Model loss', 'Loss')