<a href="https://colab.research.google.com/github/krishnapriyaveluri/HistoriDecode---Language-Evolution-Decryption/blob/main/Hybrid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import tensorflow as tf
import numpy as np
from google.colab import drive

# 1. Mount Google Drive
drive.mount('/content/drive')

# 2. Load and preprocess the dataset
dataset_path = '/content/drive/MyDrive/dataset'

# Fraction of files held out for validation, and the seed that makes the
# train/validation partition identical across both loader calls and re-runs.
VAL_FRACTION = 0.2
SPLIT_SEED = 42


def _load_subset(subset):
    """Load one side ('training' or 'validation') of a fixed file-level split.

    The split is made on the file list by Keras itself. The previous approach
    (`take`/`skip` on a single shuffled dataset) is unsafe: the loader shuffles
    by default and the shuffle is redone on every pass over the dataset, so the
    batches returned by `take(n)` change from one iteration to the next and
    validation samples leak into training.
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        dataset_path,
        labels='inferred',
        label_mode='categorical',
        batch_size=32,
        image_size=(224, 224),
        color_mode='grayscale',
        validation_split=VAL_FRACTION,
        subset=subset,
        seed=SPLIT_SEED,  # must be the same for both subsets
    )


# Normalize pixel values from [0, 255] to [0, 1]
normalization_layer = tf.keras.layers.Rescaling(1./255)

# Disjoint training and validation sets
train_data = _load_subset('training').map(lambda x, y: (normalization_layer(x), y))
val_data = _load_subset('validation').map(lambda x, y: (normalization_layer(x), y))

# Batch counts, kept under their original names for any later cell that reads them.
val_size = int(tf.data.experimental.cardinality(val_data).numpy())
dataset_size = int(tf.data.experimental.cardinality(train_data).numpy()) + val_size

# Save images and labels as .npy files for PyTorch ViT training
def save_images_and_labels(dataset, save_path_prefix):
    """Materialise a batched dataset and write it to two .npy files.

    Args:
        dataset: iterable yielding `(image_batch, label_batch)` pairs whose
            elements expose a `.numpy()` method.
        save_path_prefix: path prefix; the arrays are written to
            `<prefix>_images.npy` and `<prefix>_labels.npy`.

    All batches are concatenated along the first axis before saving, so the
    whole dataset is held in memory at once.
    """
    # Pull every batch into host memory, keeping each image batch paired with its labels.
    batches = [(image_batch.numpy(), label_batch.numpy())
               for image_batch, label_batch in dataset]

    stacked_images = np.concatenate([pair[0] for pair in batches])
    stacked_labels = np.concatenate([pair[1] for pair in batches])

    np.save(f'{save_path_prefix}_images.npy', stacked_images)
    np.save(f'{save_path_prefix}_labels.npy', stacked_labels)

# Export both splits to Drive: training first, then validation.
for split_dataset, split_prefix in (
    (train_data, '/content/drive/MyDrive/train'),
    (val_data, '/content/drive/MyDrive/val'),
):
    save_images_and_labels(split_dataset, split_prefix)

# 3. Build and train the CRNN model (optional, can be run separately)
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Reshape, GRU, Dense, Dropout
from tensorflow.keras.models import Sequential

def build_crnn(input_shape, num_classes):
    """Build the convolutional-recurrent classifier.

    Architecture: three Conv2D -> MaxPooling2D -> Dropout stages (32, 64, 128
    filters), a Reshape that turns the final feature map into a sequence of
    128-dimensional vectors, a GRU that reduces the sequence to a single
    vector, and a dense softmax head.

    Args:
        input_shape: shape of one input image, e.g. `(height, width, channels)`.
        num_classes: number of output classes (size of the softmax layer).

    Returns:
        An uncompiled `Sequential` model.
    """
    return Sequential([
        # Declare the input with an explicit Input object. Passing `input_shape=`
        # to the first layer of a Sequential model is deprecated and triggers a
        # warning in current Keras releases.
        tf.keras.Input(shape=input_shape),

        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        # Collapse the spatial grid into a sequence; 128 matches the channel
        # count of the last Conv2D layer.
        Reshape((-1, 128)),
        # return_sequences=False: only the final GRU output feeds the classifier.
        GRU(128, return_sequences=False),
        Dropout(0.5),

        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])

# Input geometry and label count for the CRNN (single-channel 224x224 images).
img_height = 224
img_width = 224
num_classes = 23
input_shape = (img_height, img_width, 1)

# Build the network and attach optimizer, loss and metric.
crnn_model = build_crnn(input_shape=input_shape, num_classes=num_classes)
crnn_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training split, reporting validation metrics after every epoch.
epochs = 10
history_crnn = crnn_model.fit(
    train_data,
    validation_data=val_data,
    epochs=epochs,
)

# Persist the trained CRNN to Drive.
crnn_model.save('/content/drive/MyDrive/crnn_model.h5')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found 303 files belonging to 23 classes.
Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 6s/step - accuracy: 0.0720 - loss: 3.1367 - val_accuracy: 0.0625 - val_loss: 3.1316
Epoch 2/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 5s/step - accuracy: 0.0359 - loss: 3.1419 - val_accuracy: 0.0469 - val_loss: 3.1249
Epoch 3/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 5s/step - accuracy: 0.0303 - loss: 3.1454 - val_accuracy: 0.0469 - val_loss: 3.1397
Epoch 4/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 5s/step - accuracy: 0.0598 - loss: 3.1130 - val_accuracy: 0.0469 - val_loss: 3.1146
Epoch 5/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 5s/step - accuracy: 0.0476 - loss: 3.1180 - val_accuracy: 0.0469 - val_loss: 3.1139
Epoch 6/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 5s/step - accuracy: 0.0381 - loss: 3.1370 - val_accuracy: 0.0469 - val_loss: 3.1223
Epoch 7/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 



In [None]:
# Import necessary libraries
import torch
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from transformers import ViTFeatureExtractor, ViTForImageClassification
from google.colab import drive

# 1. Mount Google Drive
drive.mount('/content/drive')

# Read back the arrays exported by the CRNN cell, in the order:
# training images, training labels, validation images, validation labels.
train_images, train_labels, val_images, val_labels = (
    np.load(npy_path)
    for npy_path in (
        '/content/drive/MyDrive/train_images.npy',
        '/content/drive/MyDrive/train_labels.npy',
        '/content/drive/MyDrive/val_images.npy',
        '/content/drive/MyDrive/val_labels.npy',
    )
)

# 2. Load ViT Model
# Single source of truth for the checkpoint so the preprocessor and the model
# can never be loaded from different checkpoints by accident.
VIT_CHECKPOINT = 'google/vit-base-patch16-224-in21k'

vit_feature_extractor = ViTFeatureExtractor.from_pretrained(VIT_CHECKPOINT)

# The labels were saved one-hot encoded, so the second axis is the class count
# (23 for the current dataset). Reading it from the data keeps the classifier
# head in sync with whatever was exported.
vit_model = ViTForImageClassification.from_pretrained(
    VIT_CHECKPOINT,
    num_labels=int(train_labels.shape[1]),
)

# The optimizer and loss are created next to the training loop further down
# in this cell; the copies that used to live here were never used.

# Convert grayscale images to RGB for ViT
train_images_rgb = np.repeat(train_images, 3, axis=-1)  # Repeat channel dimension 3 times
val_images_rgb = np.repeat(val_images, 3, axis=-1)

# Normalisation statistics of the pretrained checkpoint, shaped to broadcast
# over (batch, channels, height, width).
vit_mean = torch.tensor(vit_feature_extractor.image_mean, dtype=torch.float32).view(1, -1, 1, 1)
vit_std = torch.tensor(vit_feature_extractor.image_std, dtype=torch.float32).view(1, -1, 1, 1)


def _to_vit_pixel_values(images_rgb):
    """Turn channels-last images in [0, 1] into normalised ViT input tensors.

    The saved arrays are only rescaled to [0, 1]. At inference time the images
    go through `vit_feature_extractor`, which additionally applies the
    checkpoint's mean/std normalisation. Applying the same normalisation here
    makes the model train on the input distribution it is later queried with.

    Args:
        images_rgb: array of shape (batch, height, width, channels).

    Returns:
        float32 tensor of shape (batch, channels, height, width).
    """
    pixels = torch.tensor(images_rgb, dtype=torch.float32).permute(0, 3, 1, 2)
    return (pixels - vit_mean) / vit_std


# Convert to PyTorch tensors
train_images_tensor = _to_vit_pixel_values(train_images_rgb)
train_labels_tensor = torch.tensor(np.argmax(train_labels, axis=1), dtype=torch.long)  # one-hot -> class index

val_images_tensor = _to_vit_pixel_values(val_images_rgb)
val_labels_tensor = torch.tensor(np.argmax(val_labels, axis=1), dtype=torch.long)

# Create DataLoader for ViT
train_dataset = TensorDataset(train_images_tensor, train_labels_tensor)
val_dataset = TensorDataset(val_images_tensor, val_labels_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


# Training Loop
optimizer = torch.optim.Adam(vit_model.parameters(), lr=3e-5)
loss_fn = torch.nn.CrossEntropyLoss()

epochs = 5  # Set epochs

for epoch in range(epochs):
    # --- training pass ---
    vit_model.train()
    total_loss = 0
    correct = 0

    for imgs, lbls in train_loader:
        optimizer.zero_grad()
        outputs = vit_model(imgs).logits  # Forward pass
        loss = loss_fn(outputs, lbls)
        loss.backward()  # Backward pass
        optimizer.step()

        total_loss += loss.item()
        correct += (outputs.argmax(dim=1) == lbls).sum().item()

    avg_loss = total_loss / len(train_loader)
    avg_accuracy = correct / len(train_images_tensor)

    # --- validation pass ---
    # The validation loader was built but never consumed, so over-fitting was
    # invisible. Evaluate without gradients and in eval mode.
    vit_model.eval()
    val_total_loss = 0.0
    val_correct = 0
    with torch.no_grad():
        for imgs, lbls in val_loader:
            outputs = vit_model(imgs).logits
            val_total_loss += loss_fn(outputs, lbls).item()
            val_correct += (outputs.argmax(dim=1) == lbls).sum().item()

    # max(..., 1) guards against an empty validation split.
    val_loss = val_total_loss / max(len(val_loader), 1)
    val_accuracy = val_correct / max(len(val_loader.dataset), 1)

    print(
        f"Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}, Accuracy: {avg_accuracy:.4f}, "
        f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}"
    )

# Leave the model in inference mode for any prediction code that runs after training.
vit_model.eval()
torch.save(vit_model.state_dict(), '/content/drive/MyDrive/vit_model.pth')

Mounted at /content/drive


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5, Loss: 3.1231, Accuracy: 0.0418
Epoch 2/5, Loss: 3.0263, Accuracy: 0.2176
Epoch 3/5, Loss: 2.9303, Accuracy: 0.4351
Epoch 4/5, Loss: 2.8213, Accuracy: 0.5523
Epoch 5/5, Loss: 2.7152, Accuracy: 0.6569


In [None]:
# Class index -> modern letter: index 0 is 'A', index 22 is 'W' (23 classes).
class_mapping = list('ABCDEFGHIJKLMNOPQRSTUVW')

# Update the hybrid prediction function to use the mapping
def hybrid_predict(img_path):
    """Classify one image by averaging the CRNN and ViT class probabilities.

    Args:
        img_path: path of the image file to classify.

    Returns:
        The entry of `class_mapping` whose averaged probability is highest.

    Relies on the notebook globals `crnn_model`, `vit_model`,
    `vit_feature_extractor` and `class_mapping`. Switches `vit_model` to
    evaluation mode as a side effect.
    """
    # Pillow is not imported anywhere else in this notebook; importing it here
    # lets the function run on a fresh kernel instead of relying on leftover state.
    from PIL import Image

    # Open the file once and release the handle as soon as the pixels are
    # decoded (`convert` returns an independent in-memory image).
    with Image.open(img_path) as source_image:
        gray_image = source_image.convert("L")

    # CRNN input: (1, 224, 224, 1), scaled to [0, 1] like the training data.
    img_crnn = np.expand_dims(np.array(gray_image.resize((224, 224))) / 255.0, axis=-1)
    img_crnn = np.expand_dims(img_crnn, axis=0)

    # ViT input: the model was trained on grayscale images replicated to three
    # channels, so feed it the grayscale image converted back to RGB rather
    # than the original colours.
    inputs = vit_feature_extractor(images=gray_image.convert("RGB"), return_tensors="pt")

    crnn_prediction = crnn_model.predict(img_crnn)

    vit_model.eval()  # make sure training-only behaviour is switched off
    with torch.no_grad():
        vit_logits = vit_model(inputs['pixel_values']).logits
    vit_prediction = torch.nn.functional.softmax(vit_logits, dim=-1).numpy()

    # Both predictions have shape (1, num_classes); average them and pick the
    # best class of the single sample.
    combined_prediction = (crnn_prediction + vit_prediction) / 2
    predicted_index = int(np.argmax(combined_prediction[0]))

    # Get the corresponding letter from the mapping
    return class_mapping[predicted_index]

# Smoke test: classify one sample image from the dataset folder.
img_path = '/content/drive/MyDrive/dataset/letter_C/old_letter_C_variant10.png'
predicted_letter = hybrid_predict(img_path)

print(f"Predicted Letter: {predicted_letter}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
Predicted Letter: C
