In [11]:
import os
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image


In [12]:
# Vocabulary and Mappings
VOCAB = """ !"#&'()*+,-./0123456789:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"""
CTC_BLANK = 0
int_to_char = {i + 1: char for i, char in enumerate(VOCAB)}

# Image Transformations
IMG_HEIGHT = 64
IMG_WIDTH = 512
inference_transform = transforms.Compose([
    transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# CRNN Model Architecture
class CRNN(nn.Module):
    def __init__(self, num_chars):
        super(CRNN, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.map_to_seq = nn.Linear(64 * (IMG_HEIGHT // 4), 64)
        self.rnn = nn.LSTM(64, 128, num_layers=2, bidirectional=True, dropout=0.25)
        self.fc = nn.Linear(256, num_chars)

    def forward(self, x):
        x = self.cnn(x)
        x = x.permute(0, 3, 1, 2)
        b, w, c, h = x.size()
        x = x.view(b, w, c * h)
        x = self.map_to_seq(x)
        x = x.permute(1, 0, 2)
        x, _ = self.rnn(x)
        x = self.fc(x)
        x = nn.functional.log_softmax(x, dim=2)
        return x

# CTC Decoding Function
def ctc_decode(log_probs):
    preds = log_probs.argmax(dim=2).permute(1, 0)
    decoded_texts = []
    for pred in preds:
        s = ''.join([int_to_char.get(c.item(), '') for c in pred if c != CTC_BLANK])
        dedup_s = ""
        if s:
            dedup_s = s[0]
            for char in s[1:]:
                if char != dedup_s[-1]:
                    dedup_s += char
        decoded_texts.append(dedup_s)
    return decoded_texts



In [13]:
MODEL_PATH = "handwriting_recognizer_best.pth"
# !!! IMPORTANT: CHANGE THIS TO THE PATH OF YOUR IMAGE !!!
IMAGE_PATH = r"D:\programming\python files(not package)\ocrtake2\file1\try_this_for_prediction.png"


In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Load Model ---
if not os.path.exists(MODEL_PATH):
    print(f"Error: Model file not found at '{MODEL_PATH}'")
else:
    model = CRNN(num_chars=len(VOCAB) + 1).to(device)
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    model.eval()
    print("Model loaded successfully.")

    # --- Load and Predict Image ---
    if not os.path.exists(IMAGE_PATH):
        print(f"Error: Image file not found at '{IMAGE_PATH}'")
    else:
        try:
            image = Image.open(IMAGE_PATH).convert("RGB")
            image_tensor = inference_transform(image).unsqueeze(0).to(device)

            with torch.no_grad():
                log_probs = model(image_tensor)

            predicted_text = ctc_decode(log_probs)[0]

            print(f"\nImage: '{os.path.basename(IMAGE_PATH)}'")
            print(f"Predicted Text: {predicted_text}")

            # Optional: Display the image
            plt.imshow(image)
            plt.title(f"Prediction: {predicted_text}")
            plt.axis("off")
            plt.show()

        except Exception as e:
            print(f"An error occurred: {e}")


Model loaded successfully.

Image: 'try_this_for_prediction.png'
Predicted Text: try thisfor prediction .
An error occurred: name 'plt' is not defined
