In [16]:
from PIL import Image

# Path of the scanned page; the same file is read again as image_path further down.
im_file = "/Users/surajkumar/Desktop/OCR_model/page_01.jpg"

# Open the page with Pillow and print its repr (plugin class, mode, size).
im = Image.open(im_file)
print(im)
# NOTE(review): the destination is the same path as im_file, so every run of
# this cell rewrites the source JPEG in place — confirm this is intended;
# writing to a separate output path would leave the original file untouched.
im.save("/Users/surajkumar/Desktop/OCR_model/page_01.jpg")

<PIL.JpegImagePlugin.JpegImageFile image mode=L size=1200x1522 at 0x10D949BE0>


In [12]:
ls ~/Desktop/OCR_model/page_01.jpg


/Users/surajkumar/Desktop/OCR_model/page_01.jpg


In [None]:
# Import libraries
import copy
import shutil
import time

import cv2
import matplotlib.pyplot as plt
import pytesseract
import torch
import torchvision.transforms as transforms
from PIL import Image
from torch.quantization import quantize_dynamic

# Ensure pytesseract can find the tesseract executable: prefer whichever binary
# is on PATH (install locations differ between package managers and platforms)
# and fall back to the previously hard-coded location when PATH has none.
pytesseract.pytesseract.tesseract_cmd = shutil.which("tesseract") or r'/usr/local/bin/tesseract'

# Set the path to the image
image_path = "/Users/surajkumar/Desktop/OCR_model/page_01.jpg"

# 1. Load the image and preprocess it
def preprocess_image(image_path, blur_kernel=(5, 5), threshold=127):
    """Load an image and derive a binarised copy of it.

    The image is read with OpenCV, converted to grayscale, smoothed with a
    Gaussian blur and finally binarised with a fixed global threshold.

    Args:
        image_path: Path of the image file to read.
        blur_kernel: ``(width, height)`` of the Gaussian kernel passed to
            ``cv2.GaussianBlur``. Defaults to ``(5, 5)``.
        threshold: Cut-off passed to ``cv2.threshold``; pixels above it become
            255, all others 0. Defaults to 127.

    Returns:
        A tuple ``(image, threshold_image)``: the image exactly as loaded by
        ``cv2.imread`` and the single-channel thresholded image.

    Raises:
        FileNotFoundError: If OpenCV could not read an image from
            ``image_path`` (missing file or a format it cannot decode).
    """
    # Load image using OpenCV
    image = cv2.imread(image_path)

    # cv2.imread signals failure by returning None instead of raising; fail
    # here with a clear message rather than deep inside cvtColor.
    if image is None:
        raise FileNotFoundError(
            f"Could not read an image from {image_path!r} "
            "(file missing or not a format OpenCV can decode)"
        )

    # Convert to grayscale
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian Blur (sigma 0 lets OpenCV derive it from the kernel size)
    blurred_image = cv2.GaussianBlur(gray_image, blur_kernel, 0)

    # Apply binary thresholding
    _, threshold_image = cv2.threshold(blurred_image, threshold, 255, cv2.THRESH_BINARY)

    return image, threshold_image

# Preprocess the image: keep both the image as loaded and its thresholded version
original_image, processed_image = preprocess_image(image_path)

# 2. Define a mock GPU-based OCR model (Example: CNN or pre-trained PyTorch model)
class MockOCRModel(torch.nn.Module):
    """Toy stand-in for an OCR network: one conv layer plus a linear head.

    The linear layer needs a fixed number of input features, so the conv
    feature map is adaptively average-pooled to ``pooled_size`` first. This
    lets the model accept images of any height and width; for inputs that are
    already ``pooled_size`` the pooling is an identity and the output matches
    a plain conv -> relu -> flatten -> linear stack.

    Args:
        pooled_size: ``(height, width)`` the feature map is pooled to before
            flattening. Defaults to ``(256, 256)``.
        num_classes: Size of the output vector. Defaults to 10.
    """

    def __init__(self, pooled_size=(256, 256), num_classes=10):
        super().__init__()
        pooled_h, pooled_w = pooled_size
        self.conv1 = torch.nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.relu = torch.nn.ReLU()
        # Parameter-free, so the set of learnable tensors is unchanged.
        self.pool = torch.nn.AdaptiveAvgPool2d((pooled_h, pooled_w))
        self.fc = torch.nn.Linear(16 * pooled_h * pooled_w, num_classes)  # Example: Fully connected for OCR output

    def forward(self, x):
        """Map a ``(batch, 1, H, W)`` tensor to ``(batch, num_classes)`` scores."""
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)  # Bring any H x W to the size the linear layer expects
        x = torch.flatten(x, start_dim=1)  # Flatten everything but the batch axis
        x = self.fc(x)
        return x

# Instantiate the model on the GPU when CUDA is available; otherwise keep it on
# the CPU so the notebook still runs on machines without an NVIDIA GPU instead
# of failing inside .cuda().
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
gpu_model = MockOCRModel().to(device)
if device.type == "cuda":
    print("GPU-based model initialized.")
else:
    print("CUDA is not available; model initialized on the CPU instead.")

# 3. Convert the GPU model to a CPU model
def convert_to_cpu_model(gpu_model):
    """Return a dynamically quantized CPU copy of ``gpu_model``.

    ``torch.nn.Module.cpu()`` moves a module in place and returns that same
    object, so calling it directly on ``gpu_model`` would also pull the
    caller's model off the GPU. The model is therefore deep-copied first and
    only the copy is moved and quantized; ``gpu_model`` is left untouched.

    Args:
        gpu_model: The ``torch.nn.Module`` to convert (on any device).

    Returns:
        A new module on the CPU whose ``torch.nn.Linear`` layers have been
        replaced by int8 dynamically quantized equivalents.
    """
    # Copy first, then move: keeps the source model on its original device.
    cpu_model = copy.deepcopy(gpu_model).cpu()

    # Apply quantization for optimization. The copy above is already private
    # to this function, so quantize it in place instead of copying it again.
    quantized_model = quantize_dynamic(
        cpu_model, {torch.nn.Linear}, dtype=torch.qint8, inplace=True
    )
    return quantized_model

# Build the quantized CPU variant from the model instantiated above
cpu_model = convert_to_cpu_model(gpu_model)
print("Converted to CPU model.")

# 4. Perform OCR using pytesseract
def perform_ocr_with_pytesseract(image):
    """Run Tesseract on ``image`` through pytesseract and return the text it extracts."""
    return pytesseract.image_to_string(image)

# Perform OCR on the original image and on the thresholded image.
# cv2.imread returns colour data in BGR order while PIL assumes RGB, so the
# channels are swapped before the original image is handed to Tesseract.
# NOTE: both calls go through pytesseract; the "GPU"/"CPU" suffixes only
# distinguish the untouched page from the preprocessed one.
ocr_text_gpu = perform_ocr_with_pytesseract(
    Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
)
ocr_text_cpu = perform_ocr_with_pytesseract(Image.fromarray(processed_image))

print("OCR (GPU):", ocr_text_gpu)
print("OCR (CPU):", ocr_text_cpu)

# 5. Measure performance (inference throughput in FPS; accuracy is not evaluated here)
def evaluate_performance(model, image, runs=10):
    """Measure the inference throughput of ``model`` on a single image.

    The image is converted to a single-channel float tensor with a leading
    batch axis, moved to the device the model's parameters live on, and pushed
    through the model ``runs`` times with autograd disabled. One untimed
    warm-up pass is made first so one-off initialisation cost is not counted.

    Args:
        model: Callable (normally a ``torch.nn.Module``) taking the input tensor.
        image: Image accepted by ``torchvision.transforms.ToPILImage``, such as
            the arrays produced by OpenCV.
        runs: Number of timed forward passes. Defaults to 10.

    Returns:
        Frames per second, i.e. ``runs`` divided by the elapsed wall-clock time.
    """
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor()
    ])
    input_image = transform(image).unsqueeze(0)  # Add batch dimension

    # Feed the model on its own device; a CPU tensor passed to a CUDA model
    # raises a device-mismatch error. Plain callables without parameters are
    # assumed to run on the CPU.
    parameters = getattr(model, "parameters", None)
    first_param = next(parameters(), None) if callable(parameters) else None
    device = first_param.device if first_param is not None else torch.device("cpu")
    input_image = input_image.to(device)
    on_cuda = device.type == "cuda"

    with torch.no_grad():  # Inference only: skip building the autograd graph
        model(input_image)  # Warm-up pass, not timed
        if on_cuda:
            # CUDA kernels run asynchronously; wait so the timer brackets real work.
            torch.cuda.synchronize(device)

        start_time = time.perf_counter()
        for _ in range(runs):
            model(input_image)
        if on_cuda:
            torch.cuda.synchronize(device)
        elapsed = time.perf_counter() - start_time

    # Guard against a zero reading from a coarse clock.
    fps = runs / elapsed if elapsed > 0 else float("inf")
    return fps

# Evaluate GPU and CPU models: both are timed on the same thresholded image
# so the two FPS figures are directly comparable.
fps_gpu = evaluate_performance(gpu_model, processed_image)
fps_cpu = evaluate_performance(cpu_model, processed_image)

# Report the raw (unrounded) frames-per-second values.
print(f"FPS (GPU): {fps_gpu}")
print(f"FPS (CPU): {fps_cpu}")

# 6. Visualize and compare results
def visualize_results(image, ocr_text_gpu, ocr_text_cpu, fps_gpu, fps_cpu):
    """Show the input image next to a text panel with OCR output and FPS figures.

    Args:
        image: Image array to display. Colour arrays are treated as BGR (the
            order ``cv2.imread`` produces) and converted to RGB; 2-D
            single-channel arrays, such as a thresholded image, are drawn with
            a gray colormap.
        ocr_text_gpu: Text shown under the "GPU Text" label.
        ocr_text_cpu: Text shown under the "CPU Text" label.
        fps_gpu: Number shown as "GPU FPS", formatted to two decimals.
        fps_cpu: Number shown as "CPU FPS", formatted to two decimals.
    """
    fig, (image_ax, text_ax) = plt.subplots(1, 2, figsize=(12, 6))

    # Show the image. COLOR_BGR2RGB rejects single-channel input, so grayscale
    # arrays are displayed directly instead of being converted.
    if image.ndim == 2:
        image_ax.imshow(image, cmap="gray")
    else:
        image_ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    image_ax.set_title("Input Image")
    image_ax.axis("off")

    # Display OCR results
    results_text = f"""
    OCR Results:
    GPU Text: {ocr_text_gpu}
    CPU Text: {ocr_text_cpu}

    FPS Comparison:
    GPU FPS: {fps_gpu:.2f}
    CPU FPS: {fps_cpu:.2f}
    """
    text_ax.text(0.1, 0.5, results_text, fontsize=12, wrap=True)
    text_ax.axis("off")

    fig.tight_layout()
    plt.show()

# Visualize the results: the image as loaded next to the OCR text and FPS numbers
visualize_results(original_image, ocr_text_gpu, ocr_text_cpu, fps_gpu, fps_cpu)
