In [7]:
# Milestone 2 – Model 1: Deskewed Red-Channel CNN-OCR (Local)


import os, time, cv2, torch, torch.nn as nn, joblib
import numpy as np
from pathlib import Path
from datasets import load_dataset
from PIL import Image

# Setup: directory that receives the serialized model artifacts.
SAVE_DIR = Path("saved_model")
# Create the directory on first run; an already-existing directory is fine.
os.makedirs(SAVE_DIR, exist_ok=True)

# Preprocessing
def deskew_red(img, *, max_skew_deg=45.0):
    """Deskew a BGR image and return its red channel as a 128x32 map.

    The skew angle is estimated from the Hough lines found on the Canny edge
    map. Only lines within ``max_skew_deg`` of horizontal are considered and
    their median deviation is used, so vertical edges (theta near 0 or pi)
    and stray outliers cannot pull the estimate towards a spurious large
    rotation, which is what a plain mean over all detected lines does.

    Args:
        img: 3-channel image in OpenCV BGR order, e.g. the result of
            ``cv2.imread``.
        max_skew_deg: Largest deviation from horizontal (in degrees) a
            detected line may have and still contribute to the estimate.

    Returns:
        A single-channel array resized to width 128 and height 32. The values
        are the red channel passed through OpenCV's BGR-to-gray conversion
        with blue and green zeroed, i.e. red scaled by the red luminance
        weight (about 0.299), not the raw red intensities.

    Raises:
        ValueError: If ``img`` is None (e.g. ``cv2.imread`` could not read
            the file).
    """
    if img is None:
        raise ValueError("deskew_red received None; was the image loaded correctly?")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)

    angle = 0.0
    if lines is not None:
        # HoughLines returns (rho, theta) pairs; theta is the angle of the
        # line's normal, so theta == pi/2 corresponds to a horizontal line.
        skew_deg = np.degrees(lines[:, 0, 1] - np.pi / 2)
        near_horizontal = skew_deg[np.abs(skew_deg) <= max_skew_deg]
        if near_horizontal.size:
            # Median is robust against the few lines that are not text baselines.
            angle = float(np.median(near_horizontal))

    (h, w) = img.shape[:2]
    M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1)
    img = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_CUBIC)

    # Zero out blue and green, then collapse to one channel. The gray
    # conversion is kept (rather than returning r directly) so the output
    # intensities stay identical to what this preprocessing always produced.
    b, g, r = cv2.split(img)
    red_only = cv2.merge([np.zeros_like(r), np.zeros_like(r), r])
    gray = cv2.cvtColor(red_only, cv2.COLOR_BGR2GRAY)
    return cv2.resize(gray, (128, 32))

# CNN-OCR model
class OCRNet(nn.Module):
    """Small CNN + bidirectional LSTM network producing per-column class scores.

    Two conv/ReLU/max-pool stages reduce the input height and width by a
    factor of four. Each remaining column of the feature map is flattened
    into one time step, fed through a 2-layer bidirectional LSTM and projected
    to ``nclass`` scores.

    Args:
        nclass: Number of output classes per time step.
    """

    def __init__(self, nclass=80):
        super().__init__()
        feature_layers = [
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.conv = nn.Sequential(*feature_layers)
        # Each time step carries 64 channels x 8 rows, which matches a
        # 32-pixel-high input after the two 2x poolings above.
        self.lstm = nn.LSTM(
            input_size=64 * 8,
            hidden_size=128,
            num_layers=2,
            bidirectional=True,
            batch_first=True,
        )
        # 256 = 2 directions x 128 hidden units.
        self.fc = nn.Linear(in_features=256, out_features=nclass)

    def forward(self, x):
        """Map a (batch, 1, height, width) tensor to (batch, width/4, nclass) scores."""
        features = self.conv(x)
        # Move the width axis next to batch so every image column becomes one
        # sequence step, then merge channels and rows into the feature axis.
        sequence = features.permute(0, 3, 1, 2).flatten(start_dim=2)
        recurrent_out, _ = self.lstm(sequence)
        return self.fc(recurrent_out)

# Build and save model
model = OCRNet()
# Weights only: the portable format, reloadable with OCRNet().load_state_dict(...).
torch.save(model.state_dict(), SAVE_DIR/"model.pt")
# Whole-object pickle: ties the file to this exact OCRNet class definition.
joblib.dump(model, SAVE_DIR/"model.joblib")
print("Model saved in:", SAVE_DIR)

# Reload the saved model
# SECURITY: joblib.load unpickles arbitrary objects and can execute code;
# only load files produced by this script, never untrusted ones.
model_loaded = joblib.load(SAVE_DIR/"model.joblib")
# Put the reloaded module in inference mode before it is used for prediction.
model_loaded.eval()
print("Model reloaded from saved_model folder")

# Run sample prediction: pull one dataset image and store it as a JPEG file.
dataset = load_dataset("lansinuote/ocr_id_card", split="train[:1]")
img = np.array(dataset[0]["image"])
# cv2.imwrite expects BGR channel order; assumes the dataset image is RGB.
sample_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
cv2.imwrite("sample.jpg", sample_bgr)

# Read the file back and preprocess it the way an on-disk input would be.
sample_from_disk = cv2.imread("sample.jpg")
proc = deskew_red(sample_from_disk)
# Divide by 255, prepend batch and channel axes, and cast to float32.
tensor = torch.tensor(proc / 255.0)[None, None].float()

# Time one forward pass with gradient tracking disabled.
with torch.no_grad():
    t0 = time.perf_counter()
    _ = model_loaded(tensor)
    t1 = time.perf_counter()

elapsed_ms = (t1 - t0) * 1000
print(f"Forward latency: {elapsed_ms:.2f} ms")

Model saved in: saved_model
Model reloaded from saved_model folder
Forward latency: 7.93 ms
