## OCR dos adesivos via DeepSeek-OCR

In [1]:
import cv2, os, io, sys, re
import numpy as np
import torch

In [2]:
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer

In [3]:
import warnings
from transformers import logging
# Suppress every Python warning and lower the transformers log level to
# errors only. NOTE: `logging` here is `transformers.logging` (imported
# above), not the standard-library `logging` module.
warnings.filterwarnings("ignore")
logging.set_verbosity_error()

In [4]:
# Folder holding the images to process; the results file name is derived
# from the folder name.
input_dir = "validar-universidad"
output_file = f"results_{input_dir}.txt"

In [5]:
# Accepted four-digit codes. An OCR reading is kept only when, after
# punctuation is stripped, it equals one of these strings.
# NOTE(review): '1609' is absent ('1608' is followed by '1610') -- confirm
# whether that gap is intentional.
ods = ['0101', '0102', '0103', '0104', '0105', '0106', '0107', '0201', '0202', '0203', '0204', '0205', '0206', '0207', '0208', '0209', '0210', '0211', '0301', '0302', '0303', '0304', '0305', '0306', '0307', '0308', '0309', '0310', '0311', '0312', '0313', '0314', '0315', '0316', '0317', '0318', '0319', '0320', '0401', '0402', '0403', '0404', '0405', '0406', '0407', '0408', '0409', '0410', '0411', '0501', '0502', '0503', '0504', '0505', '0506', '0507', '0508', '0509', '0510', '0511', '0512', '0513', '0601', '0602', '0603', '0604', '0605', '0606', '0701', '0702', '0703', '0801', '0802', '0803', '0804', '0805', '0806', '0807', '0808', '0809', '0810', '0811', '0812', '0813', '0814', '0815', '0816', '0817', '0818', '0819', '0820', '0821', '0822', '0823', '0824', '0901', '0902', '0903', '0904', '0905', '0906', '0907', '0908', '0909', '0910', '1001', '1002', '1003', '1004', '1005', '1006', '1007', '1008', '1009', '1010', '1101', '1102', '1103', '1104', '1105', '1106', '1107', '1108', '1109', '1110', '1111', '1112', '1113', '1114', '1115', '1116', '1117', '1201', '1202', '1203', '1204', '1205', '1206', '1207', '1208', '1209', '1210', '1211', '1212', '1213', '1214', '1301', '1302', '1303', '1304', '1401', '1402', '1403', '1404', '1405', '1406', '1407', '1408', '1409', '1501', '1502', '1503', '1504', '1505', '1506', '1507', '1508', '1509', '1510', '1511', '1512', '1513', '1514', '1601', '1602', '1603', '1604', '1605', '1606', '1607', '1608', '1610', '1611', '1612', '1613', '1614', '1615', '1701', '1702', '1703', '1704', '1705', '1706', '1707', '1708', '1709', '1801', '1802', '1803', '1804', '1805', '1806', '1807', '1808', '1809', '1810', '1811', '1812']

In [6]:
def rotaciona(image_path, angulo):
    """Rotate an image without cropping it and save the result as a temp PNG.

    The output canvas is enlarged to the bounding box of the rotated image,
    so no corner is cut off.

    Args:
        image_path: Path of the image file to read.
        angulo: Rotation angle in degrees, forwarded to
            ``cv2.getRotationMatrix2D``.

    Returns:
        Path of the temporary file that was written: the input path with its
        extension replaced by ``_temp.png``. The caller is responsible for
        deleting it.

    Raises:
        ValueError: If the image cannot be read or decoded.
        OSError: If the rotated image cannot be written.
    """
    # Load the image; cv2.imread signals failure by returning None.
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not read image: {image_path}")

    # Image dimensions and rotation centre.
    h, w = img.shape[:2]
    centro = (w // 2, h // 2)

    # Rotation matrix around the centre, no scaling.
    M = cv2.getRotationMatrix2D(centro, angulo, 1.0)

    # Size of the bounding box that contains the whole rotated image.
    cos = abs(M[0, 0])
    sin = abs(M[0, 1])

    new_w = int((h * sin) + (w * cos))
    new_h = int((h * cos) + (w * sin))

    # Shift the translation so the rotated image is centred on the new canvas.
    M[0, 2] += (new_w / 2) - centro[0]
    M[1, 2] += (new_h / 2) - centro[1]

    # Apply the rotation.
    img_rotacionada = cv2.warpAffine(img, M, (new_w, new_h), flags=cv2.INTER_CUBIC)

    # Build the temp path from the file stem rather than str.replace: a name
    # without a lowercase ".png" would otherwise map onto itself and the
    # source image would be overwritten (and ".png" inside a directory name
    # would be rewritten too).
    stem, _ext = os.path.splitext(image_path)
    temp_path = stem + '_temp.png'

    # Save next to the original; cv2.imwrite returns False on failure.
    if not cv2.imwrite(temp_path, img_rotacionada):
        raise OSError(f"Could not write temporary image: {temp_path}")

    return temp_path

In [7]:
# Model identifier handed to AutoTokenizer / AutoModel.from_pretrained.
model_name = 'deepseek-ai/DeepSeek-OCR'

In [8]:
# Load the tokenizer and the model (both rely on the repository's custom
# code), then switch the model to eval mode, move it to the GPU and cast it
# to bfloat16.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
model = AutoModel.from_pretrained(
    model_name,
    trust_remote_code=True,
    use_safetensors=True,
)
model = (
    model
    .eval()
    .cuda()
    .to(torch.bfloat16)
)

In [9]:
# Prompt sent with every image to model.infer. The loop further down looks
# for a <|ref|>...<|/ref|> span in the text the model prints.
# NOTE(review): presumably the <|grounding|> tag is what makes the model emit
# those <|ref|> spans -- confirm against the model documentation.
prompt = "<image>\n<|grounding|>OCR this image."

In [10]:
# For every image in input_dir, try rotations in 2-degree steps and run OCR on
# each rotated copy until the recognised text matches one of the codes in
# `ods`; the first match is written to the results file as "<file>:<code>".
# Images for which no rotation yields a known code produce no output line.

codigos_validos = set(ods)  # O(1) membership test inside the hot loop
padrao_ref = re.compile(r"<\|ref\|>(.*?)<\|/ref\|>")
remove_pontuacao = str.maketrans("", "", ".-:")  # drop '.', '-' and ':'

with open(output_file, "w", encoding="utf-8") as result:
    for file in tqdm(os.listdir(input_dir)):
        image_path = os.path.join(input_dir, file)

        for ang in range(0, 360, 2):
            temp_path = rotaciona(image_path, ang)

            # The recognised text is taken from what model.infer prints, so
            # stdout is redirected into a buffer for the duration of the call.
            buffer = io.StringIO()
            sys_stdout = sys.stdout
            sys.stdout = buffer
            try:
                _ = model.infer(tokenizer, prompt=prompt, image_file=temp_path, output_path='results', crop_mode=True, save_results=False, test_compress=True)
            finally:
                # Always restore stdout and clean up the rotated copy, even
                # when inference fails. Never delete the source image itself.
                sys.stdout = sys_stdout
                if temp_path != image_path and os.path.exists(temp_path):
                    os.remove(temp_path)

            output = buffer.getvalue()
            buffer.close()

            # No <|ref|> span at this angle: try the next rotation instead of
            # crashing on a missing match.
            encontrado = padrao_ref.search(output)
            if encontrado is None:
                continue

            ref = encontrado.group(1).translate(remove_pontuacao)

            if ref in codigos_validos:
                result.write(f"{file}:{ref}\n")
                break

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 165/165 [1:21:04<00:00, 29.48s/it]
