In [1]:
import sys
from pathlib import Path

ROOT = Path(".").resolve()
sys.path.append(str(ROOT / "src"))

print("Added to path:", ROOT / "src")


Added to path: D:\personalProjects\ocr\src\notebook\src


In [2]:
import os
import time
import json
import torch
import pandas as pd
import numpy as np

from dotenv import load_dotenv
from PIL import Image
from pathlib import Path

load_dotenv()

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print("Device:", DEVICE)


Device: cpu


In [3]:
DATA_DIR = Path("../dataset")

images = sorted(DATA_DIR.glob("*"))

print("Images:", len(images))


Images: 3


In [4]:
from backend.nougat import NougatOCR
from backend.donut import DonutOCR

nougat = NougatOCR()
donut = DonutOCR()

print("Nougat device:", nougat.device)
print("Donut device:", donut.device)


  from .autonotebook import tqdm as notebook_tqdm
Loading weights: 100%|██████████| 484/484 [00:02<00:00, 229.61it/s, Materializing param=encoder.encoder.layers.3.blocks.1.output.dense.weight]                         
The image processor of type `DonutImageProcessor` is now loaded as a fast processor by default, even if the model checkpoint was saved with a slow processor. This is a breaking change and may produce slightly different outputs. To continue using the slow processor, instantiate this class with `use_fast=False`. 
Loading weights: 100%|██████████| 484/484 [00:02<00:00, 229.92it/s, Materializing param=encoder.encoder.layers.3.blocks.1.output.dense.weight]                         


Nougat device: cpu
Donut device: cpu


In [5]:

results = []

PROMPT = "<s_docvqa><s_question>Extract all prices and totals.</s_question><s_answer>"

for img in images:
    print("Processing:", img.name)
    out_nougat = nougat.run(img)
    out_donut = donut.run(img)

    results.append({
        "nougat_time": out_nougat["time"],
        "donut_time": out_donut["time"],
        "nougat_text_preview": out_nougat["text"][:400],
        "donut_parsed": out_donut["parsed"],
    })

df_compare = pd.DataFrame(results)
df_compare


Processing: receipt_1.jpg
Processing: receipt_2.jpg
Processing: receipt_3.png


Unnamed: 0,nougat_time,donut_time,nougat_text_preview,donut_parsed
0,6.623652,12.964136,.,"{'menu': {'nm': 'Miller Lite', 'cnt': '1', 'pr..."
1,41.741818,9.857366,"## References\n\n* [1] A. A. Krizrizov, A. A. ...","{'menu': [{'nm': 'TRIPLE DIPPER', 'price': '12..."
2,9.56764,12.457992,HAND TOVEL 075953630184 2.97 X GATORADE 068949...,"{'menu': [{'nm': 'HAND TOWEL', 'num': '0759536..."


In [7]:
from pprint import pprint

for i, val in enumerate(df_compare["nougat_text_preview"]):
    print(f"\n===== ROW {i} =====")
    pprint(val)



===== ROW 0 =====
'.'

===== ROW 1 =====
('## References\n'
 '\n'
 '* [1] A. A. Krizrizov, A. A. Krizov, A. A. Krizov, A. A. Krizov, A. A. '
 'Krizov, A. A. Krizov, A. A. Krizov, A. A. Krizov, A. A. Krizov, A. A. '
 'Krizov, A. A. Krizov, A. A. Krizov, A. A. Krizov, A. A. Krizov, A. A. '
 'Krizov, A. A. Krizov, A. A. Krizov, A. A. Krizov, A. A. Krizov, A. A. '
 'Krizov, A. A. Krizov, A. A. Krizov, A. A. Krizov, A. A. Krizov, A. A. '
 'Krizov, A. A. Krizov, A. A. Krizov')

===== ROW 2 =====
('HAND TOVEL 075953630184 2.97 X GATORADE 068949055223 2.00 X T-SHIRT '
 '036231552452 16.88 X PUSH PINS 088348997350 1.24 X SUBTOTAL 23.09 TAX 1 '
 '7.89% 2.90 TAX 2 4.90% 1.28 TOTAL 27.27 CREDIT TEND 27.27 CHANGE DUE 0.00')


In [6]:
from pprint import pprint

for i, val in enumerate(df_compare["donut_parsed"]):
    print(f"\n===== ROW {i} =====")
    pprint(val)



===== ROW 0 =====
{'menu': {'cnt': '1', 'nm': 'Miller Lite', 'price': '5.00'},
 'sub_total': {'subtotal_price': '5.00', 'tax_price': '0.35'},
 'total': {'cashprice': 'Tendered:',
           'changeprice': '5.35',
           'total_price': '$5.35'}}

===== ROW 1 =====
{'menu': [{'nm': 'TRIPLE DIPPER', 'price': '12.19'},
          {'nm': 'CHICKEN WAFFLES4', 'price': '11.89'}],
 'sub_total': {'subtotal_price': '24.08', 'tax_price': '2.29'},
 'total': {'cashprice': '26.37', 'total_price': '26.37'}}

===== ROW 2 =====
{'menu': [{'nm': 'HAND TOWEL', 'num': '075953630184', 'price': '2.97 x'},
          {'nm': 'GATORADE', 'num': '068949055223', 'price': '2.00 x'},
          {'nm': 'T-SHIRT', 'num': '036231552452', 'price': '16.88 x'},
          {'nm': 'PUSH PINS', 'num': '088348997350', 'price': '1.24 x'}],
 'sub_total': {'subtotal_price': '23.09', 'tax_price': '2.90'},
 'total': {'changeprice': '0.00',
           'creditcardprice': '7.89%',
           'total_price': '27.27'}}


Clearly Donut performs better, it has 100% accuracy with the 3 images in the dataset. Nougat only performed decently in the 3rd receipt where it's in digital/scanned form and was nonsensical when used for the other 2 images. Donut also in average is faster. 