In [1]:
!pip install transformers==4.36.2
!pip install torch torchvision
!pip install Pillow

Collecting transformers==4.36.2
  Downloading transformers-4.36.2-py3-none-any.whl.metadata (126 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/126.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.8/126.8 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.19,>=0.14 (from transformers==4.36.2)
  Downloading tokenizers-0.15.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.36.2-py3-none-any.whl (8.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m76.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tokenizers-0.15.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m92.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers

In [6]:
from transformers import DonutProcessor, VisionEncoderDecoderModel
from PIL import Image
import torch

# Load pretrained Donut receipt model (CORD dataset)
processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-cord-v2")
model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-cord-v2")

# Load a receipt image
image = Image.open("/content/5098.png").convert("RGB")

# Prepare input
task_prompt = "<s_receipt>"  # special token for receipts
inputs = processor(image, text=task_prompt, return_tensors="pt")

# Run inference
outputs = model.generate(**inputs, max_length=512, return_dict_in_generate=True)
result = processor.batch_decode(outputs.sequences)[0]

print(result)

Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


<s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s><s></s>


In [1]:
# Install dependencies
!pip install python-doctr transformers torch torchvision --quiet

from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from transformers import pipeline
import re, json

# ===== 1. OCR Extraction =====
ocr_model = ocr_predictor(pretrained=True)
doc = DocumentFile.from_images("/content/5096.png")  # Change image path
ocr_result = ocr_model(doc)

# Flatten text lines
lines = []
for p in ocr_result.pages:
    for b in p.blocks:
        for l in b.lines:
            line_text = " ".join([w.value for w in l.words])
            lines.append(line_text)

text = " ".join(lines)

# ===== 2. NER Model (BERT) =====
ner = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
entities = ner(text)

# ===== 3. Prepare Output JSON =====
output = {
    "date": None,
    "supplier_name": None,
    "items_purchased": [],
    "payment_type": None,
    "total_balance": None
}

# Fill from detected entities
for ent in entities:
    if "DATE" in ent['entity_group'] and not output["date"]:
        output["date"] = ent["word"]
    elif "ORG" in ent['entity_group'] and not output["supplier_name"]:
        output["supplier_name"] = ent["word"]

# ===== 4. Detect Total Balance =====
total_match = re.search(r"(total|amount|balance)\s*[:\-]?\s*\$?\d+[.,]?\d*", text, re.I)
if total_match:
    amount_match = re.search(r"\$?\d+[.,]?\d*", total_match.group(0))
    if amount_match:
        output["total_balance"] = amount_match.group(0)

# ===== 5. Detect Payment Type =====
pay_match = re.search(r"(credit\s*card|debit\s*card|cash|upi|net\s*banking)", text, re.I)
if pay_match:
    output["payment_type"] = pay_match.group(0)

# ===== 6. Extract Items Purchased (structured) =====
items = []
capture = False

for l in lines:
    # Stop capturing when reaching total/amount/balance
    if re.search(r"(total|amount|balance)", l, re.I):
        break
    # Start when we see first "item + price" line
    if re.search(r"[A-Za-z]", l) and re.search(r"\d+[.,]\d{2}", l):
        capture = True
    if capture and re.search(r"[A-Za-z]", l) and re.search(r"\d+[.,]\d{2}", l):
        # Split into item name + price
        price_match = re.search(r"\d+[.,]\d{2}", l)
        if price_match:
            price = price_match.group(0)
            item_name = l.replace(price, "").strip(" -:")  # clean name
            items.append({"item": item_name, "price": price})

output["items_purchased"] = items

# ===== 7. Save to JSON =====
with open("receipt_output.json", "w") as f:
    json.dump(output, f, indent=2)

print(json.dumps(output, indent=2))


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.5/48.5 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m288.4/288.4 kB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m345.1/345.1 kB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m88.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m963.8/963.8 kB[0m [31m65.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/2.8 MB[0m [31m85.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m108.1 MB/s[0m eta 

  0%|          | 0/65814772 [00:00<?, ?it/s]

Downloading https://doctr-static.mindee.com/models?id=v0.12.0/crnn_vgg16_bn-0417f351.pt&src=0 to /root/.cache/doctr/models/crnn_vgg16_bn-0417f351.pt


  0%|          | 0/63303144 [00:00<?, ?it/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/829 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/59.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cuda:0


{
  "date": null,
  "supplier_name": "Waitrose",
  "items_purchased": [
    {
      "item": "litre Pump # 04",
      "price": "51.32"
    },
    {
      "item": "E",
      "price": "67.69"
    },
    {
      "item": "9 C / litro",
      "price": "1.31"
    },
    {
      "item": "A",
      "price": "0.50"
    }
  ],
  "payment_type": null,
  "total_balance": "68.19"
}
