In [1]:
!pip install git+https://github.com/shivalikasingh95/transformers.git@image_utils_fix
!pip install datasets sentencepiece word2number gradio
!pip install symspellpy pillow torch
!pip install gradio
!pip install --upgrade gradio

Collecting git+https://github.com/shivalikasingh95/transformers.git@image_utils_fix
  Cloning https://github.com/shivalikasingh95/transformers.git (to revision image_utils_fix) to /tmp/pip-req-build-ouq55qac
  Running command git clone --filter=blob:none --quiet https://github.com/shivalikasingh95/transformers.git /tmp/pip-req-build-ouq55qac
  Running command git checkout -b image_utils_fix --track origin/image_utils_fix
  Switched to a new branch 'image_utils_fix'
  Branch 'image_utils_fix' set up to track remote branch 'image_utils_fix' from 'origin'.
  Resolved https://github.com/shivalikasingh95/transformers.git to commit 3a164e9aeea6070f67c3d300189422fd76695684
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.24.0.dev0)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2

In [2]:
from transformers import DonutProcessor, VisionEncoderDecoderModel
import pkg_resources
from symspellpy import SymSpell
from word2number import w2n
from dateutil import relativedelta
from datetime import datetime
from word2number import w2n
from PIL import Image
import torch
import re

CHEQUE_PARSER_MODEL = "shivi/donut-cheque-parser"
TASK_PROMPT = "<parse-cheque>"
device = "cuda" if torch.cuda.is_available() else "cpu"

def load_donut_model_and_processor():
    donut_processor = DonutProcessor.from_pretrained(CHEQUE_PARSER_MODEL)
    model = VisionEncoderDecoderModel.from_pretrained(CHEQUE_PARSER_MODEL)
    model.to(device)
    return donut_processor, model

def prepare_data_using_processor(donut_processor,image_path):
    ## Pass image through donut processor's feature extractor and retrieve image tensor
    image = load_image(image_path)
    pixel_values = donut_processor(image, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)

    ## Pass task prompt for document (cheque) parsing task to donut processor's tokenizer and retrieve the input_ids
    decoder_input_ids = donut_processor.tokenizer(TASK_PROMPT, add_special_tokens=False, return_tensors="pt")["input_ids"]
    decoder_input_ids = decoder_input_ids.to(device)

    return pixel_values, decoder_input_ids

def load_image(image_path):
    image = Image.open(image_path).convert("RGB")
    return image

def parse_cheque_with_donut(input_image_path):

    donut_processor, model = load_donut_model_and_processor()

    cheque_image_tensor, input_for_decoder = prepare_data_using_processor(donut_processor,input_image_path)

    outputs = model.generate(cheque_image_tensor,
                                decoder_input_ids=input_for_decoder,
                                max_length=model.decoder.config.max_position_embeddings,
                                early_stopping=True,
                                pad_token_id=donut_processor.tokenizer.pad_token_id,
                                eos_token_id=donut_processor.tokenizer.eos_token_id,
                                use_cache=True,
                                num_beams=1,
                                bad_words_ids=[[donut_processor.tokenizer.unk_token_id]],
                                return_dict_in_generate=True,
                                output_scores=True,)

    decoded_output_sequence = donut_processor.batch_decode(outputs.sequences)[0]

    extracted_cheque_details = decoded_output_sequence.replace(donut_processor.tokenizer.eos_token, "").replace(donut_processor.tokenizer.pad_token, "")
    ## remove task prompt from token sequence
    cleaned_cheque_details = re.sub(r"<.*?>", "", extracted_cheque_details, count=1).strip()
    ## generate ordered json sequence from output token sequence
    cheque_details_json = donut_processor.token2json(cleaned_cheque_details)
    print("cheque_details_json:",cheque_details_json['cheque_details'])

    ## extract required fields from predicted json

    amt_in_words  = cheque_details_json['cheque_details'][0]['amt_in_words']
    amt_in_figures = cheque_details_json['cheque_details'][1]['amt_in_figures']
    macthing_amts = match_legal_and_courstesy_amount(amt_in_words,amt_in_figures)

    payee_name = cheque_details_json['cheque_details'][2]['payee_name']

    bank_name = cheque_details_json['cheque_details'][3]['bank_name']
    cheque_date = cheque_details_json['cheque_details'][4]['cheque_date']

    stale_cheque = check_if_cheque_is_stale(cheque_date)

    return payee_name,amt_in_words,amt_in_figures,bank_name,cheque_date,macthing_amts,stale_cheque

def spell_check(amt_in_words):
    sym_spell = SymSpell(max_dictionary_edit_distance=2,prefix_length=7)
    dictionary_path = pkg_resources.resource_filename("symspellpy", "frequency_dictionary_82_765.txt")
    bigram_path = pkg_resources.resource_filename("symspellpy", "frequency_bigramdictionary_en_243_342.txt")

    sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1)
    sym_spell.load_bigram_dictionary(bigram_path, term_index=0, count_index=2)

    suggestions = sym_spell.lookup_compound(amt_in_words, max_edit_distance=2)

    return suggestions[0].term

def match_legal_and_courstesy_amount(legal_amount,courtesy_amount):
    macthing_amts = False
    if len(legal_amount) == 0:
        return macthing_amts

    corrected_amt_in_words = spell_check(legal_amount)
    print("corrected_amt_in_words:",corrected_amt_in_words)

    numeric_legal_amt = w2n.word_to_num(corrected_amt_in_words)
    print("numeric_legal_amt:",numeric_legal_amt)
    if int(numeric_legal_amt) == int(courtesy_amount):
        macthing_amts = True
    return macthing_amts

def check_if_cheque_is_stale(cheque_issue_date):
    stale_check = False
    current_date = datetime.now().strftime('%d/%m/%y')
    current_date_ = datetime.strptime(current_date, "%d/%m/%y")
    cheque_issue_date_ = datetime.strptime(cheque_issue_date, "%d/%m/%y")
    relative_diff = relativedelta.relativedelta(current_date_, cheque_issue_date_)
    months_difference = (relative_diff.years * 12) + relative_diff.months
    print("months_difference:",months_difference)
    if months_difference > 3:
        stale_check = True
    return stale_check


In [4]:
path = "test.jpeg"
parse_cheque_with_donut(path)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/361 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/588 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/1.30M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.01M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/448 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/355 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/5.10k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/809M [00:00<?, ?B/s]

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
2024-03-15 10:32:47,487: E symspellpy.symspellpy] Dictionary file not found at /usr/local/lib/python3.10/dist-packages/symspellpy/frequency_dictionary_82_765.txt.
ERROR:symspellpy.symspellpy:Dictionary file not found at /usr/local/lib/python3.10/dist-packages/symspellpy/frequency_dictionary_82_765.txt.


cheque_details_json: [{'amt_in_words': 'Three Thousand Seven Hundred and Eighty Five'}, {'amt_in_figures': '3755'}, {'payee_name': 'Edmee Pelletier'}, {'bank_name': 'AXIS BANK'}, {'cheque_date': '06/05/22'}]
corrected_amt_in_words: three thousand seven hundred and eighty five
numeric_legal_amt: 3785
months_difference: 22


('Edmee Pelletier',
 'Three Thousand Seven Hundred and Eighty Five',
 '3755',
 'AXIS BANK',
 '06/05/22',
 False,
 True)

In [5]:
# UI Code

import os
import glob
import gradio as gr

##Create list of examples to be loaded
example_list = glob.glob("examples/cheque_parser/*")
example_list = list(map(lambda el:[el], example_list))

demo = gr.Blocks()

with demo:

    gr.Markdown("# **<p align='center'>ChequeEasy: Banking with Transformers </p>**")
    gr.Markdown("ChequeEasy is a project that aims to simplify the process of approval of cheques and making it easier for both bank officials and customers. \
    This project leverages Donut model proposed in the paper <a href=\"https://arxiv.org/abs/2111.15664/\"> OCR-free Document Understanding Transformer </a> for the parsing of the required data from cheques." \
    "Donut is based on a very simple transformer encoder and decoder architecture. It's main USP is that it is an OCR-free approach to Visual Document Understanding (VDU) and can perform tasks like document classification, information extraction as well as VQA. \
    OCR based techniques come with several limitations such as requiring use of additional downstream models, lack of understanding about document structure, requiring use of hand crafted rules for information extraction,etc. \
    Donut helps you get rid of all of these OCR specific limitations. The model for the project has been trained using a subset of this  <a href=\"https://www.kaggle.com/datasets/medali1992/cheque-images/\"> kaggle dataset </a>. The original dataset contains images of cheques of 10 different banks. \
    A filtered version of this dataset containing images of cheques from 4 banks that are more commonly found in the Indian Banking Sector was created with ground truth prepared in the format required for fine-tuning Donut. This <a href=\"https://huggingface.co/datasets/shivi/cheques_sample_data/\"> dataset </a> is available on the Hugging Face Hub for download.")


    with gr.Tabs():

        with gr.TabItem("Cheque Parser"):
            gr.Markdown("This module is used to extract details filled by a bank customer from cheques. At present the model is trained to extract details like - Payee Name, Amount in words, Amount in Figures, Bank Name and Cheque Date.  \
            This model can be further trained to parse additional details like MICR Code, Cheque Number, Account Number, etc.   \
            Additionally, the app compares if the extracted legal & courtesy amount are matching which is an important check done during approval process of cheques. \
            It also checks if the cheque is stale. A cheque is considered stale if it is presented to the bank 3 months after the date mentioned on the cheque.")
            with gr.Group():
                gr.Markdown("**Upload Cheque**")
                input_image_parse = gr.Image(type='filepath', label="Input Cheque")
            with gr.Group():
                gr.Markdown("**Parsed Cheque Data**")

                payee_name = gr.Textbox(label="Payee Name")
                amt_in_words = gr.Textbox(label="Legal Amount")
                amt_in_figures = gr.Textbox(label="Courtesy Amount")
                cheque_date = gr.Textbox(label="Cheque Date")
                bank_name = gr.Textbox(label="Bank Name")

                amts_matching = gr.Checkbox(label="Legal & Courtesy Amount Matching")
                stale_check = gr.Checkbox(label="Stale Cheque")

            with gr.Group():
                gr.Markdown("**Predict**")
                with gr.Row():
                    parse_cheque = gr.Button("Call Donut 🍩")

            with gr.Column():
                gr.Examples(example_list, [input_image_parse],
                            [payee_name,amt_in_words,amt_in_figures,cheque_date],parse_cheque_with_donut,cache_examples=False)


    parse_cheque.click(parse_cheque_with_donut, inputs=input_image_parse, outputs=[payee_name,amt_in_words,amt_in_figures,bank_name,cheque_date,amts_matching,stale_check])

    gr.Markdown('\n Solution built by: <a href=\"https://twitter.com/singhshiviii/\">Shivalika Singh</a>')

demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://2c4c75d2667661959b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


ReadTimeout: The read operation timed out