# Green receipt
## From receipt image to list of items

In [1]:
# Optional one-time setup for Voila (uncomment to run):
# %pip install voila   # or, with conda: conda install -c conda-forge voila
# !jupyter serverextension enable voila --sys-prefix

In [2]:
import numpy as np
import re
import json

import io
from matplotlib import pyplot as plt
from PIL import Image

# IPython widgets
import ipywidgets as widgets

import cv2
import pytesseract
from pytesseract import Output
%matplotlib inline

## Function to incorporate as widget

In [3]:
def _parse_cost(text):
    """Return the first decimal amount found in ``text`` as a float, or None.

    All whitespace is removed before matching, so an amount broken up by
    stray spaces is still recognised. A comma decimal separator is normalised
    to a dot because ``float`` rejects strings such as ``'1,50'``.
    """
    match = re.search(r'[0-9]+(\.|,)[0-9]+', ''.join(text.split()))
    if match is None:
        return None
    return float(match.group().replace(',', '.'))


def ReceiptProcessing(pil_im):
    """Run OCR on a receipt image and return its priced lines as a JSON string.

    The image is thresholded (inverse binary at 150), Gaussian-blurred with a
    5x5 kernel and inverted again before being handed to Tesseract (English,
    ``--psm 3``). Only OCR lines containing a ``£`` sign are considered, and
    they are lower-cased before classification:

    * a line containing ``balance`` supplies the receipt total (if there are
      several, the last one wins);
    * lines containing ``visa`` or ``cash`` are payment lines and are left out
      of the item list;
    * every other line is an item: the text before the first ``£`` is its
      name and the amount after the last ``£`` is its cost.

    Parameters
    ----------
    pil_im : array-like
        Anything ``np.asarray`` accepts; the notebook passes a PIL image.

    Returns
    -------
    str
        JSON of the form
        ``{"items": {name: {"cost": float}}, "total": float or null}``.
        ``total`` is ``null`` when no balance line with a parsable amount was
        found. Item lines without a parsable amount are skipped instead of
        raising.
    """
    # Pre-processing: inverse binary threshold, blur, then invert back.
    thres = cv2.threshold(np.asarray(pil_im), 150, 255, cv2.THRESH_BINARY_INV)[1]
    image = cv2.GaussianBlur(thres, (5, 5), 0)
    image = 255 - image

    language = 'eng'
    extracted_text = pytesseract.image_to_string(image, lang=language, config='--psm 3')

    # Only lines that carry a price are of interest.
    lines_with_pounds = [
        line.lower() for line in extracted_text.splitlines() if '£' in line
    ]

    items = []
    total_line = None
    for line in lines_with_pounds:
        if 'balance' in line:
            total_line = line
        elif 'visa' in line or 'cash' in line:
            # Payment line: not an item. The payment type is not part of the
            # returned JSON, so nothing is recorded for it.
            continue
        else:
            items.append(line)

    ItemsDict = {}
    for item in items:
        details = item.split('£')
        name = details[0].strip()
        cost = _parse_cost(details[-1])
        if cost is None:
            # OCR noise, or a '£' with no decimal amount after it.
            continue
        # NOTE: items are keyed by name, so a repeated name overwrites the
        # earlier entry.
        ItemsDict[name] = {'cost': cost}

    # No balance line (or an unreadable one) yields None, serialised as null.
    total = None
    if total_line is not None:
        total = _parse_cost(total_line.split('£')[-1])

    receipt_ocr = {'items': ItemsDict, 'total': total}
    return json.dumps(receipt_ocr)

In [4]:
# Widgets shared by the callback and layout cells below.
btn_upload = widgets.FileUpload()                 # file picker for the receipt image
btn_run = widgets.Button(description='Itemise')   # triggers the OCR callback
ReceiptOut = widgets.Output()                     # output area for the receipt thumbnail
ItemsOut = widgets.Output()                       # output area for the JSON printed by ReceiptProcessing

In [5]:
def on_click_itemise(change):
    """Button callback: OCR the uploaded receipt and show the result.

    Reads the files held by ``btn_upload`` and, as before, processes only the
    last one. The JSON returned by ``ReceiptProcessing`` is printed into
    ``ItemsOut`` and a thumbnail of at most 256x256 pixels is shown in
    ``ReceiptOut``. Both output areas are cleared first so repeated clicks do
    not pile up results.

    Parameters
    ----------
    change
        The argument ipywidgets passes to the click callback; unused.
    """
    uploads = btn_upload.value
    # ipywidgets 7 exposes a dict keyed by file name; ipywidgets 8 exposes a
    # tuple of dict-like entries. Normalise both to a list of entries.
    if isinstance(uploads, dict):
        files = list(uploads.values())
    else:
        files = list(uploads)

    ReceiptOut.clear_output()
    ItemsOut.clear_output()

    if not files:
        # Clicking before uploading used to fail with a NameError on pil_im.
        with ItemsOut:
            print('Please upload a receipt image first.')
        return

    pil_im = Image.open(io.BytesIO(files[-1]['content']))

    with ItemsOut:
        print(ReceiptProcessing(pil_im))
    with ReceiptOut:
        size = [256, 256]
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        # thumbnail() resizes in place, so it must run after the OCR step.
        pil_im.thumbnail(size, Image.LANCZOS)
        display(pil_im)


# Run on_click_itemise whenever the 'Itemise' button is clicked.
btn_run.on_click(on_click_itemise)

In [6]:
# Stack the prompt, upload button, run button and the two output areas
# vertically; as the cell's last expression, the VBox is what gets displayed.
widgets.VBox([widgets.Label('Please upload your receipt'),
              btn_upload, btn_run, ReceiptOut, ItemsOut])

VBox(children=(Label(value='Please upload your receipt'), FileUpload(value={}, description='Upload'), Button(d…