In [0]:
# Uncomment the line below on the first run
# !pip install spacy google-cloud-vision

In [0]:
# Unzipping model files
! unzip models.zip

In [0]:
# Importing Libraries
import spacy
import os 
import json
from google.cloud import vision
import io

# Setting Environment Variable for Vision API
# setdefault keeps credentials that are already configured in the environment and
# only falls back to the key file path used in the original Colab session.
os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", "/content/fyp-bot-fkvpth-63ef51dcf510.json")

In [0]:
# Setting variables
# Directory of the trained spaCy NER model, relative to the notebook's working directory
# (presumably extracted from models.zip -- verify against the archive layout).
modelDir = "models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA"

In [0]:
# Restore the trained spaCy pipeline from the model directory
nlp = spacy.load(modelDir)

# Create the Google Cloud Vision client used for text detection on images
client = vision.ImageAnnotatorClient()

In [0]:
def _read_receipt_text(type, data):
  """Return the raw text to run NER on, read from an image or a text file."""
  if type == "img":
    with io.open(data, 'rb') as image_file:
      # Reading file content
      content = image_file.read()
    # google-cloud-vision >= 2.0 exposes Image at the package top level and no
    # longer ships the `types` submodule; 1.x only offers vision.types.Image.
    image_cls = getattr(vision, "Image", None) or vision.types.Image
    # Getting results from Vision API
    text_response = client.text_detection(image=image_cls(content=content))
    annotations = text_response.text_annotations
    # The first annotation is used as the full text; an image without any
    # detected text yields no annotations, so fall back to an empty string
    # instead of raising IndexError.
    return annotations[0].description if annotations else ""
  # Anything that is not "img" is treated as a path to a text file;
  # the context manager makes sure the handle is closed.
  with open(data, "r") as text_file:
    return text_file.read()


def _entities_to_dict(ents):
  """Fold NER entities (objects with .label_ and .text) into a key-sorted dict."""
  result = {}
  items_list = []
  max_amt = 0
  suffix = 1
  for ent in ents:
    if ent.label_ == "Total bill amount":
      # Keep only the largest parseable amount; unparseable candidates are skipped
      try:
        amt = float(ent.text)
      except (TypeError, ValueError):
        continue
      if amt > max_amt:
        # Remember the running maximum so a later, smaller amount cannot replace it
        max_amt = amt
        result["Total bill amount"] = amt
    elif ent.label_ == "Items":
      items_list.append(ent.text)
    elif ent.label_ in result:
      # Label already stored: keep both values by writing the new one under a
      # numbered key ("<label>-<n>"); the counter is shared by all labels.
      result[ent.label_ + "-" + str(suffix)] = ent.text
      suffix += 1
    else:
      result[ent.label_] = ent.text
  # The "Items" key is always present, even when no item was detected
  result["Items"] = items_list
  return dict(sorted(result.items()))


def getOutput(type, data):
  """
  Extract entities from a receipt and print them as indented JSON.

  Parameters:
    type: kind of input; "img" sends the file to the Vision API for text
          detection, any other value reads `data` as a plain text file.
          (The name shadows the builtin but is kept for existing callers.)
    data: path to the image or text file.
  Output: Prints the dictionary (keys sorted) as JSON; returns None.
  """
  textToPredict = _read_receipt_text(type, data)
  # Sending textual data to Spacy model for NER
  doc = nlp(textToPredict)
  # Printing final result
  print(json.dumps(_entities_to_dict(doc.ents), indent=2))

In [50]:
%time
# Calling the function to get the output
getOutput("img", "1.1.png")

CPU times: user 2 µs, sys: 2 µs, total: 4 µs
Wall time: 7.87 µs
{
  "Date": "Date:2019-10-28",
  "GSTIN": "33AAPFP2374MIZR",
  "Invoice number": "219",
  "Items": [
    "Tandoori\nPizzaiolo",
    "Kebab Cobb\nSalad"
  ],
  "Store address": "Palladium FC 04, No 142,\nVelachery Main Road\nChennai-600042",
  "Store name": "lyfe by soul Garden Bistro",
  "Time": "21:21:55",
  "Total bill amount": 890.0
}


In [51]:
%time
# Calling the function to get the output
getOutput("img", "1.jpg")

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 4.77 µs
{
  "Date": "28-Nov-19",
  "GSTIN": "33AATCG73851125",
  "Invoice number": "LTN02B1920003774",
  "Items": [
    "VEG RICE BOWL MEA",
    "CLASSIC LEMONADE",
    "MILD BASTING\nGAL",
    "VEG RICE BOWL MEA",
    "CLASSIC LEMONADE",
    "MILD BASTING\n",
    "VEG RICE BOWL MEA\nGAL",
    "CLASSIC LEMONADE",
    "MILD BASTING GAL",
    "QUARTER CHICKEN M",
    "CORN ON THE COB",
    "CLASSIC LEMONADE",
    "MILD BASTING GAL"
  ],
  "Store address": "Unit No: UG-41,PMC,0ld Door.No. 66, New\nDoor No. 142, Velaohery, Channai",
  "Store name": "Calito's",
  "Store name-1": "Galito's",
  "Time": "16:47",
  "Total bill amount": 762.0
}


In [52]:
%time
# Calling the function to get the output
getOutput("img", "2.jpg")

CPU times: user 2 µs, sys: 2 µs, total: 4 µs
Wall time: 7.15 µs
{
  "Date": "Date:2019-11-28",
  "Invoice number": "1925",
  "Items": [
    "\u0421\u041d\u041e\u0421OLAT\u0415 \u041e\nVERLOAD"
  ],
  "Store name": "Belgian Waffle",
  "Time": "18:41:46",
  "Total bill amount": 140.0
}


In [53]:
%time
# Calling the function to get the output
getOutput("img", "3.jpg")

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.91 µs
{
  "GSTIN": "33AAECT2235P 1Z6",
  "Invoice number": "PM7332",
  "Items": [
    "H/S TOP XXL",
    "TOP H/S B XXL",
    "PAPER BAG M"
  ],
  "Store address": "142, VELACHERY ,MAIN ROAD,\nPHOENIX MALL, SHOP NO.S-17 VELACHERY\nVELACHERY\nCHENNAI",
  "Time": "07:51 PM",
  "Total bill amount": 269.0
}


In [54]:
%time
# Calling the function to get the output
getOutput("txt", "sample.txt")

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 5.48 µs
{
  "Date": "29-11-2019",
  "GSTIN": "33AAECA0726C1ZG",
  "Invoice number": "201911291623",
  "Items": [
    "YOU ARE THE GREATE 1",
    "ST MUG-FATHER\nPRINTED PAPER MATT 1 35.00",
    "CRAFT PAPER BAG- H 1 12.00"
  ],
  "Store address": "PHOENIX MARKETCITY\nS-23,IIND FLOOR, 142, VELACHERY MAIN ROAD,\nCHENNAI-600042",
  "Store name": "ARCHIES",
  "Store name-1": "ARCHIES",
  "Time": "16:22",
  "Total bill amount": 434.0
}


In [56]:
%time
# Calling the function to get the output
getOutput("img", "sea-lands.jpeg")

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.2 µs
{
  "GSTIN": "27AAIPS4809H1ZE",
  "Items": [
    "VEG TRIPLE SEZ FRIED",
    "RICE\nCOLD DRINK(500ML)",
    "COLD DRINK"
  ],
  "Store name": "SEA LAND",
  "Time": "03:30 PM",
  "Total bill amount": 262.5
}
