In [0]:
# Uncomment the line below on the first run to install the dependencies
# !pip install spacy google-cloud-vision

In [6]:
# Unzipping model files
! unzip models.zip

Archive:  models.zip
   creating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/
  inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/meta.json  
   creating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/ner/
  inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/ner/cfg  
  inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/ner/model  
  inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/ner/moves  
  inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/tokenizer  
   creating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/
 extracting: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/key2row  
  inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/lexemes.bin  
  inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/strings.json  
  inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/vectors  
   creating: models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/
  inflating: models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/meta.json  
   creating: models/AIDL_N

In [0]:
# Importing Libraries
import spacy
import os 
import json
from google.cloud import vision
import io

# Setting Environment Variable for Vision API.
# setdefault keeps credentials that are already configured in the environment
# and only falls back to the bundled key file path when none are set.
os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", "/content/fyp-bot-fkvpth-63ef51dcf510.json")

In [0]:
# Run configuration used by the cells below.
# File to analyse, and how getOutput should read it: "img" or "txt".
filename = "sample.jpg"
fileType = "img"
# Directory of the saved spaCy model to load.
modelDir = "models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA"

In [0]:
# Initializing vision API: `client` is the module-level Vision client that
# getOutput uses for text detection on image inputs.
# NOTE(review): presumably authenticates via GOOGLE_APPLICATION_CREDENTIALS set above - confirm.
client = vision.ImageAnnotatorClient()

# Loading the saved Spacy model from `modelDir`; `nlp` is the pipeline that
# getOutput calls to extract the bill entities.
nlp = spacy.load(modelDir)

In [0]:
def _readText(type, data):
  """
  Return the text to analyse for the file at path `data`.

  When `type` is "img" the file is sent to the Vision API for text detection;
  for any other `type` the file is read as plain text.
  """
  if type == "img":
    with io.open(data, 'rb') as image_file:
      # Reading file content
      content = image_file.read()
    # google-cloud-vision >= 2.0 exposes Image at the package top level, while
    # older releases only provide it under vision.types.
    image_cls = getattr(vision, "Image", None) or vision.types.Image
    # Getting results from Vision API
    text_response = client.text_detection(image=image_cls(content=content))
    texts = [text.description for text in text_response.text_annotations]
    # Only the first annotation is used as the text to analyse; fall back to
    # an empty string when nothing was detected instead of raising IndexError.
    return texts[0] if texts else ""
  # Context manager guarantees the txt file is closed after reading
  with open(data, "r") as text_file:
    return text_file.read()


def _summarizeEntities(ents):
  """
  Fold the recognised entities into one dictionary with sorted keys.

  - "Total bill amount": the largest positive amount that parses as a float.
  - "Items": list of every item text, in order of appearance (always present).
  - Any other label: stored under the label; a repeated label is stored under
    "<label>-<n>" so earlier values are not overwritten.
  """
  result = {}
  items_list = []
  max_amt = 0
  # Suffix counter for repeated labels; deliberately shared across all labels
  # so the generated key names stay the same as before.
  i = 1
  for ent in ents:
    if ent.label_ == "Total bill amount":
      try:
        amt = float(ent.text)
      except ValueError:
        # Best effort: ignore amounts that are not plain numbers
        continue
      if amt > max_amt:
        # Remember the running maximum so a later, smaller amount cannot
        # replace the real total.
        max_amt = amt
        result["Total bill amount"] = amt
    elif ent.label_ == "Items":
      items_list.append(ent.text)
    elif ent.label_ in result:
      result[ent.label_ + "-" + str(i)] = ent.text
      i += 1
    else:
      result[ent.label_] = ent.text
  # Storing the list of items using the Items key in the dictionary
  result["Items"] = items_list
  # Sorting all the elements of the dictionary by key
  return dict(sorted(result.items()))


def getOutput(type, data):
  """
  Extract bill entities from a file and print them as indented JSON.

  Parameters:
    type: type of data, either "img" (image run through the Vision API) or
          anything else, e.g. "txt" (file read as plain text)
    data: path of the file to process
  Output: Prints the dictionary; nothing is returned
  """
  textToPredict = _readText(type, data)
  # Sending textual data to Spacy model for NER
  doc = nlp(textToPredict)
  # Printing final result
  print(json.dumps(_summarizeEntities(doc.ents), indent=2))

In [14]:
%time
# Calling the function to get the output
getOutput(fileType, filename)

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 7.15 µs
{
  "Date": "29-11-2019",
  "Invoice number": "201911291623",
  "Items": [
    "YOU ARE THE GREATE\nST MUG-FATHER",
    "PRINTED PAPER MATT\nER\nCRAFT PAPER BAG- H"
  ],
  "Store address": "PHOENIX MARKETCITY\n",
  "Store address-2": "S-23,IIND FLOOR, 142, VELACHERY MAIN ROAD,\nCHENNAI-600042",
  "Store name": "ARCHIES",
  "Store name-1": "ARCHIES",
  "Time": "16:22",
  "Total bill amount": 434.0
}
