In [19]:
import os
import io
import dotenv
from pprint import pprint
import six

import cv2

# Load env
dotenv.load_dotenv('../.env')

# Imports the Google Cloud Vision client library (OCR / text detection)
from google.cloud import vision
#from google.cloud.vision import types

# Imports the Google Cloud Natural Language client library (entity analysis)
from google.cloud import language
from google.cloud.language import enums
from google.cloud.language import types

class BusinessCardReader:
    """Read a business card image and collect contact-related entities.

    Text is detected with the Google Cloud Vision API (``detect_text`` /
    ``detect_text_uri``) and the detected text is then passed to the Google
    Cloud Natural Language API (``extract_entities``). Entities whose type is
    one of the keys of ``required_entities`` are collected per instance.

    Attributes:
        response: The most recent Vision text-detection response, or ``None``
            if no detection has been run yet.
        required_entities: Dict mapping entity type name to the entity names
            found for that type, joined with ``', '`` (empty string if none).
    """

    # Entity type names to collect. This class-level dict is only a template:
    # every instance gets its own copy so results never leak between readers.
    required_entities = {'ORGANIZATION': '', 'PERSON': '', 'LOCATION': '', 'ADDRESS': '', 'PHONE_NUMBER': ''}

    # Placed between several entity names of the same type.
    _entity_separator = ', '

    def __init__(self):
        self.response = None
        self._reset_entities()

    def _reset_entities(self):
        """Give this instance a fresh, empty result dict."""
        self.required_entities = dict.fromkeys(type(self).required_entities, '')

    def _add_entity(self, type_name, name):
        """Record ``name`` under ``type_name`` if that type is being collected."""
        if type_name not in self.required_entities:
            return
        current = self.required_entities[type_name]
        if current:
            self.required_entities[type_name] = current + self._entity_separator + name
        else:
            self.required_entities[type_name] = name

    @staticmethod
    def _print_annotations(texts):
        """Print each text annotation with the corners of its bounding polygon."""
        print('Texts:')

        for text in texts:
            print('\n"{}"'.format(text.description))

            vertices = ['({},{})'.format(vertex.x, vertex.y)
                        for vertex in text.bounding_poly.vertices]

            print('bounds: {}'.format(','.join(vertices)))

    def _run_text_detection(self, image):
        """Send ``image`` to the Vision API, store the response and print it.

        Raises:
            RuntimeError: If the Vision API reports an error in the response.
        """
        client = vision.ImageAnnotatorClient()
        self.response = client.text_detection(image=image)

        # The API reports per-image failures inside the response instead of
        # raising; surface them here rather than failing later on empty data.
        if self.response.error.message:
            raise RuntimeError(
                '{}\nFor more info on error messages, check: '
                'https://cloud.google.com/apis/design/errors'.format(
                    self.response.error.message))

        self._print_annotations(self.response.text_annotations)

    def detect_text(self, path):
        """Detect text in the local image file at ``path``.

        The response is stored in ``self.response`` and the annotations are
        printed.

        Raises:
            RuntimeError: If the Vision API reports an error.
        """
        with io.open(path, 'rb') as image_file:
            content = image_file.read()

        self._run_text_detection(vision.types.Image(content=content))

    def detect_text_uri(self, uri):
        """Detect text in the image located in Google Cloud Storage or on the Web.

        The response is stored in ``self.response`` and the annotations are
        printed.

        Raises:
            RuntimeError: If the Vision API reports an error.
        """
        image = vision.types.Image()
        image.source.image_uri = uri

        self._run_text_detection(image)

    def extract_entities(self):
        """Analyse the detected text and fill ``self.required_entities``.

        Results from any previous call are discarded first, so one reader can
        process several cards in a row. The collected dict is pretty-printed.

        Raises:
            RuntimeError: If no text detection has been run on this instance.
        """
        if self.response is None:
            raise RuntimeError(
                'No text detection result available; call detect_text() or '
                'detect_text_uri() first.')

        self._reset_entities()

        annotations = self.response.text_annotations
        if not annotations:
            # Nothing was recognised in the image: report empty results
            # instead of failing on annotations[0].
            pprint(self.required_entities)
            return

        # The first annotation holds the complete detected text; the
        # remaining ones are the individual words.
        text = annotations[0].description

        if isinstance(text, bytes):
            text = text.decode('utf-8')

        client = language.LanguageServiceClient()

        # Instantiates a plain text document.
        document = types.Document(
            content=text,
            type=enums.Document.Type.PLAIN_TEXT)

        # Detects entities in the document. You can also analyze HTML with:
        #   document.type == enums.Document.Type.HTML
        entities = client.analyze_entities(document).entities

        for entity in entities:
            self._add_entity(enums.Entity.Type(entity.type).name, entity.name)

        pprint(self.required_entities)

    def save_result(self):
        """Persist the extracted entities (not implemented yet; does nothing)."""
        pass

    def read_business_card(self, path):
        """Run detection, entity extraction and saving for a local image file."""
        self.detect_text(path)
        self.extract_entities()
        self.save_result()

    def read_business_card_uri(self, uri):
        """Run detection, entity extraction and saving for an image URI."""
        self.detect_text_uri(uri)
        self.extract_entities()
        self.save_result()
        

# Sample inputs: a publicly hosted business card image and a local one.
test_url = 'http://web.cs.wpi.edu/~claypool/mmsys-dataset/2011/stanford/mvs_images/business_cards/Reference/016.jpg'
image_path = '../images/001.jpg'

business_card_reader = BusinessCardReader()

# Only call the remote Google Cloud services when this is executed directly
# (script or notebook cell), not when the code is imported from elsewhere.
if __name__ == '__main__':
    # To read the card from a URL instead of a local file:
    #   business_card_reader.detect_text_uri(test_url)
    #   business_card_reader.extract_entities()
    business_card_reader.read_business_card(image_path)

Texts:

"FOR GUYS AND GALS
STANEORD HAIR
650-853-9659
First Floor Tresidder Union
mAT A MDUS
"
bounds: (71,80),(563,80),(563,953),(71,953)

"FOR"
bounds: (86,86),(174,85),(174,123),(86,124)

"GUYS"
bounds: (187,84),(306,83),(306,122),(187,123)

"AND"
bounds: (315,83),(410,82),(410,120),(315,121)

"GALS"
bounds: (424,81),(537,80),(537,119),(424,120)

"STANEORD"
bounds: (113,480),(414,551),(398,616),(98,545)

"HAIR"
bounds: (410,543),(520,456),(563,510),(453,597)

"650-853-9659"
bounds: (193,802),(448,800),(448,832),(193,834)

"First"
bounds: (127,852),(192,851),(192,873),(127,874)

"Floor"
bounds: (204,851),(278,850),(278,872),(204,873)

"Tresidder"
bounds: (287,849),(421,848),(421,872),(287,873)

"Union"
bounds: (432,846),(515,845),(515,869),(432,870)

"mAT"
bounds: (71,911),(201,912),(201,947),(71,946)

"A"
bounds: (235,912),(330,913),(330,950),(235,949)

"MDUS"
bounds: (359,913),(544,915),(544,953),(359,951)
{'ADDRESS': '',
 'LOCATION': '',
 'ORGANIZATION': 'First Floor Tresidder Uni