In [1]:
# Service requests require credentials
#     Google has classified service request from local machine as server-to-server.
#     I have downloaded credentials as a json file to local machine. Here, I create
#     a credentials object by supplying the downloaded file.
from google.oauth2 import service_account

SCOPES = ['https://www.googleapis.com/auth/cloud-platform']
SERVICE_ACCOUNT_FILE = 'vision_fission.json'

credentials = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, scopes=SCOPES)

# Also note: From https://console.developers.google.com/
# 1. A capsule of work is a project.
# 2. Services need to be enabled for a project.
# 3. Billing needs to be enabled for a project with valid Credit Card.
# Else, NO GO!

# Ref: https://google-cloud-python.readthedocs.io/en/latest/vision/index.html

In [2]:
# Create a client object for a specific service and
# pass it credentials. Ensure that you have satisfied
# all conditions noted above. 
# Service methods are invoked using the client object.
import io
import os
from google.cloud import vision
from google.cloud.vision import types

client = vision.ImageAnnotatorClient(
    credentials=credentials
)

# Ref: https://google-cloud-python.readthedocs.io/en/latest/vision/index.html

In [6]:
# There is the triumvirate of 
# 1. Input (image file)
# 2. Service (vision client)
# 3. Output (json)
# Ingest the image file with io
file_name = 'sunbeam.JPG'

with io.open(file_name, 'rb') as image_file:
    content = image_file.read()

image = types.Image(content=content)
# Ref: https://cloud.google.com/vision/docs/libraries#client-libraries-install-python

In [23]:
# Make the service call and see what's in the response.
# Invoke a method exposed by the API. 
# 1. Use: label_detection
# 2. Use: annotate_image
# 3. Use: document_text_detection
# Let's detect labels with 1.

response = client.label_detection(image=image)
labels = response.label_annotations

print("Labels: ")
for label in labels:
    print(label.description)
# Ref: https://cloud.google.com/vision/docs/libraries
# Ref: http://google-cloud-python.readthedocs.io/en/latest/vision/gapic/v1/api.html

Labels: 
text
line
font
media
document
identity document
jaw


In [37]:
# Make the service call and see what's in the response.
# Invoke a method exposed by the API. 
# 1. Use: label_detection
# 2. Use: annotate_image
# 3. Use: document_text_detection
# Let's detect annotation with 2.

response = client.annotate_image({
    'image': image, #{'source': {'image_uri': '.\my-test-bucket\sunbeam.jpg'}},
    'features': [{'type': vision.enums.Feature.Type.FACE_DETECTION}],
})
faces = response.face_annotations

likelihood_name = ('UNKNOWN', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 
                   'LIKELY', 'VERY_LIKELY')
print("Faces:")
for face in faces:
    print "Anger likelihood is %s" % likelihood_name[face.anger_likelihood]
    print "Joy likelihood is %s" % likelihood_name[face.joy_likelihood]
    print "Surprise likelihood is %s" % likelihood_name[face.surprise_likelihood]
    
# Ref: https://cloud.google.com/vision/docs/detecting-faces

Faces:
Anger likelihood is VERY_UNLIKELY
Joy likelihood is VERY_UNLIKELY
Surprise likelihood is VERY_UNLIKELY


In [11]:
# Make the service call and see what's in the response.
# Invoke a method exposed by the API. 
# 1. Use: label_detection
# 2. Use: annotate_image
# 3. Use: document_text_detection
# Let's extract text by OCR with 3.

response = client.document_text_detection(image=image)
document = response.full_text_annotation
document

# Ref: https://cloud.google.com/vision/docs/fulltext-annotations
# Ref: https://cloud.google.com/vision/docs/detecting-fulltext

pages {
  property {
    detected_languages {
      language_code: "en"
      confidence: 0.560000002384
    }
    detected_languages {
      language_code: "fi"
      confidence: 0.129999995232
    }
    detected_languages {
      language_code: "fil"
      confidence: 0.119999997318
    }
    detected_languages {
      language_code: "uz"
      confidence: 0.070000000298
    }
    detected_languages {
      language_code: "fr"
      confidence: 0.0500000007451
    }
    detected_languages {
      language_code: "eu"
      confidence: 0.019999999553
    }
    detected_languages {
      language_code: "su"
      confidence: 0.019999999553
    }
    detected_languages {
      language_code: "it"
      confidence: 0.019999999553
    }
    detected_languages {
      language_code: "no"
      confidence: 0.00999999977648
    }
  }
  width: 2700
  height: 1703
  blocks {
    bounding_box {
      vertices {
        x: 2145
        y: 38
      }
      vertices {
        x: 2670
        y: 38


In [14]:
document.text

u'FORM - 7\n. [See Rule 16(2)]\nDL No. KA51 20160029943 DOI: 18/10/2016\nNAME\nSANJAY RAJAN BHATIKAR\nD.O.B 29/03/1975\nB.G. :\nVALID TILL : 28/03/2025(NT)\nVALID THROUGHOUT INDIA\nCOV: LMV 18/10/2016\nCDOI: 14-11-2016\nSlo\n: RAJAN JAYAKRISHNA BHATIKAR\nADDRESS : #B10 405 L&T SOUTH CITY ARAKERE MICO\nLAYOUT BANGALORE ( ABHI DS ) 560076\nrillant\nSign. Of Holder\nSign. Licencing Authority\nECITY-BANGALORE\n'

In [22]:
document.pages[1]

IndexError: list index out of range

In [None]:
def show_results(inputfile, data, outputfile):
    #read original file
    im = read_image(inputfile)
    
    #draw face, boxes and text for each response
    for r in data['responses']:
        if 'faceAnnotations' in r:
            draw_face(im, r['faceAnnotations'])
        
        if 'labelAnnotations' in r:
            strs = map(lambda a: a['description'], r['labelAnnotations'])
            im = draw_text(im, ", ".join(strs))
            
        for field in ['textAnnotations', 'logoAnnotations']:
            if field in r:
                for a in r[field]:
                    draw_box(im, a['boundingPoly']['vertices'])

    #save to output file
    save_image(outputfile, im)

In [None]:
# LABEL_DETECTION --- Execute Image Content Analysis on the entire image and return
# TEXT_DETECTION --- Perform Optical Character Recognition (OCR) on text within the image
# FACE_DETECTION --- Detect faces within the image
# LANDMARK_DETECTION --- Detect geographic landmarks within the image
# LOGO_DETECTION --- Detect company logos within the image
# SAFE_SEARCH_DETECTION --- Determine image safe search properties on the image
# IMAGE_PROPERTIES --- Compute a set of properties about the image (such as the image's dominant colors)

# Ref: 