## Google Cloud APIs

### Image

In [1]:
from google.cloud import vision
from google.cloud.vision import types
from PIL import Image, ImageDraw
import os
import io

os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="path/key.json"

#### Landmarks

In [2]:
path = 'path/taj.jpg'
client = vision.ImageAnnotatorClient()

with io.open(path, 'rb') as image_file:
    content = image_file.read()

image = vision.types.Image(content=content)

response = client.landmark_detection(image=image)
landmarks = response.landmark_annotations
print('Landmarks:')

for landmark in landmarks:
    print("Description : ",landmark.description)
    for location in landmark.locations:
        lat_lng = location.lat_lng
        print('Latitude {}'.format(lat_lng.latitude))
        print('Longitude {}'.format(lat_lng.longitude))

Landmarks:
Description :  Taj Mahal
Latitude 27.174698469698683
Longitude 78.042073


#### Image Properties
The ColorInfo field does not carry information about the absolute color space that should be used to interpret the RGB value (e.g. sRGB, Adobe RGB, DCI-P3, BT.2020, etc.). By default, applications should assume the sRGB color space.

In [3]:
response = client.image_properties(image=image)
props = response.image_properties_annotation
print('Properties:')

for color in props.dominant_colors.colors:
    print('fraction: {}'.format(color.pixel_fraction))
    print('\tr: {}'.format(color.color.red))
    print('\tg: {}'.format(color.color.green))
    print('\tb: {}'.format(color.color.blue))
    print('\ta: {}'.format(color.color.alpha))

Properties:
fraction: 0.05480984225869179
	r: 127.0
	g: 115.0
	b: 121.0
	a: 
fraction: 0.07614253461360931
	r: 95.0
	g: 85.0
	b: 93.0
	a: 
fraction: 0.03147970512509346
	r: 161.0
	g: 149.0
	b: 154.0
	a: 
fraction: 0.06032278761267662
	r: 155.0
	g: 157.0
	b: 181.0
	a: 
fraction: 0.15164589881896973
	r: 181.0
	g: 187.0
	b: 216.0
	a: 
fraction: 0.019734740257263184
	r: 204.0
	g: 191.0
	b: 195.0
	a: 
fraction: 0.011744966730475426
	r: 86.0
	g: 84.0
	b: 104.0
	a: 
fraction: 0.017178012058138847
	r: 117.0
	g: 116.0
	b: 139.0
	a: 
fraction: 0.029322467744350433
	r: 103.0
	g: 82.0
	b: 94.0
	a: 
fraction: 0.0069511025212705135
	r: 131.0
	g: 111.0
	b: 122.0
	a: 


#### Content Moderation
Safe Search Detection detects explicit content such as adult content or violent content within an image. This feature uses five categories (adult, spoof, medical, violence, and racy) and returns the likelihood that each is present in a given image.

In [4]:
path = 'path/brain.jpg'
with io.open(path, 'rb') as image_file:
    content = image_file.read()

image = vision.types.Image(content=content)

response = client.safe_search_detection(image=image)
safe = response.safe_search_annotation

# Names of likelihood from google.cloud.vision.enums
likelihood_name = ('UNKNOWN', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE',
                   'LIKELY', 'VERY_LIKELY')
print('Safe search:')

print('adult: {}'.format(likelihood_name[safe.adult]))
print('medical: {}'.format(likelihood_name[safe.medical]))
print('spoofed: {}'.format(likelihood_name[safe.spoof]))
print('violence: {}'.format(likelihood_name[safe.violence]))
print('racy: {}'.format(likelihood_name[safe.racy]))

Safe search:
adult: VERY_UNLIKELY
medical: POSSIBLE
spoofed: VERY_UNLIKELY
violence: LIKELY
racy: UNLIKELY


#### Emotion
Only anger, sorrow and joy
This does face detection as well

In [5]:
path = 'path/happy.jpg'
with io.open(path, 'rb') as image_file:
        content = image_file.read()

image = vision.types.Image(content=content)

response = client.face_detection(image=image)
faces = response.face_annotations

# Names of likelihood from google.cloud.vision.enums
likelihood_name = ('UNKNOWN', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE',
                   'LIKELY', 'VERY_LIKELY')
print('Faces:')

for face in faces:
    print('anger: {}'.format(likelihood_name[face.anger_likelihood]))
    print('joy: {}'.format(likelihood_name[face.joy_likelihood]))
    print('surprise: {}'.format(likelihood_name[face.surprise_likelihood]))

    vertices = (['({},{})'.format(vertex.x, vertex.y)
                for vertex in face.bounding_poly.vertices])

Faces:
anger: VERY_UNLIKELY
joy: VERY_LIKELY
surprise: VERY_UNLIKELY


#### Object Classification
Actually an object localizer, we can use just the classification aspect

In [6]:
path = 'path/tiger.jpg'
with io.open(path, 'rb') as image_file:
        content = image_file.read()
image = vision.types.Image(content=content)

objects = client.object_localization(image=image).localized_object_annotations

print('Number of objects found: {}'.format(len(objects)))
for object_ in objects:
    print('\n{} (confidence: {})'.format(object_.name, object_.score))

Number of objects found: 1

Tiger (confidence: 0.9640637636184692)


#### Celebrity 
Needs a seperate form, waiting for permission

### Audio

In [3]:
from google.cloud import speech_v1
from google.cloud.speech_v1 import enums
import io
import os

os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="path/key.json"

local_file_path = 'path/00.wav'

client = speech_v1.SpeechClient()

# local_file_path = 'resources/brooklyn_bridge.raw'

# The language of the supplied audio
language_code = "en-US"

# Sample rate in Hertz of the audio data sent
sample_rate_hertz = 48000

# Encoding of audio data sent. This sample sets this explicitly.
# This field is optional for FLAC and WAV audio formats.
encoding = enums.RecognitionConfig.AudioEncoding.LINEAR16
config = {
    "language_code": language_code,
    "sample_rate_hertz": sample_rate_hertz,
    "encoding": encoding,
}
with io.open(local_file_path, "rb") as f:
    content = f.read()
audio = {"content": content}

response = client.recognize(config, audio)
for result in response.results:
    # First alternative is the most probable result
    alternative = result.alternatives[0]
    print(u"Transcript: {}".format(alternative.transcript))

InvalidArgument: 400 sample_rate_hertz (48000) in RecognitionConfig must either be omitted or match the value in the WAV header ( 44100).

In [4]:

rate

44100