In [12]:
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials

from array import array
import os
from PIL import Image
import sys
import time
import json

In [10]:
# Read the service credentials from a JSON file kept outside the notebook;
# the parsed mapping is stored in `secret`.
with open('./config/secret.json') as secret_file:
    secret = json.load(secret_file)

In [13]:
# Credentials for the Computer Vision resource, taken from the mapping loaded
# from ./config/secret.json. Both entries are required; a missing one raises
# KeyError here.
KEY = secret['KEY']            # value passed to CognitiveServicesCredentials below
ENDPOINT = secret['ENDPOINT']  # value passed as the client's endpoint below

In [14]:
# Single shared client used by every cell below; authenticated with the key
# and pointed at the endpoint read from the secret file.
computervision_client = ComputerVisionClient(
    endpoint=ENDPOINT,
    credentials=CognitiveServicesCredentials(KEY),
)

In [15]:
# Sample image (landmark.jpg) from the Azure-Samples data repository, used by
# the remote-image cells below. The adjacent literals are joined into one URL.
remote_image_url = (
    "https://raw.githubusercontent.com/Azure-Samples/"
    "cognitive-services-sample-data-files/master/"
    "ComputerVision/Images/landmark.jpg"
)

# 画像タグの検出

In [33]:
print("===== Tag an image - remote =====")
# Ask the service to tag the remote image, with tag names returned in Japanese.
tags_result_remote = computervision_client.tag_image(remote_image_url, language='ja')

# List each tag with its confidence expressed as a percentage.
print("Tags in the remote image: ")
if len(tags_result_remote.tags) > 0:
    for remote_tag in tags_result_remote.tags:
        print("'{}' with confidence {:.2f}%".format(remote_tag.name, remote_tag.confidence * 100))
else:
    print("No tags detected.")
print()
'''
END - Tag an Image - remote
'''
print("End of Computer Vision quickstart.")

===== Tag an image - remote =====
Tags in the remote image: 
'屋外' with confidence 99.00%
'建物' with confidence 98.81%
'空' with confidence 98.21%
'スタジアム' with confidence 98.17%
'古代ローマ' with confidence 96.16%
'遺跡' with confidence 95.04%
'アンフィテアトルム' with confidence 93.99%
'ローマ建築' with confidence 92.65%
'史跡' with confidence 89.55%
'古代史' with confidence 89.54%
'歴史' with confidence 86.72%
'遺跡' with confidence 84.41%
'旅行' with confidence 65.85%
'大きい' with confidence 61.02%
'都市' with confidence 56.57%

End of Computer Vision quickstart.


# 画像の説明の取得

In [32]:
print("===== Describe an image - remote =====")
# Request captions for the remote image, with caption text returned in Japanese.
description_results = computervision_client.describe_image(remote_image_url, language='ja')

# Print every caption with its confidence expressed as a percentage.
print("Description of remote image: ")
if len(description_results.captions) == 0:
    print("No description detected.")
else:
    for caption in description_results.captions:
        print("'{}' with confidence {:.2f}%".format(caption.text, caption.confidence * 100))
print()
# END - Describe an Image - remote
# (The original marker was a no-op string literal copied from the tagging cell
# and still said "Tag an Image".)
print("End of Computer Vision quickstart.")

===== Describe an image - remote =====
Description of remote image: 
'城のような建物、背景はコロッセオ' with confidence 61.39%

End of Computer Vision quickstart.


# 画像カテゴリの取得

In [31]:
print("===== Analyze an image - remote =====")
# Visual features requested from the service in a single analyze call.
remote_image_features = [
    VisualFeatureTypes.categories,
    VisualFeatureTypes.brands,
    VisualFeatureTypes.adult,
    VisualFeatureTypes.color,
    VisualFeatureTypes.description,
    VisualFeatureTypes.faces,
    VisualFeatureTypes.image_type,
    VisualFeatureTypes.objects,
    VisualFeatureTypes.tags,
]
# Analyze the remote URL with those features, asking for Japanese output.
results_remote = computervision_client.analyze_image(
    remote_image_url, remote_image_features, language='ja'
)

# This cell only reports the categories; each is shown with its score as a percentage.
print("Categories from remote image: ")
if len(results_remote.categories) > 0:
    for remote_category in results_remote.categories:
        print("'{}' with confidence {:.2f}%".format(remote_category.name, remote_category.score * 100))
else:
    print("No categories detected.")
print()

===== Analyze an image - remote =====
Categories from remote image: 
'建物_' with confidence 31.64%
'その他_' with confidence 0.39%
'屋外_' with confidence 3.91%



# 物体検出

In [37]:
# Sample image (objects.jpg) from the Azure-Samples data repository.
remote_image_url3 = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-sample-data-files/master/ComputerVision/Images/objects.jpg"

print("===== Analyze an image - remote =====")
# Visual features requested from the service in a single analyze call.
remote_image_features = [
    VisualFeatureTypes.categories,
    VisualFeatureTypes.brands,
    VisualFeatureTypes.adult,
    VisualFeatureTypes.color,
    VisualFeatureTypes.description,
    VisualFeatureTypes.faces,
    VisualFeatureTypes.image_type,
    VisualFeatureTypes.objects,
    VisualFeatureTypes.tags,
]

# Call API with URL and features
results_remote = computervision_client.analyze_image(remote_image_url3, remote_image_features, language='ja')

# Print each detected object's bounding box as: x, x + w, y, y + h.
print("Detecting objects in remote image:")
if len(results_remote.objects) == 0:
    print("No objects detected.")
else:
    # The loop variable is deliberately NOT called `object`: at notebook top
    # level that would rebind the builtin `object` for every later cell.
    for detected_object in results_remote.objects:
        box = detected_object.rectangle
        print("object at location {}, {}, {}, {}".format(
            box.x, box.x + box.w,
            box.y, box.y + box.h))

===== Analyze an image - remote =====
Detecting objects in remote image:
object at location 213, 365, 85, 208
object at location 218, 402, 179, 384
object at location 238, 417, 298, 416
object at location 116, 419, 60, 386


# ローカルファイルに対応

### 物体検出

In [64]:
local_image_path = './images/sample01.jpg'

print("===== Analyze an image - local =====")
# Visual features requested from the service in a single analyze call.
remote_image_features = [
    VisualFeatureTypes.categories,
    VisualFeatureTypes.brands,
    VisualFeatureTypes.adult,
    VisualFeatureTypes.color,
    VisualFeatureTypes.description,
    VisualFeatureTypes.faces,
    VisualFeatureTypes.image_type,
    VisualFeatureTypes.objects,
    VisualFeatureTypes.tags,
]

# Call API with the local image stream and features. The file is opened in a
# context manager so the handle is closed once the call returns; the original
# bare open() left it open for the lifetime of the kernel.
with open(local_image_path, 'rb') as local_image:
    results_local = computervision_client.analyze_image_in_stream(local_image, remote_image_features, language='ja')

# Print each detected object's bounding box as: x, x + w, y, y + h.
print("Detecting objects in local image:")
if len(results_local.objects) == 0:
    print("No objects detected.")
else:
    # Avoid naming the loop variable `object`, which would rebind the builtin
    # in the notebook's global namespace.
    for detected_object in results_local.objects:
        box = detected_object.rectangle
        print("object at location {}, {}, {}, {}".format(
            box.x, box.x + box.w,
            box.y, box.y + box.h))

===== Analyze an image - local =====
Detecting objects in local image:
object at location 879, 1201, 262, 773
object at location 426, 1085, 835, 1271


### 画像タグの検出

In [51]:
local_image_path = './images/sample01.jpg'

# The context manager closes the file when the block exits, so no explicit
# close() call is needed.
with open(local_image_path, 'rb') as local_image:
    tags_result_local = computervision_client.tag_image_in_stream(local_image, language='ja')

# Keep only the tag names, in the order the service returned them.
tags = [tag.name for tag in tags_result_local.tags]

tags

['食器',
 '食品',
 'オーブンで焼かれた食品',
 'プレート',
 '飲料',
 'コーヒー カップ',
 '食器類',
 'ソーサー',
 'スナック',
 'サーブウェア',
 '食事',
 'マグカップ',
 'お茶',
 'ファスト フード',
 '朝食',
 'フォーク',
 '食器',
 '料理',
 'ブランチ (食事)',
 '大皿',
 'デザート',
 'カップ',
 'コーヒー',
 '屋内',
 '座っている',
 'テーブル']

# 関数

In [62]:
# Relative path of the sample image used when calling the helpers defined below.
local_image_path = "./images/sample01.jpg"

def read_local_image(filepath: str) -> object:
    """Yield the file at ``filepath`` opened for binary reading.

    This is a generator function: iterating it produces the open file object
    exactly once. The file is closed when the generator is resumed after that
    single yield (i.e. when the iteration finishes) or when the generator is
    closed.

    Args:
        filepath: Path of the file to open in ``'rb'`` mode.

    Yields:
        The open binary file object.
    """
    with open(filepath, 'rb') as image_stream:
        yield image_stream

def analyze_local_image(local_image: object, language: str='en') -> object:
    """Analyze an image stream with the notebook-level ``computervision_client``.

    Requests the categories, objects and tags visual features and returns
    whatever ``analyze_image_in_stream`` returns.

    Args:
        local_image: Forwarded unchanged as the image stream argument.
        language: Forwarded as the ``language`` keyword; defaults to ``'en'``.

    Returns:
        The result object produced by ``analyze_image_in_stream``.
    """
    # A reduced feature set; add further VisualFeatureTypes members here if
    # more of the analysis is needed.
    requested_features = [
        VisualFeatureTypes.categories,
        VisualFeatureTypes.objects,
        VisualFeatureTypes.tags,
    ]
    return computervision_client.analyze_image_in_stream(
        local_image, requested_features, language=language
    )

def get_local_tags(results: object):
    """Return the ``name`` of every entry in ``results.tags``.

    Args:
        results: Any object exposing an iterable ``tags`` attribute whose
            items each have a ``name`` attribute.

    Returns:
        A new list of the names, in the same order as ``results.tags``.
    """
    return [tag.name for tag in results.tags]

def get_detect_objects(results: object):
    """Return ``results.objects`` unchanged.

    Callers in this notebook iterate the returned value and read each item's
    ``rectangle`` (``x``, ``y``, ``w``, ``h``) and ``object_property``
    attributes to print the detected objects.

    Args:
        results: Any object exposing an ``objects`` attribute.

    Returns:
        The very same object stored in ``results.objects`` (no copy is made).
    """
    detected = results.objects
    return detected

In [69]:
# read_local_image is a generator that yields the opened file once, so this
# loop body runs a single time and the file is closed when the loop finishes.
for local_image in read_local_image(local_image_path):
    results = analyze_local_image(local_image, language='ja')
    tags = get_local_tags(results)
    print(tags)
    objects = get_detect_objects(results)
    # The loop variable is not called `object`, which would rebind the builtin
    # in the notebook's global namespace for every later cell.
    for detected_object in objects:
        box = detected_object.rectangle
        # Bounding box printed as: x, x + w, y, y + h.
        print("object at location {}, {}, {}, {}".format(
            box.x, box.x + box.w,
            box.y, box.y + box.h))
        print(detected_object.object_property)

['食器', '食品', 'オーブンで焼かれた食品', 'プレート', '飲料', 'コーヒー カップ', '食器類', 'ソーサー', 'スナック', 'サーブウェア', '食事', 'マグカップ', 'お茶', 'ファスト フード', '朝食', 'フォーク', '料理', 'ブランチ (食事)', '大皿', 'デザート', 'カップ', 'コーヒー', '屋内', '座っている', 'テーブル']
object at location 1211, 1379, 227, 539
cup
object at location 871, 1206, 257, 779
cup
object at location 423, 1086, 829, 1256
Food
object at location 1258, 1746, 752, 1292
Fork
object at location 30, 1615, 176, 1434
dining table
