In [1]:
# Install the Azure Computer Vision SDK. The explicit %pip magic targets the
# environment of the running kernel and does not depend on automagic.
%pip install --upgrade azure-cognitiveservices-vision-computervision

Collecting azure-cognitiveservices-vision-computervision
  Downloading azure_cognitiveservices_vision_computervision-0.9.0-py2.py3-none-any.whl (39 kB)
Collecting azure-common~=1.1
  Downloading azure_common-1.1.27-py2.py3-none-any.whl (12 kB)
Collecting msrest>=0.5.0
  Downloading msrest-0.6.21-py2.py3-none-any.whl (85 kB)
Collecting requests-oauthlib>=0.5.0
  Downloading requests_oauthlib-1.3.0-py2.py3-none-any.whl (23 kB)
Collecting isodate>=0.6.0
  Downloading isodate-0.6.0-py2.py3-none-any.whl (45 kB)
Collecting oauthlib>=3.0.0
  Downloading oauthlib-3.1.1-py2.py3-none-any.whl (146 kB)
Installing collected packages: oauthlib, requests-oauthlib, isodate, msrest, azure-common, azure-cognitiveservices-vision-computervision
Successfully installed azure-cognitiveservices-vision-computervision-0.9.0 azure-common-1.1.27 isodate-0.6.0 msrest-0.6.21 oauthlib-3.1.1 requests-oauthlib-1.3.0
Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'C:\Users\runr4\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip' command.


In [2]:
# Pillow provides the PIL package. The explicit %pip magic targets the
# environment of the running kernel and does not depend on automagic.
%pip install pillow

Note: you may need to restart the kernel to use updated packages.


In [3]:
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials

from array import array
import os
from PIL import Image
import sys
import time

In [4]:
import json

# Read the service credentials from a local JSON file so they are not
# hard-coded in the notebook. "utf-8-sig" decodes plain UTF-8 as well as files
# saved with a byte-order mark, independent of the platform default encoding.
with open('secret.json', encoding='utf-8-sig') as f:
    secret = json.load(f)

In [5]:
# Credentials used to build the client: the API key and the service endpoint,
# both taken from the loaded secrets.
KEY, ENDPOINT = secret['KEY'], secret['ENDPOINT']

In [6]:
# Authenticated client used by every Computer Vision call in this notebook.
computervision_client = ComputerVisionClient(
    endpoint=ENDPOINT,
    credentials=CognitiveServicesCredentials(subscription_key=KEY),
)

In [7]:
# Sample image analysed by the remote-image cells.
remote_image_url = (
    "https://raw.githubusercontent.com/Azure-Samples/"
    "cognitive-services-sample-data-files/master/"
    "ComputerVision/Images/landmark.jpg"
)

## 画像の説明の取得

In [8]:
# Request captions for the remote image and print each one with its
# confidence expressed as a percentage.
print("===== Describe an image - remote =====")
description_results = computervision_client.describe_image(remote_image_url)

print("Description of remote image: ")
if len(description_results.captions) > 0:
    for caption in description_results.captions:
        print("'{}' with confidence {:.2f}%".format(caption.text, caption.confidence * 100))
else:
    print("No description detected.")

===== Describe an image - remote =====
Description of remote image: 
'an ancient city with many ruins with Colosseum in the background' with confidence 33.80%


## 画像カテゴリの取得

In [10]:
# Analyse the remote image for the "categories" feature only and print each
# category with its score expressed as a percentage.
print("===== Categorize an image - remote =====")
remote_image_features = ["categories"]
categorize_results_remote = computervision_client.analyze_image(
    remote_image_url,
    remote_image_features,
)

print("Categories from remote image: ")
if len(categorize_results_remote.categories) > 0:
    for category in categorize_results_remote.categories:
        print("'{}' with confidence {:.2f}%".format(category.name, category.score * 100))
else:
    print("No categories detected.")

===== Categorize an image - remote =====
Categories from remote image: 
'building_' with confidence 31.64%
'others_' with confidence 0.39%
'outdoor_' with confidence 3.91%


## 画像タグの取得

In [11]:
# Request tags for the remote image and print each tag with its confidence
# expressed as a percentage.
print("===== Tag an image - remote =====")
tags_result_remote = computervision_client.tag_image(remote_image_url)

print("Tags in the remote image: ")
if len(tags_result_remote.tags) > 0:
    for tag in tags_result_remote.tags:
        print("'{}' with confidence {:.2f}%".format(tag.name, tag.confidence * 100))
else:
    print("No tags detected.")

===== Tag an image - remote =====
Tags in the remote image: 
'outdoor' with confidence 99.00%
'building' with confidence 98.81%
'sky' with confidence 98.21%
'stadium' with confidence 98.17%
'ancient rome' with confidence 96.16%
'ruins' with confidence 95.04%
'amphitheatre' with confidence 93.99%
'ancient roman architecture' with confidence 92.65%
'historic site' with confidence 89.55%
'ancient history' with confidence 89.54%
'history' with confidence 86.72%
'archaeological site' with confidence 84.41%
'travel' with confidence 65.85%
'large' with confidence 61.02%
'city' with confidence 56.57%


## 物体を検出する

In [12]:
# Detect objects in a remote sample image and print one bounding rectangle
# per detected object.
print("===== Detect Objects - remote =====")
remote_image_url_objects = "https://raw.githubusercontent.com/Azure-Samples/cognitive-services-sample-data-files/master/ComputerVision/Images/objects.jpg"
detect_objects_results_remote = computervision_client.detect_objects(remote_image_url_objects)

print("Detecting objects in remote image:")
if len(detect_objects_results_remote.objects) == 0:
    print("No objects detected.")
else:
    # The loop variable is deliberately not named `object`: at notebook level
    # it would stay bound after the cell and shadow the builtin `object`.
    for detected_object in detect_objects_results_remote.objects:
        rect = detected_object.rectangle
        # Values are printed in the order x, x + w, y, y + h.
        print("object at location {}, {}, {}, {}".format(
            rect.x, rect.x + rect.w,
            rect.y, rect.y + rect.h))

===== Detect Objects - remote =====
Detecting objects in remote image:
object at location 213, 365, 85, 208
object at location 218, 402, 179, 384
object at location 238, 417, 298, 416
object at location 116, 419, 60, 386


## ローカルファイルに対応させる

In [13]:
# Detect objects in a local image file and print one bounding rectangle per
# detected object.
local_image_path = 'sample01.jpg'

# The context manager closes the file handle once the request has finished,
# including when the request raises.
with open(local_image_path, "rb") as local_image:
    print("===== Detect Objects - local =====")
    detect_objects_results = computervision_client.detect_objects_in_stream(local_image)

print("Detecting objects in local image:")
if len(detect_objects_results.objects) == 0:
    print("No objects detected.")
else:
    # The loop variable is deliberately not named `object`: at notebook level
    # it would stay bound after the cell and shadow the builtin `object`.
    for detected_object in detect_objects_results.objects:
        rect = detected_object.rectangle
        # Values are printed in the order x, x + w, y, y + h.
        print("object at location {}, {}, {}, {}".format(
            rect.x, rect.x + rect.w,
            rect.y, rect.y + rect.h))

===== Detect Objects - local =====
Detecting objects in local image:
object at location 879, 1201, 262, 773
object at location 426, 1085, 835, 1271


In [16]:
def get_tags(filepath):
    """Return the names of the tags the service assigns to a local image.

    Args:
        filepath: Path of the image file that is opened in binary mode and
            sent to ``computervision_client.tag_image_in_stream``.

    Returns:
        A list with the ``name`` attribute of every tag in the response, in
        the order in which the response lists them.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager closes the file handle as soon as the request has
    # finished, including when the request raises.
    with open(filepath, "rb") as local_image:
        print("===== Tag an image - local =====")
        tags_result_local = computervision_client.tag_image_in_stream(local_image)
    return [tag.name for tag in tags_result_local.tags]

# Tag the sample image; as the last expression of the cell, the returned list
# is displayed as the cell output.
filepath = 'sample01.jpg'
get_tags(filepath)

===== Tag an image - local =====


['tableware',
 'food',
 'baked goods',
 'plate',
 'drink',
 'coffee cup',
 'dishware',
 'saucer',
 'snack',
 'serveware',
 'meal',
 'mug',
 'tea',
 'fast food',
 'breakfast',
 'fork',
 'kitchen utensil',
 'dish',
 'brunch',
 'platter',
 'dessert',
 'cup',
 'coffee',
 'indoor',
 'sitting',
 'table']

In [19]:
def detect_objects(filepath):
    """Return the objects the service detects in a local image.

    Args:
        filepath: Path of the image file that is opened in binary mode and
            sent to ``computervision_client.detect_objects_in_stream``.

    Returns:
        The ``objects`` attribute of the service response.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager closes the file handle as soon as the request has
    # finished, including when the request raises.
    with open(filepath, "rb") as local_image:
        print("===== Detect Objects - local =====")
        detect_objects_results = computervision_client.detect_objects_in_stream(local_image)
    return detect_objects_results.objects

# Run object detection on the sample image and keep the result in `objects`.
filepath = 'sample01.jpg'
objects = detect_objects(filepath)

===== Detect Objects - local =====
