## Image Analysis with Azure Computer Vision Service

### Installing Dependencies and Libraries

In [None]:
%pip install azure-cognitiveservices-vision-computervision==0.9.1 pillow==10.4.0

### Setting Up the Environment

In [None]:
from dotenv import load_dotenv
import os
import time

# Load variables from a local .env file (if present) into the process
# environment so the endpoint and key stay out of the notebook itself.
load_dotenv()
cv_endpoint = os.getenv("CV_ENDPOINT")
cv_key = os.getenv("CV_KEY")

# Fail fast with an actionable message: os.getenv() returns None for unset
# variables, which would otherwise only surface later as an unrelated error
# when the client is created or first used.
_missing_settings = [
    name
    for name, value in (("CV_ENDPOINT", cv_endpoint), ("CV_KEY", cv_key))
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Define them in a .env file or in the environment."
    )

### Creating our Computer Vision Client

In [None]:
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials

# Wrap the subscription key in the credential type the SDK client expects.
credential = CognitiveServicesCredentials(subscription_key=cv_key)

# Client bound to the configured endpoint; the cells below send every
# request through this object.
cv_client = ComputerVisionClient(endpoint=cv_endpoint, credentials=credential)

### Specify the Features to be Retrieved

In [None]:
# Visual features requested from the service for every analyzed image.
# The reporting cell below reads one result attribute per entry listed here.
features = [
    VisualFeatureTypes.description,
    VisualFeatureTypes.tags,
    VisualFeatureTypes.categories,
    VisualFeatureTypes.brands,
    VisualFeatureTypes.objects,
    VisualFeatureTypes.adult,
    VisualFeatureTypes.faces,
]

### Get Image Analysis Results

In [None]:
# Publicly reachable sample images; each URL is sent to the service as-is.
images_to_analyze = [
    "https://raw.githubusercontent.com/kuljotSB/AI-102-Certification/main/AzureAIVision/Image-Analysis/Images/building.jpg",
    "https://raw.githubusercontent.com/kuljotSB/AI-102-Certification/main/AzureAIVision/Image-Analysis/Images/person.jpg",
    "https://raw.githubusercontent.com/kuljotSB/AI-102-Certification/main/AzureAIVision/Image-Analysis/Images/street.jpg"
]


def _object_label(detected):
    """Return the name of a detected object followed by its rectangle."""
    box = detected.rectangle
    return f"{detected.object_property} at location {box.x},{box.y},{box.w},{box.h}"


# One row per report section that prints "<label> (confidence: N.NN)" lines:
# (heading, how to pick the items from the analysis result,
#  how to label one item, how to read its score).
_SCORED_SECTIONS = (
    (
        "Description captions:",
        lambda result: result.description.captions,
        lambda caption: f"'{caption.text}'",
        lambda caption: caption.confidence,
    ),
    (
        "\nTags:",
        lambda result: result.tags,
        lambda tag: tag.name,
        lambda tag: tag.confidence,
    ),
    (
        "\nCategories:",
        lambda result: result.categories,
        lambda category: category.name,
        # Categories expose their confidence under the name "score".
        lambda category: category.score,
    ),
    (
        "\nBrands:",
        lambda result: result.brands,
        lambda brand: brand.name,
        lambda brand: brand.confidence,
    ),
    (
        "\nObjects:",
        lambda result: result.objects,
        _object_label,
        lambda detected: detected.confidence,
    ),
)

for image_url in images_to_analyze:
    print(f"\nAnalyzing image: {image_url}\n")

    # Single request per image covering every feature listed in `features`.
    analysis = cv_client.analyze_image(image_url, visual_features=features)

    # Scored sections share one output format, so drive them from the table.
    for heading, pick_items, describe, score_of in _SCORED_SECTIONS:
        print(heading)
        for item in pick_items(analysis):
            print(f"{describe(item)} (confidence: {score_of(item):.2f})")

    # Faces use a different line format and an explicit "none found" message.
    print("\nFaces:")
    detected_faces = analysis.faces
    if detected_faces:
        for face in detected_faces:
            frame = face.face_rectangle
            print(f"Gender: {face.gender}, Age: {face.age}, Bounding box: "
                  f"left={frame.left}, top={frame.top}, width={frame.width}, height={frame.height}")
    else:
        print("No faces detected.")

### Use the OCR Capability to Read Handwritten Text

![handwritten.png](./Images/handwritten.png)

In [None]:
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from PIL import Image, ImageDraw
from io import BytesIO
import requests

ocr_image_url = "https://raw.githubusercontent.com/kuljotSB/AI-102-Certification/main/AzureAIVision/Image-Analysis/Images/handwritten.png"

# Limits that keep the cell from hanging on a stalled download or a read
# operation that never leaves the pending state.
download_timeout_seconds = 30
max_wait_seconds = 60
poll_interval_seconds = 1

# Download a local copy of the image so bounding boxes can be drawn on it.
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# image-decoding failure on the next line.
response = requests.get(ocr_image_url, timeout=download_timeout_seconds)
response.raise_for_status()
original_image = Image.open(BytesIO(response.content)).convert("RGB")

# Submit the image by URL. raw=True exposes the response headers; the last
# path segment of the Operation-Location header is used as the operation ID.
read_response = cv_client.read(ocr_image_url, raw=True)
read_operation_location = read_response.headers["Operation-Location"]
operation_id = read_operation_location.split("/")[-1]

# Poll until the operation leaves the pending states, giving up after
# max_wait_seconds rather than looping forever.
pending_statuses = (OperationStatusCodes.not_started, OperationStatusCodes.running)
deadline = time.monotonic() + max_wait_seconds
while True:
    read_result = cv_client.get_read_result(operation_id)
    if read_result.status not in pending_statuses:
        break
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Read operation {operation_id} did not finish within "
            f"{max_wait_seconds} seconds (last status: {read_result.status})"
        )
    time.sleep(poll_interval_seconds)

if read_result.status == OperationStatusCodes.succeeded:
    # Print each detected line and outline it on the image in a single pass.
    draw = ImageDraw.Draw(original_image)
    for text_result in read_result.analyze_result.read_results:
        for line in text_result.lines:
            print(line.text)
            print("Bounding box:", line.bounding_box)
            box = line.bounding_box  # [x1, y1, x2, y2, x3, y3, x4, y4]
            # Pair the flat coordinate list into (x, y) polygon vertices.
            polygon = list(zip(box[0::2], box[1::2]))
            draw.polygon(polygon, outline="red", width=3)

    # Save and display image with bounding boxes
    output_path = "ocr_out.png"
    original_image.save(output_path)
    original_image.show()
    print(f"OCR visualization saved as {output_path}")
else:
    # Do not save or announce a visualization when there is nothing to draw.
    print(f"OCR did not succeed (status: {read_result.status}); no visualization was saved.")