In [10]:
import io
import json
import os
import re
import xml.etree.ElementTree as ET
from time import time

import boto3
import numpy as np
import requests
from google.cloud import vision

from utilities.pythonDB import writeToDB, recordsExists

In [5]:
def extract_clean_str(original_str):
    """Reduce a string to its lowercase ASCII letters and digits.

    Literal ``<br/>`` markers are dropped first (so the letters ``br`` inside
    them do not survive); every other run of characters outside ``a-z``,
    ``A-Z`` and ``0-9`` is then removed and the remainder is lowercased.

    Args:
        original_str: Text to normalise.

    Returns:
        The normalised string, containing no whitespace or punctuation.
    """
    without_breaks = original_str.replace('<br/>', '')
    # Collapse each non-alphanumeric run to one space, then strip the spaces out.
    spaced = re.sub('[^a-zA-Z0-9]+', ' ', without_breaks, flags=re.UNICODE)
    lowered = spaced.lower().strip()
    return lowered.replace(' ', '')

def get_mapped(path):
    """Index every JPEG file name found under ``path`` and return a random visit order.

    Args:
        path: Root directory to walk recursively.

    Returns:
        A ``(key_map, shuffled_idxs)`` tuple. ``key_map`` maps consecutive
        integers starting at 0 to file names (names only, without their
        directory). ``shuffled_idxs`` is a NumPy array holding each key of
        ``key_map`` exactly once, in random order.
    """
    key_map = {}
    # Walk the directory passed in, not a notebook-level global.
    for _root, _dirs, files in os.walk(path):
        for file in files:
            if '.JPG' in file or '.jpg' in file:
                key_map[len(key_map)] = file
    # permutation() yields every index once; randint() would sample with
    # replacement, repeating some files and skipping others.
    shuffled_idxs = np.random.permutation(len(key_map))
    return key_map, shuffled_idxs

def get_captions(base_path, annotation_file):
    """Load a caption annotation JSON file and map image file names to captions.

    The file must contain an ``annotations`` list (items with ``image_id`` and
    ``caption``) and an ``images`` list (items with ``id`` and ``file_name``).

    Args:
        base_path: Directory holding the annotation file.
        annotation_file: Name of the JSON file inside ``base_path``.

    Returns:
        A dict from ``file_name`` to caption. When an image id has several
        annotations the last one listed is kept; images without any
        annotation map to ``None``.
    """
    annotation_path = os.path.join(base_path, annotation_file)
    with open(annotation_path) as handle:
        payload = json.load(handle)

    # Later annotations for the same image id overwrite earlier ones.
    caption_by_image_id = {
        entry['image_id']: entry['caption'] for entry in payload['annotations']
    }
    return {
        img['file_name']: caption_by_image_id.get(img['id'])
        for img in payload['images']
    }

def fetch_from_xml(path, file):
    """Read the text encoded in an XML file's ``character`` elements.

    Every ``character`` element found anywhere in the document contributes its
    ``char`` attribute, in document order; the concatenation is then passed
    through ``extract_clean_str``.

    Args:
        path: Directory containing the XML file.
        file: XML file name inside ``path``.

    Returns:
        The cleaned string produced by ``extract_clean_str``.
    """
    root = ET.parse(os.path.join(path, file)).getroot()
    chars = []
    for element in root.iter(tag='character'):
        chars.append(element.attrib['char'])
    return extract_clean_str(''.join(chars))

In [6]:
class DetectLabels():
    """Run image label detection against one of several cloud vision services.

    The image bytes are read once at construction time. Each ``if_<name>``
    method sends those bytes to a different provider and returns the labels
    it reports; ``return_function`` selects the provider by name.
    """

    def __init__(self, base, file, max_labels=30):
        """Read the image at ``os.path.join(base, file)`` into memory.

        Args:
            base: Directory containing the image.
            file: Image file name inside ``base``.
            max_labels: Limit forwarded to the providers that accept one.
        """
        with io.open(os.path.join(base, file), 'rb') as image_file:
            content = image_file.read()

        self.content = content
        self.base, self.file, self.actual_str, self.detected_str = base, file, '', ''
        self.label = ''
        # Timestamp taken right after the image was loaded.
        self.start = time()
        self.max_labels = max_labels

    def return_function(self, name):
        """Call the ``if_<name>`` method and return its result.

        Args:
            name: Provider suffix, e.g. ``'gc'``, ``'aws'`` or ``'azure'``.

        Raises:
            AttributeError: If no ``if_<name>`` attribute exists.
        """
        return getattr(self, 'if_' + name)()

    def if_gc(self):
        """Return label descriptions from Google Cloud Vision label detection."""
        vision_client = vision.ImageAnnotatorClient()
        # NOTE(review): `vision.types` is believed to exist only in
        # google-cloud-vision < 2.0 — confirm against the installed version.
        image = vision.types.Image(content=self.content)
        label_response = vision_client.label_detection(image=image)
        return [label.description for label in label_response.label_annotations]

    def if_aws(self):
        """Return label names from AWS Rekognition ``detect_labels``."""
        imgobj = {'Bytes': self.content}
        client = boto3.client('rekognition', region_name='us-east-1')
        response = client.detect_labels(Image=imgobj, MaxLabels=self.max_labels)
        return [label['Name'] for label in response['Labels']]

    def if_azure(self):
        """Return the tags from the Azure Computer Vision ``describe`` endpoint.

        Reads ``COMPUTER_VISION_SUBSCRIPTION_KEY`` and
        ``COMPUTER_VISION_ENDPOINT`` from the environment.

        Raises:
            KeyError: If either environment variable is unset, or the response
                JSON has no ``description``/``tags`` entry.
        """
        subscription_key = os.environ['COMPUTER_VISION_SUBSCRIPTION_KEY']
        endpoint = os.environ['COMPUTER_VISION_ENDPOINT']
        # Accept the endpoint with or without a trailing slash.
        describe_url = endpoint.rstrip('/') + '/vision/v2.1/describe'
        headers = {'Ocp-Apim-Subscription-Key': subscription_key, 'Content-Type': 'application/octet-stream'}
        # NOTE(review): `maxCandidates` appears to limit caption candidates
        # rather than tags — confirm against the Describe Image API reference.
        params = {'maxCandidates': self.max_labels}
        response = requests.post(describe_url, headers=headers, params=params, data=self.content).json()
        return response['description']['tags']

In [9]:
# Where the caption annotation JSON files and the training images live.
annotation_dir_path = 'datasets/image_labeling/annotations'
train_dir_path = 'datasets/image_labeling/train2017'

# Build one file-name -> caption lookup covering both annotation files. The
# validation file is read first and merged last, so its caption wins on any
# file name present in both.
captions_validation = get_captions(annotation_dir_path, 'captions_val2017.json')
captions = {
    **get_captions(annotation_dir_path, 'captions_train2017.json'),
    **captions_validation,
}
del captions_validation

In [13]:
# Directory of validation images to sample from.
dir_path = 'datasets/image_labeling/val2017/'
dict_files, files_idx = get_mapped(dir_path)
# Visit every index exactly once in random order. randint() draws with
# replacement, so it would repeat some images and never reach others.
shuffled_idx = np.random.permutation(len(files_idx))

In [14]:
# Label the image stored at position 2 of the index order using the 'gc' provider.
detected_labels = DetectLabels(
    dir_path,
    dict_files.get(shuffled_idx[2]),
).return_function('gc')
print(detected_labels)

['Cat', 'Mammal', 'Small to medium-sized cats', 'Whiskers', 'Felidae', 'Tabby cat', 'European shorthair', 'Carnivore', 'American shorthair', 'Domestic short-haired cat']


In [16]:
# Display the file name stored at the same index used by the detection cell.
dict_files.get(shuffled_idx[2])

'000000287649.jpg'