I'm using a modified version of the example code found here:

https://github.com/GoogleCloudPlatform/cloud-vision/blob/master/python/text/textindex.py

In [1]:
import argparse
import base64
import os
import re
import sys
from MEATY.shared.tools import *
from googleapiclient import discovery
from googleapiclient import errors
import nltk
from nltk.stem.snowball import EnglishStemmer
from oauth2client.client import GoogleCredentials
from sklearn.externals.joblib import dump, load
import redis

# Path to the service-account key file.  Binding a Python name alone does
# nothing for GoogleCredentials.get_application_default(): it consults the
# GOOGLE_APPLICATION_CREDENTIALS *environment variable*.  Export the path
# when the file exists and the variable is not already set, so an existing
# working configuration is never overridden.
GOOGLE_APPLICATION_CREDENTIALS = 'D:/Users/thebrick/Desktop/MEATY/MEATY-credentials.json'
if os.path.isfile(GOOGLE_APPLICATION_CREDENTIALS):
    os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS',
                          GOOGLE_APPLICATION_CREDENTIALS)

# Passed to discovery.build() as discoveryServiceUrl.
# NOTE(review): this looks like the images:annotate REST endpoint rather than
# a discovery-document URL template (the upstream sample uses
# 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}') --
# TODO confirm which one is intended.
DISCOVERY_URL = 'https://vision.googleapis.com/v1/images:annotate'  # noqa

# Default number of images grouped into one annotate request.
# Presumably chosen to match the API's per-request image limit -- verify.
BATCH_SIZE = 16

In [7]:
class VisionApi:
    """Construct and use the Google Vision API service.

    The service handle is built once in ``__init__``; each call to
    ``detect_all`` sends one batched annotate request for the given files.
    """

    # Feature types requested for every image, in request order.
    # NOTE: FACE_DETECTION and IMAGE_PROPERTIES are requested but their
    # results are not read when the response is summarised.
    _FEATURE_TYPES = (
        'LABEL_DETECTION',
        'LOGO_DETECTION',
        'FACE_DETECTION',
        'IMAGE_PROPERTIES',
        'TEXT_DETECTION',
        'WEB_DETECTION',
    )

    def __init__(self):
        """Build the Vision v1 service from application-default credentials."""
        self.credentials = GoogleCredentials.get_application_default()
        self.service = discovery.build(
            'vision', 'v1', credentials=self.credentials,
            discoveryServiceUrl=DISCOVERY_URL)

    def detect_all(self, input_filenames, num_retries=3, max_results=6):
        """Annotate a batch of image files and summarise each response.

        Args:
            input_filenames: iterable of paths to image files. Duplicate
                paths are sent only once.
            num_retries: retry count passed to the request's ``execute``.
            max_results: ``maxResults`` value sent with every feature.

        Returns:
            A dict mapping each image's post id (its file name without
            directory or extension) to a dict with the keys ``text``,
            ``logos``, ``labels``, ``web_entities`` and ``img_match``; a
            value is ``None`` when the response lacks that annotation.
            Images whose response carries an ``error`` are reported on
            stdout and left out. Returns ``{}`` for an empty batch or when
            the reply has no ``responses`` entry.
        """
        images = {}
        for filename in input_filenames:
            with open(filename, 'rb') as image_file:
                images[filename] = image_file.read()

        if not images:
            # Nothing to annotate; skip the API round trip entirely.
            return {}

        features = [{'type': feature_type, 'maxResults': max_results}
                    for feature_type in self._FEATURE_TYPES]
        batch_request = [
            {
                'image': {
                    'content': base64.b64encode(content).decode('UTF-8'),
                },
                'features': features,
            }
            for content in images.values()
        ]

        request = self.service.images().annotate(
            body={'requests': batch_request})
        responses = request.execute(num_retries=num_retries)
        if 'responses' not in responses:
            return {}

        response_data = {}
        # Pairing relies on responses coming back in request order; both
        # the request list and this loop iterate ``images`` the same way.
        for filename, response in zip(images, responses['responses']):
            if 'error' in response:
                print("API Error for %s: %s" % (
                    filename, response['error'].get('message', '')))
                continue
            # Derive the id from the path itself rather than fixed slice
            # offsets, so any directory and any extension length work.
            post_id = os.path.splitext(os.path.basename(filename))[0]
            response_data[post_id] = self._extract_features(response)
        return response_data

    @staticmethod
    def _first_description(response, key):
        """Return the first annotation's description under key, or None."""
        annotations = response.get(key)
        if not annotations:
            return None
        return annotations[0].get('description')

    @staticmethod
    def _extract_features(response):
        """Reduce one annotate response to the fixed five-key summary."""
        web_detection = response.get('webDetection') or {}
        return {
            # Only the first annotation of each kind is kept.
            'text': VisionApi._first_description(response, 'textAnnotations'),
            'logos': VisionApi._first_description(response, 'logoAnnotations'),
            'labels': VisionApi._first_description(
                response, 'labelAnnotations'),
            'web_entities': web_detection.get('webEntities'),
            # Always present, even when webDetection lacks matching pages.
            'img_match': web_detection.get('pagesWithMatchingImages'),
        }

In [3]:
def process_imgs_api():
    """Annotate every file in ``imgs/`` and return the merged results.

    Files are sent to the Vision API in batches (see ``batch``), with a
    progress line printed per batch. The accumulated results are written
    to ``api_batches.pkl`` every 20th batch and once more at the end, so a
    crash part-way through loses at most the batches since the last dump.

    Returns:
        The dict built by merging every ``VisionApi.detect_all`` result.
    """
    api = VisionApi()
    img_paths = ['imgs/' + x for x in os.listdir('imgs')]
    all_results = {}
    for i, paths in enumerate(batch(img_paths)):
        print('Batch', i)
        batch_results = api.detect_all(paths)
        all_results.update(batch_results)
        # Checkpoint on the batch *index*; the original tested the
        # ``batch`` function object itself, which raises TypeError.
        if i % 20 == 0:  # just in case
            dump(all_results, 'api_batches.pkl')
    dump(all_results, 'api_batches.pkl')
    return all_results

In [10]:
def batch(iterable, batch_size=BATCH_SIZE):
    """Group an iterable into tuples of at most batch_size items.

    Every yielded tuple has exactly ``batch_size`` items except possibly
    the last, which holds whatever is left over. An empty iterable yields
    nothing.

    >>> tuple(batch([1, 2, 3, 4, 5], batch_size=2))
    ((1, 2), (3, 4), (5,))

    Raises:
        ValueError: if ``batch_size`` is less than 1. Because this is a
            generator, the error surfaces on the first iteration.
    """
    if batch_size < 1:
        # Without this guard the size check below never matches and the
        # whole input is silently emitted as one oversized batch.
        raise ValueError(
            'batch_size must be at least 1, got %r' % (batch_size,))
    b = []
    for i in iterable:
        b.append(i)
        if len(b) == batch_size:
            yield tuple(b)
            b = []
    if b:
        # Trailing partial batch.
        yield tuple(b)

In [11]:
# Run the pipeline over everything in imgs/; one "Batch N" line is printed
# per batch and the merged results are kept in `results`.
results = process_imgs_api()

Batch 0
Batch 1
Batch 2
Batch 3
Batch 4
Batch 5
Batch 6
Batch 7
Batch 8
Batch 9
Batch 10
Batch 11
Batch 12
Batch 13
Batch 14
Batch 15
Batch 16
Batch 17
Batch 18
Batch 19
Batch 20
Batch 21
Batch 22
Batch 23
Batch 24
Batch 25
Batch 26
Batch 27
Batch 28
Batch 29
Batch 30
Batch 31
Batch 32
Batch 33
Batch 34
Batch 35
Batch 36
Batch 37
Batch 38
Batch 39
Batch 40
Batch 41
Batch 42
Batch 43
Batch 44
Batch 45
Batch 46
Batch 47
Batch 48
Batch 49
Batch 50
Batch 51
Batch 52
Batch 53
Batch 54
Batch 55
Batch 56
Batch 57
Batch 58
Batch 59
Batch 60
Batch 61
Batch 62
Batch 63
Batch 64
Batch 65
Batch 66
Batch 67
Batch 68
Batch 69
Batch 70
Batch 71
Batch 72
Batch 73
Batch 74
Batch 75
Batch 76
Batch 77
Batch 78
Batch 79
Batch 80
Batch 81
Batch 82
Batch 83
Batch 84
Batch 85
Batch 86
Batch 87
Batch 88
Batch 89
Batch 90
Batch 91
Batch 92
Batch 93
Batch 94
Batch 95
Batch 96
Batch 97
Batch 98
Batch 99
Batch 100
Batch 101
Batch 102
Batch 103
Batch 104
Batch 105
Batch 106
Batch 107
Batch 108
Batch 109
Batch 110


Batch 822
Batch 823
Batch 824
Batch 825
Batch 826
Batch 827
Batch 828
Batch 829
Batch 830
Batch 831
Batch 832
Batch 833
Batch 834
Batch 835
Batch 836
Batch 837
Batch 838
Batch 839
Batch 840
Batch 841
Batch 842
Batch 843
Batch 844
Batch 845
Batch 846
Batch 847
Batch 848
Batch 849
Batch 850
Batch 851
Batch 852
Batch 853
Batch 854
Batch 855
Batch 856
Batch 857
Batch 858
Batch 859
Batch 860
Batch 861
Batch 862
Batch 863
Batch 864
Batch 865
Batch 866
Batch 867
Batch 868
Batch 869
Batch 870
Batch 871
Batch 872
Batch 873
Batch 874
Batch 875
Batch 876
Batch 877
Batch 878
Batch 879
Batch 880
Batch 881
Batch 882
Batch 883
Batch 884
Batch 885
Batch 886
Batch 887
Batch 888
Batch 889
Batch 890
Batch 891
Batch 892
Batch 893
Batch 894
Batch 895
Batch 896
Batch 897
Batch 898
Batch 899
Batch 900
Batch 901
Batch 902
Batch 903
Batch 904
Batch 905
Batch 906
Batch 907
Batch 908
Batch 909
Batch 910
Batch 911
Batch 912
Batch 913
Batch 914
Batch 915
Batch 916
Batch 917
Batch 918
Batch 919
Batch 920
Batch 921
