# Assumptions

This notebook assumes you have enabled the Google Cloud Vision API for your account. You can enable the API via your Google Cloud Console.

In [1]:
import argparse
import io
import os
import pandas as pd
import push_to_sheets as push

from google.cloud import vision
from google.oauth2 import service_account

# Detect Labels

In [None]:
def detect_labels(images_path, creds_path):
    """Detects labels in the file."""
    credentials = service_account.Credentials.from_service_account_file(creds_path)
    scoped_credentials = credentials.with_scopes(
        ['https://www.googleapis.com/auth/cloud-platform'])
    
    client = vision.ImageAnnotatorClient(credentials=credentials)

  #  requests = []
    filenames = [filename for filename in os.listdir(images_path) if 'DS_Store' not in filename]
    
    
    limit = 8 # pretty sure this is the limit
    if len(filenames) > limit:
        full = len(filenames) // limit # how many full length 
        partial = len(filenames) % limit
        if partial != 0:
            names = [str(x + 1) for x in range(0,full+1)]
        else: 
            names = [str(x + 1) for x in range(0, full)]
        fileDict = dict.fromkeys(names) #initialize dict to 
        for item in names:
            fileDict[item] = [filenames[x-1] for x in range(limit * (int(item)-1), limit * int(item))]
    
    structured_data = []  
    for item in fileDict.keys():
        requests = []
        for filename in fileDict[item]:
            with io.open(images_path + filename, 'rb') as image_file:
                content = image_file.read()
                requests.append({
                    "image":{"content":content},
                    "features":[
                        {"type":"LABEL_DETECTION"}
                    ]})

            resp = client.batch_annotate_images(requests)
            response = resp.responses

    
            for i in range(len(response)):
                labels = response[i].label_annotations
                for l in labels:
                    structured_data.append([filenames[i], 'Labels', l.description, l.score])

    return structured_data         

## NOTE:

YOU WILL HAVE TO UPDATE THE CELL BELOW WITH THE PATH TO YOUR FILE OF IMAGES & THE PATH TO YOUR GOOGLE CLOUD API KEY

In [18]:
output = detect_labels('/Users/rachel.ehlers/Desktop/minigolfpicsonly/', '/Users/rachel.ehlers/Desktop/keys/vizlistrach.json')

{'1': ['1794586297278859343_31215051.jpg', '1790686511567256671_1138037657.jpg', '1790731246218616061_14321435.jpg', '1790834044943899696_36539708.jpg', '1790859361301342487_709294145.jpg', '1790883968997697255_14642890.jpg', '1790884902029064266_14642890.jpg', '1790913659100641678_173910183.jpg'], '2': ['1790916329832178209_173910183.jpg', '1790921827432264430_173910183.jpg', '1790957002927972393_11424582.jpg', '1790987069479103478_10395782.jpg', '1791026105219830307_173910183.jpg', '1791218835401412872_1835021371.jpg', '1791448547533515091_1053659.jpg', '1791532380108954754_12295703.jpg'], '3': ['1791533375929123682_12295703.jpg', '1791565965008486204_413792081.jpg', '1791567660455413077_23480643.jpg', '1791581055191405775_1509603622.jpg', '1791624544555702374_227266577.jpg', '1791739173347153201_183026249.jpg', '1791789176808908584_142470.jpg', '1791846542605604808_175798.jpg'], '4': ['1791850139482026273_709294145.jpg', '1791904383619810649_2533797730.jpg', '1792068206884502039_278

## Reshape label output

In [45]:
df = pd.DataFrame(output, columns=['file', 'classification', 'output', 'likelihood'])
df.head()

Unnamed: 0,file,classification,output,likelihood
0,1790686511567256671_1138037657.jpg,Labels,interior design,0.799627
1,1790686511567256671_1138037657.jpg,Labels,furniture,0.722092
2,1790686511567256671_1138037657.jpg,Labels,table,0.687024
3,1790686511567256671_1138037657.jpg,Labels,interior design,0.799627
4,1790686511567256671_1138037657.jpg,Labels,furniture,0.722092


# Detect Faces

In [39]:
def detect_faces(images_path):
    """Detects faces in the file."""
    credentials = service_account.Credentials.from_service_account_file('/Users/rachel.ehlers/Desktop/keys/vizlistrach.json')
    scoped_credentials = credentials.with_scopes(
        ['https://www.googleapis.com/auth/cloud-platform'])
    
    client = vision.ImageAnnotatorClient(credentials=credentials)

    
    filenames = [filename for filename in os.listdir(images_path) if 'DS_Store' not in filename]
    
    limit = 6 # pretty sure this is the limit
    if len(filenames) > limit:
        full = len(filenames) // limit # how many full length 
        partial = len(filenames) % limit
        if partial != 0:
            names = [str(x + 1) for x in range(0,full+1)]
        else: 
            names = [str(x + 1) for x in range(0, full)]
        fileDict = dict.fromkeys(names) #initialize dict to 
        for item in names:
            fileDict[item] = [filenames[x-1] for x in range(limit * (int(item)-1), limit * int(item))]
    
    structured_data = []   
    for item in fileDict.keys():
        requests = []
        for filename in fileDict[item]:
            with io.open(images_path + filename, 'rb') as image_file:
                content = image_file.read()
                requests.append({
                    "image":{"content":content},
                    "features":[
                        {"type":"FACE_DETECTION"}
                    ]})

            resp = client.batch_annotate_images(requests)
            response = resp.responses

            
            for i in range(len(response)):
                if not response[i].face_annotations:
                    structured_data.append([filenames[i], 'Faces', 0])
                else:
                    labels = response[i].face_annotations
                    counter = 0
                    for l in labels:
                        counter += 1
                    structured_data.append([filenames[i], 'Faces', counter])
                
    return structured_data

In [40]:
face = detect_faces('/Users/rachel.ehlers/Desktop/minigolfpicsonly/')
# face

## Reshape Faces Output

In [42]:
df_faces = pd.DataFrame(face, columns=['file', 'classification', 'output'])
#df_faces.head()
df_faces

Unnamed: 0,file,classification,output
0,1790686511567256671_1138037657.jpg,Faces,0
1,1790686511567256671_1138037657.jpg,Faces,0
2,1790731246218616061_14321435.jpg,Faces,1
3,1790686511567256671_1138037657.jpg,Faces,0
4,1790731246218616061_14321435.jpg,Faces,1
5,1790834044943899696_36539708.jpg,Faces,0
6,1790686511567256671_1138037657.jpg,Faces,0
7,1790731246218616061_14321435.jpg,Faces,1
8,1790834044943899696_36539708.jpg,Faces,0
9,1790859361301342487_709294145.jpg,Faces,0


# Combine outputs

In [59]:
output = pd.concat([df, df_faces], axis=0)

In [60]:
output.head()

Unnamed: 0,classification,file,likelihood,output
0,Labels,1790686511567256671_1138037657.jpg,0.799627,interior design
1,Labels,1790686511567256671_1138037657.jpg,0.722092,furniture
2,Labels,1790686511567256671_1138037657.jpg,0.687024,table
3,Labels,1790686511567256671_1138037657.jpg,0.799627,interior design
4,Labels,1790686511567256671_1138037657.jpg,0.722092,furniture


In [61]:
output

Unnamed: 0,classification,file,likelihood,output
0,Labels,1790686511567256671_1138037657.jpg,0.799627,interior design
1,Labels,1790686511567256671_1138037657.jpg,0.722092,furniture
2,Labels,1790686511567256671_1138037657.jpg,0.687024,table
3,Labels,1790686511567256671_1138037657.jpg,0.799627,interior design
4,Labels,1790686511567256671_1138037657.jpg,0.722092,furniture
5,Labels,1790686511567256671_1138037657.jpg,0.687024,table
6,Labels,1790731246218616061_14321435.jpg,0.968197,blue
7,Labels,1790731246218616061_14321435.jpg,0.886611,text
8,Labels,1790731246218616061_14321435.jpg,0.826473,male
9,Labels,1790731246218616061_14321435.jpg,0.701894,poster


# Write to sheet for analysis

Make sure to create a named range in your data sheet called 'API_Output' - just select an entire sheet in your workbook & make it a named range.  Also, insert your sheet ID below! 

In [214]:
sheet_id = '1BMvFqBbUBtL4TPu7mXMaQQ--U5oz3PC00AuOjznio6s'
nm_range = 'API_Output'

In [217]:
data = output.values.astype(str).tolist()
data.insert(0, list(output.columns))

body_vals ={"range": nm_range,"values": data,}

push.batch_write(sheet_id, body_vals)

{'responses': [{'spreadsheetId': '1BMvFqBbUBtL4TPu7mXMaQQ--U5oz3PC00AuOjznio6s',
   'updatedCells': 632,
   'updatedColumns': 4,
   'updatedRange': 'Sheet1!A1:D158',
   'updatedRows': 158}],
 'spreadsheetId': '1BMvFqBbUBtL4TPu7mXMaQQ--U5oz3PC00AuOjznio6s',
 'totalUpdatedCells': 632,
 'totalUpdatedColumns': 4,
 'totalUpdatedRows': 158,
 'totalUpdatedSheets': 1}