<!--TABLE OF CONTENTS-->
# Table of Contents:
- [Notebook Config](#Notebook-Config)
- [Config](#Config)
  - [Input Configs](#Input-Configs)
  - [Export Config](#Export-Config)
- [Main Process: Generating Keywords](#Main-Process:-Generating-Keywords)
- [Optional: Insert EXIF Tag to Image Files](#Optional:-Insert-EXIF-Tag-to-Image-Files)

# Notebook Config

In [1]:
import sys
import io
import os
import re
from PIL import Image
import pandas as pd


from google.cloud import vision

# Config

In [None]:
# Point the Google client libraries at the GCP service-account key file
# (replace the placeholder with the path to your own credential JSON).
os.environ.update({"GOOGLE_APPLICATION_CREDENTIALS": "<your_credential>.json"})

## Input Configs

In [None]:
# Should images inside sub-folders of the input folder be tagged as well?
recursive = False

# Directory (relative to the working directory) that holds the images to tag
input_folder = os.path.join(os.pardir, "inputs")

## Export Config

In [4]:
# Export the relevance score next to every keyword? ("yes" or "no")
export_score = "yes"

# Upper bound on the keywords kept per image; 0 means "keep them all"
maxkeywords = 0

# Destination of the resulting .csv file
output_filepath = os.path.join(os.pardir, "outputs", "Results_tagged_images.csv")


# Main Process: Generating Keywords

In [None]:
# Generate keywords for every image below `input_folder` with the Cloud Vision
# label-detection API and export them as a .csv file.
#
# Reads : input_folder, recursive, maxkeywords, export_score, output_filepath
# Writes: df (one row per image/keyword), n_files, counter, and the .csv file

# Collect the image files first. A single top-down walk is used so that
# (a) the non-recursive case really stops at `input_folder` itself, and
# (b) the progress total counts only files that will actually be annotated.
image_extensions = ('.jpeg', '.jpg', '.png', '.gif', '.bmp', '.tiff')
image_filepaths = []
for root, dirs, files in os.walk(input_folder, topdown=True):
    # Prune hidden sub-folders (names starting with ".") in place so that
    # os.walk never descends into them; independent of the path separator.
    dirs[:] = [d for d in dirs if not d.startswith('.')]
    for filename in files:
        if str(filename).lower().endswith(image_extensions):
            image_filepaths.append(os.path.join(root, filename))
    if not recursive:
        break
n_files = len(image_filepaths)

# Optional allow-list: when an earlier cell defines `my_custom_list`, only the
# keywords it contains are kept; otherwise every returned label is exported.
keyword_allowlist = globals().get('my_custom_list')

# Instantiates a client
client = vision.ImageAnnotatorClient()
result_columns = ['filepath', 'keyword', 'relevance']
df = []

print("--------------------------")
print("Start to generate tags for the images.")

counter = 0
for img_filepath in image_filepaths:
    with open(img_filepath, 'rb') as image_file:
        image = vision.Image(content=image_file.read())

    response = client.label_detection(image=image)
    if response.error.message:
        # Per-image failures are reported inside the response rather than
        # raised; report them and carry on with the remaining images.
        print("skipped {}: {}".format(img_filepath, response.error.message))
    else:
        results = [(img_filepath, x.description, x.score) for x in response.label_annotations]
        dft = pd.DataFrame(results, columns=result_columns)
        if keyword_allowlist is not None:
            # Limited to certain keywords
            dft = dft[dft.keyword.isin(keyword_allowlist)]
        df.append(dft)

    counter += 1
    print("progress: {}/{}".format(counter, n_files))

# pd.concat refuses an empty list, so fall back to an empty table with the
# expected columns when nothing was annotated.
if df:
    df = pd.concat(df, axis=0, ignore_index=True, sort=False)
else:
    df = pd.DataFrame(columns=result_columns)
# Highest relevance first, so that head() below keeps the best keywords
df = df.sort_values(['filepath', 'relevance'], axis=0, ascending=False)

if maxkeywords > 0:
    # drop=True: do not leak the old row index into the exported file
    df = df.groupby('filepath').head(maxkeywords).reset_index(drop=True)
if export_score == "no":
    df = df.drop('relevance', axis=1)

# Make sure the destination folder exists before writing
output_dir = os.path.dirname(output_filepath)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

df.to_csv(output_filepath, index=False, encoding="utf-8")
print("--------------------------")
print("csv. file with tagged records has been exported to {}".format(output_filepath))

# Optional: Insert EXIF Tag to Image Files

In [6]:
# IPython `cd` magic (not plain Python): move the working directory up one level.
# NOTE(review): later cells use relative paths, so their result depends on whether
# this cell has been run - confirm the intended execution order.
cd ..

D:\GT_Workstation\TechShare\AutoML_Vision_GCP\production


In [3]:
# True  -> the tagging loop overwrites the image's XPKeywords value with the new keywords
# False -> the tagging loop appends the new keywords to the existing XPKeywords value
remove_existing_tags = True

In [11]:
# IPython `cd` magic (not plain Python): enter the `quickfire_image_tagger` folder.
# NOTE(review): this changes how relative paths in other cells resolve - confirm
# the intended execution order.
cd quickfire_image_tagger

D:\GT_Workstation\TechShare\AutoML_Vision_GCP\production\quickfire_image_tagger


In [13]:
# Load the exported keyword table and collapse it to one row per image, with
# that image's keywords joined into a single comma-separated string.
# NOTE: the path is relative to the current working directory.
df = pd.read_csv("outputs/Results_tagged_images.csv")
# Rows without a keyword cannot be joined; drop them and make sure the
# remaining values are strings.
df = df.dropna(subset=['keyword'])
df['keyword'] = df['keyword'].astype(str)
# Aggregate the `keyword` column directly instead of applying a function to
# whole group frames (that form of groupby.apply is deprecated in pandas 2.2).
df = df.groupby('filepath')['keyword'].agg(', '.join).reset_index()
df.columns = ['filepath', 'tags_string']

In [12]:
# Load the exported keyword table (written to `output_filepath`) and collapse it
# to one row per image, with that image's keywords joined into a single
# comma-separated string.
if not os.path.isfile(output_filepath):
    # `output_filepath` is resolved against the current working directory, so
    # it stops matching once the directory has been changed (e.g. with `cd`).
    raise FileNotFoundError(
        "Results file not found: {!r} (working directory: {!r})".format(
            os.path.abspath(output_filepath), os.getcwd()
        )
    )
df = pd.read_csv(output_filepath)
# Rows without a keyword cannot be joined; drop them and make sure the
# remaining values are strings.
df = df.dropna(subset=['keyword'])
df['keyword'] = df['keyword'].astype(str)
# Aggregate the `keyword` column directly instead of applying a function to
# whole group frames (that form of groupby.apply is deprecated in pandas 2.2).
df = df.groupby('filepath')['keyword'].agg(', '.join).reset_index()
df.columns = ['filepath', 'tags_string']

FileNotFoundError: [Errno 2] No such file or directory: '..\\outputs\\Results_tagged_images.csv'

In [None]:
# Write each image's keyword string into its EXIF XPKeywords field.
#
# Reads : df (columns `filepath`, `tags_string`), remove_existing_tags
# Effect: every listed image file is re-saved in place with the updated EXIF.
XPKeywords = 0x9C9E
# (0x9C9C would be XPComment; not used here)

counter = 0
total_len = len(df)
for counter, row in enumerate(df.itertuples(index=False), start=1):
    # Read pixels and metadata up front so that the file handle is released
    # before the same path is overwritten below.
    with Image.open(row.filepath) as image:
        image.load()
        exifdata = image.getexif()

    tags_string = row.tags_string
    if not remove_existing_tags:
        # An image may have no XPKeywords yet; .get() avoids a KeyError.
        existing_raw = exifdata.get(XPKeywords)
        if existing_raw:
            # Drop NUL terminators so they do not end up in the middle of
            # the merged string.
            existing_tags = existing_raw.decode('utf16').rstrip('\x00')
            if existing_tags:
                tags_string = existing_tags + ', ' + tags_string
    exifdata[XPKeywords] = tags_string.encode("utf16")

    image.save(row.filepath, exif=exifdata)

    # progress
    print("progress: {}/{}".format(counter, total_len))

In [None]:
# Reset the EXIF XPKeywords field of every image listed in `df` to an empty
# string.
#
# Reads : df (column `filepath`)
# Effect: every listed image file is re-saved in place with the updated EXIF.
XPKeywords = 0x9C9E
# (0x9C9C would be XPComment; not used here)

counter = 0
total_len = len(df)
for counter, row in enumerate(df.itertuples(index=False), start=1):
    # Read pixels and metadata up front so that the file handle is released
    # before the same path is overwritten below.
    with Image.open(row.filepath) as image:
        image.load()
        exifdata = image.getexif()

    exifdata[XPKeywords] = "".encode("utf16")

    image.save(row.filepath, exif=exifdata)

    # progress
    print("progress: {}/{}".format(counter, total_len))