# Overview
The following notebook will import data from AWS S3, look for images (PNG and JPEG) then attempt to label the images for analysis
The notebook requires access to 
* AWS S3
* AWS Rekognition


## Imports

In [4]:
import base64
import csv
import io
import json
import os
import pathlib
from io import BytesIO
import cv2

import boto3
import numpy as np
import pandas as pd
import requests
from IPython.display import HTML, display

## Main Function

In [5]:
def main(bucket):
    s3 = boto3.client("s3")
    paginator = s3.get_paginator("list_objects_v2")
    pages = paginator.paginate(Bucket=bucket, Prefix="")

    # with open("main.csv", "a", newline="") as myfile:
    #     writer = csv.writer(myfile)
        
    # Loop thour all pages
    total = 0
    for j, page in enumerate(pages):
        print(j)
        for obj in page["Contents"]:
            try:
                with open("main.csv", "a", newline="") as myfile:
                    writer = csv.writer(myfile)

                    file_extension = pathlib.Path(obj["Key"]).suffix
                    if file_extension.casefold() == ".jpeg" or file_extension.casefold() == ".png":
                        total += 1
                        
                        base64 = getbase64(obj["Key"], bucket)
                        
                        list = [obj["Key"], obj["ETag"], base64]
                        
                        response = detect_labels(obj["Key"], bucket)
                        for i, label in enumerate(response["Labels"]):
                            if (i <=4):
                                list.append(label["Name"])
                                list.append(label["Confidence"])
                        writer.writerow(list)
            except:
                print("File Skipped due to error")
        print(f'total images {total}')

## Create Function to Tag Images

In [6]:
def detect_labels(photo, bucket):

    client = boto3.client("rekognition")

    response = client.detect_labels(
        Image={"S3Object": {"Bucket": bucket, "Name": photo}}, MaxLabels=10
    )

#     print("Detected labels for " + photo)
#     print()
#     for label in response["Labels"]:
#         print("Label: " + label["Name"])
#         print("Confidence: " + str(label["Confidence"]))
#         print("Instances:")
#         for instance in label["Instances"]:
#             print("  Bounding box")
#             print("    Top: " + str(instance["BoundingBox"]["Top"]))
#             print("    Left: " + str(instance["BoundingBox"]["Left"]))
#             print("    Width: " + str(instance["BoundingBox"]["Width"]))
#             print("    Height: " + str(instance["BoundingBox"]["Height"]))
#             print("  Confidence: " + str(instance["Confidence"]))
#             print()

#         print("Parents:")
#         for parent in label["Parents"]:
#             print("   " + parent["Name"])
#         print("----------")
#         print()
    return response

## Function to create Base64 strings from images

In [7]:
# bucket = 'agd-000001-grpdrv-poc'
# obj = "AA_Enduring Power of Attorney/Working Files/Attorney+General's+Department.png"



def getbase64(obj, bucket):
    # import file
    s3 = boto3.resource("s3")
    file = s3.Object(bucket, obj)
    file = file.get()
    data = file["Body"].read()

    # read and resize 
    img = cv2.imdecode(np.asarray(bytearray(data)), cv2.IMREAD_COLOR)
    scale_percent = 10 # percent of original size
    width = int(img.shape[1] * scale_percent / 100)
    height = int(img.shape[0] * scale_percent / 100)
    dim = (width, height)
    img = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
    
    # to base64
    _, buffer_img= cv2.imencode('.jpg', img)
    b64 = str(base64.b64encode(buffer_img).decode("ascii"))
    
    return b64



# getbase64(obj, bucket)

## Run

In [None]:
bucket = 'agd-000001-grpdrv-poc'

main(bucket)

0
total images 11
1
File Skipped due to error
File Skipped due to error
File Skipped due to error




total images 25
2




File Skipped due to error
File Skipped due to error
File Skipped due to error




File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error




File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error




File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error




File Skipped due to error
File Skipped due to error
total images 304
3
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
File Skipped due to error
