# Overview
This notebook lists the objects in an AWS S3 bucket, picks out the images (PNG and JPEG), and then labels each image with AWS Rekognition so the labels can be analysed.

The notebook requires access to:
* AWS S3
* AWS Rekognition


## Import

In [60]:
import base64
import csv
import io
import json
import os
import pathlib
from io import BytesIO

import boto3
import cv2
import numpy as np
import pandas as pd
import requests
from IPython.display import HTML, display
from PIL import Image

## Create Function to Tag Images

In [63]:
def detect_labels(photo, bucket, max_labels=10, client=None, verbose=True):
    """Label an image stored in S3 with AWS Rekognition.

    Args:
        photo: Key of the image object inside ``bucket``.
        bucket: Name of the S3 bucket that holds the image.
        max_labels: Value sent as ``MaxLabels`` in the request. Defaults to 10.
        client: Optional Rekognition client to reuse. When omitted, a new
            client is created with ``boto3.client("rekognition")``; pass one
            in when labelling many images to avoid rebuilding it per call.
        verbose: When True (the default), print a report of every label,
            its instances and its parents.

    Returns:
        The response returned by the client's ``detect_labels`` call,
        unmodified.
    """
    if client is None:
        client = boto3.client("rekognition")

    response = client.detect_labels(
        Image={"S3Object": {"Bucket": bucket, "Name": photo}},
        MaxLabels=max_labels,
    )

    if verbose:
        _print_label_report(photo, response)
    return response


def _print_label_report(photo, response):
    """Print the labels, instance bounding boxes and parents found in ``response``."""
    print("Detected labels for " + photo)
    print()
    for label in response["Labels"]:
        print(f"Label: {label['Name']}")
        print(f"Confidence: {label['Confidence']}")
        print("Instances:")
        # A missing "Instances" / "Parents" entry is treated as an empty list
        # so the report never fails on a label that omits them.
        for instance in label.get("Instances", []):
            box = instance["BoundingBox"]
            print("  Bounding box")
            for field in ("Top", "Left", "Width", "Height"):
                print(f"    {field}: {box[field]}")
            print(f"  Confidence: {instance['Confidence']}")
            print()

        print("Parents:")
        for parent in label.get("Parents", []):
            print("   " + parent["Name"])
        print("----------")
        print()

## Function to create Base64 strings from images

In [2]:
def getbase64(obj, bucket, scale_percent=10):
    """Download an image from S3 and return a downscaled JPEG copy as base64 text.

    Args:
        obj: Key of the S3 object to download.
        bucket: Name of the S3 bucket that holds the object.
        scale_percent: Size of the thumbnail as a percentage of the original
            width and height. Defaults to 10.

    Returns:
        str: ASCII base64 encoding of the JPEG-encoded, resized image.

    Raises:
        ValueError: If the object is empty, cannot be decoded as an image,
            or the resized image cannot be encoded as JPEG.
    """
    # Fetch the raw object bytes.
    s3 = boto3.resource("s3")
    data = s3.Object(bucket, obj).get()["Body"].read()
    if not data:
        raise ValueError(f"S3 object {obj!r} in bucket {bucket!r} is empty")

    # Decode; imdecode signals failure by returning None rather than raising.
    img = cv2.imdecode(np.frombuffer(data, dtype=np.uint8), cv2.IMREAD_COLOR)
    if img is None:
        raise ValueError(
            f"S3 object {obj!r} in bucket {bucket!r} could not be decoded as an image"
        )

    # Resize. Clamp to 1 px so very small originals do not produce a
    # zero-sized target, which cv2.resize rejects.
    width = max(1, int(img.shape[1] * scale_percent / 100))
    height = max(1, int(img.shape[0] * scale_percent / 100))
    img = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)

    # Re-encode as JPEG and convert to base64 text.
    encoded_ok, buffer_img = cv2.imencode(".jpg", img)
    if not encoded_ok:
        raise ValueError(f"could not encode thumbnail of {obj!r} as JPEG")
    return base64.b64encode(buffer_img).decode("ascii")

## Define the location of AWS S3 to use

In [None]:
# Bucket whose objects are listed and scanned for images to label.
source_bucketname = "cemf-suan"
# Passed to list_s3_files as dest_bucket; that function's copy step is
# commented out, so this bucket is not currently read or written.
dest_bucketname = "sbbridledestbucket"

## Function for getting AWS S3 objects and processing them

In [None]:
def list_s3_files(bucket, dest_bucket, output_path="filetags.csv"):
    """Label every JPEG/PNG object in an S3 bucket and write the results to CSV.

    Each image produces one CSV row: the object key, a base64 thumbnail from
    ``getbase64``, then a (label name, confidence) pair for every label that
    ``detect_labels`` returns. Rows therefore vary in length.

    Args:
        bucket: Name of the S3 bucket to scan.
        dest_bucket: Name of the destination bucket. Currently unused: the
            copy step is disabled for the moment.
        output_path: Path of the CSV file to (over)write. Defaults to
            "filetags.csv".
    """
    image_suffixes = {".jpeg", ".jpg", ".png"}

    s3 = boto3.client("s3")
    paginator = s3.get_paginator("list_objects_v2")
    pages = paginator.paginate(Bucket=bucket, Prefix="")
    with open(output_path, "w", newline="") as myfile:
        writer = csv.writer(myfile)
        for page in pages:
            # A listing page with no objects has no "Contents" entry at all.
            for obj in page.get("Contents", []):
                key = obj["Key"]
                # Compare case-insensitively so ".JPG" / ".PNG" are included.
                if pathlib.Path(key).suffix.lower() not in image_suffixes:
                    continue

                # getbase64 expects the object key, not the listing entry.
                thumbnail_b64 = getbase64(key, bucket)
                print("moving file " + key)
                row = [key, thumbnail_b64]
                # NOTE: copying the object to dest_bucket has been disabled
                # for the moment.
                response = detect_labels(key, bucket)
                for label in response["Labels"]:
                    row.append(label["Name"])
                    row.append(label["Confidence"])
                print(row)
                writer.writerow(row)

In [None]:
# Scan the source bucket and write one row per image to filetags.csv.
list_s3_files(bucket=source_bucketname, dest_bucket=dest_bucketname)

In [62]:
# Each image gets a different number of labels, so rows in filetags.csv have
# different lengths. pandas sizes the table from the first row and raises
# ParserError on any longer row, so measure the widest row first and name
# that many columns explicitly; shorter rows are then padded with NaN.
csv.field_size_limit(2**31 - 1)  # base64 thumbnails can exceed the 128 KiB default
with open("filetags.csv", newline="") as tags_file:
    n_columns = max((len(row) for row in csv.reader(tags_file)), default=0)

# An empty file (no images found) would make read_csv raise EmptyDataError.
if n_columns:
    df = pd.read_csv("filetags.csv", header=None, names=range(n_columns))
else:
    df = pd.DataFrame()
display(df)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
0,download (1).jpeg,b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAoHCBYWFRgW...,Puppy,99.878677,Dog,99.878677,Mammal,99.878677,Canine,99.878677,...,Pet,99.878677,Labrador Retriever,96.055397,Kangaroo,69.639275,Wood,57.51548,Panther,57.51083
1,download (2).jpeg,b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAoHCBUVFRgW...,Kangaroo,79.141579,Animal,79.141579,Mammal,79.141579,Rodent,67.135056,...,Bird,57.850574,Squirrel,56.678795,Wire,56.496326,Sparrow,55.198532,,
2,download.jpeg,b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkGBxITEhUS...,Dynamite,71.098,Weapon,71.098,Food,64.390114,Game,62.355255,...,Gum,55.997955,Super Mario,55.933823,,,,,,
3,images.jpeg,b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkGBxMSEhUS...,Suv,99.987778,Vehicle,99.987778,Car,99.987778,Transportation,99.987778,...,Alloy Wheel,57.570053,Spoke,57.570053,Wheel,57.570053,Machine,57.570053,Sedan,56.736477
