# Data Augmentation of training image set


Keras API in TensorFlow 2 is used for data augmentation of the training image set.

In [79]:
# Importing Keras functions in TensorFlow2
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import boto3
import io
s3_connection = boto3.resource('s3')
from PIL import Image, ImageDraw, ExifTags, ImageColor, ImageFont
from IPython.display import display 
import pandas as pd
import numpy as np
import cv2
%matplotlib inline

You can change the values of the augmentation parameters below to change augmentation outputs. Keep in mind you should aim to emulate real world images and not cause excessive image distortion with augmentation.

In [80]:
# Initialising ImageGenerator class and pass in the augmentation parameters
datagenerator = ImageDataGenerator(
        rotation_range = 15,
        shear_range = 0.1,
        zoom_range = 0.3,
        horizontal_flip = True,
        brightness_range = (0.03, 0.06))

- In this code block training images which need to be augmented are imported from an S3 bucket.
- Create an S3 bucket and load the images targeted for augmentation into the bucket. 
- Replace the value of the vairable   "bucketname" below to the name of the bucket you have created

- You can also change the value of "i" below to change the number of augmented images to be generated.

- The augmented images are loaded to this notebook in the local directory and can be downloaded, and then loaded to the desired S3 bucket for training. 

In [81]:
#Variables
bucketname = "circuitboard-augmentation-solder-bucket"
contenttype = "image/jpeg"
index = 0
clients3 = boto3.client('s3')
paginator = clients3.get_paginator('list_objects_v2')
result = paginator.paginate(Bucket=bucketname)
    
for page in result:
        if "Contents" in page:
            for key in page[ "Contents" ]:
                photo = key[ "Key" ]
                print(photo)
                s3_object = s3_connection.Object(bucketname,photo)
                s3_response = s3_object.get()
                stream = io.BytesIO(s3_response['Body'].read())
                image1=Image.open(stream)
                print(image1)
                imgWidth, imgHeight = image1.size  
                draw = ImageDraw.Draw(image1) 
                image1.show()
                file_obj = clients3.get_object(Bucket=bucketname, Key=photo)
                # reading the file content in bytes
                file_content = file_obj["Body"].read()
                # creating 1D array from bytes data range between[0,255]
                np_array = np.frombuffer(file_content, np.uint8)
                # decoding array
                image_np = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
                # saving image to tmp (writable) directory
                cv2.imwrite("/tmp/"+photo, image_np)
                photopath = "/tmp/"+photo
                # Loading a sample image 
                img = load_img(photopath) 
                # Converting the input sample image to an array
                x = img_to_array(img)
                # Reshaping the input image
                x = x.reshape((1, ) + x.shape)
                index += 1
                # Generating and saving 5 augmented samples 
                # using the above defined parameters. 
                iteration = 0
                for batch in datagenerator.flow(x, batch_size = 1,
                          save_to_dir ='preview', 
                          save_prefix =photo, save_format ='jpeg'):
                    iteration += 1
                    if iteration > 1:
                        break

test-anomaly_12.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=4000x2667 at 0x7FE600DF82E8>
test-anomaly_13.jpg
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=4000x2667 at 0x7FE600DF8F98>


KeyboardInterrupt: 