In [3]:
!pip install -Uqq fastbook
import fastbook

In [4]:
#hide
from fastbook import *
from fastai.vision.widgets import *

To download images with Bing Image Search, sign up at [Microsoft Azure](https://azure.microsoft.com/en-us/services/cognitive-services/bing-web-search-api/) for a free account. You will be given a key, which you can either export as the `AZURE_SEARCH_KEY` environment variable or copy and enter in a cell as follows (replacing 'XXX' with your key and executing it):

In [None]:
# Read the Bing search key from the AZURE_SEARCH_KEY environment variable;
# the literal 'XXX' is only the placeholder used when that variable is unset.
key = os.environ.get('AZURE_SEARCH_KEY', 'XXX')


In [None]:
# Evaluating the bare name displays the function object as the cell output,
# which confirms that it is in scope before it is called.
search_images_bing

In [None]:
# Run the search and pull the 'contentUrl' entry out of every result.
results = search_images_bing(key, 'grizzly bear')
ims = results.attrgot('contentUrl')
# Number of URLs collected (shown as the cell output).
len(ims)


We've successfully downloaded the URLs of 150 grizzly bears (or, at least, images that Bing Image Search finds for that search term).

**NB**: there's no way to be sure exactly what images a search like this will find. The results can change over time. We've heard of at least one case of a community member who found some unpleasant pictures of dead bears in their search results. You'll receive whatever images are found by the web search engine. If you're running this at work, or with kids, etc., then be cautious before you display the downloaded images.

Let's look at one:

In [None]:
#hide
# Replaces `ims` with a single hard-coded URL, so `ims[0]` no longer depends on
# the live search results — presumably to keep the example reproducible.
ims = ['http://3.bp.blogspot.com/-S1scRCkI3vY/UHzV2kucsPI/AAAAAAAAA-k/YQ5UzHEm9Ss/s1600/Grizzly%2BBear%2BWildlife.jpg']

In [None]:
# Download the first URL in `ims` to a local file.
# NOTE(review): assumes the 'images' directory already exists or that
# download_url creates it — confirm for the installed library version.
dest = 'images/grizzly.jpg'
download_url(ims[0], dest)

In [None]:
# Open the downloaded file and display a thumbnail of it as the cell output
# (128, 128 are the size arguments passed to to_thumb).
im = Image.open(dest)
im.to_thumb(128,128)


In [None]:
# Label categories for the logo detector data set (a tuple of search terms).
# NOTE(review): the variable name is spelled `lable_types` (sic); any cell that
# uses these labels has to reference this exact name.
lable_types = 'aws logo','aws jumper','aws tshirt', 'aws hat', 'people'
# Root folder for the data set, relative to the notebook's working directory.
path = Path('logoDetector')

In [None]:
# Download images for every label into its own sub-folder of `path`.
# The whole download is skipped when `path` already exists, so re-running the
# cell does not query the search API again.
if not path.exists():
    path.mkdir()
    # Iterate the labels stored in `lable_types`; the previous `bear_types`
    # name is not defined in the visible cells and raised NameError.
    for o in lable_types:
        dest = (path/o)
        dest.mkdir(exist_ok=True)
        # Search for the label itself; the old query appended ' bear' to every
        # term, which is meaningless for the logo/people labels.
        results = search_images_bing(key, o)
        download_images(dest, urls=results.attrgot('contentUrl'))

Our folder has image files, as we'd expect:

In [None]:
# Collect the image files found under `path` and display the collection.
fns = get_image_files(path)
fns


> j: I just love this about working in Jupyter notebooks! It's so easy to gradually build what I want, and check my work every step of the way. I make a _lot_ of mistakes, so this is really helpful to me...

Often when we download files from the internet, there are a few that are corrupt. Let's check:

In [None]:
# Check every downloaded file; `failed` holds the ones that did not verify
# and is displayed as the cell output.
failed = verify_images(fns)
failed


To remove all the failed images, you can use `unlink` on each of them. Note that, like most fastai functions that return a collection, `verify_images` returns an object of type `L`, which includes the `map` method. This calls the passed function on each element of the collection:

In [None]:
# Delete each failed file from disk; the trailing ';' suppresses the cell output.
failed.map(Path.unlink);

In [None]:
# Shared S3 resource used by the S3 helper functions defined below.
# NOTE(review): `boto3` is not imported in any visible cell — confirm it is
# imported before this cell runs on a fresh kernel.
s3 = boto3.resource('s3')

In [None]:
def file_exists_in_s3(s3bucket, s3key):
    """Report whether an object is present in S3.

    Issues a HEAD request for ``s3key`` in ``s3bucket`` through the
    module-level ``s3`` resource.

    Returns:
        True when the HEAD request succeeds, False when it fails with the
        error code '404'.

    Raises:
        ClientError: any client error other than '404' is printed and
            re-raised.
    """
    try:
        s3.meta.client.head_object(Bucket=s3bucket, Key=s3key)
    except ClientError as err:
        error_info = err.response["Error"]
        if error_info["Code"] != '404':
            # Unexpected failure (permissions, throttling, ...): surface it.
            print (error_info)
            raise err
        return False
    else:
        return True

In [None]:
def download_file_from_s3(uri, working_dir):
    """Download the object addressed by an S3 URI into ``working_dir``.

    The bucket is taken from the URI's network location and the key from its
    path with leading slashes stripped. The local file is named after the
    last '/'-separated segment of the URI.

    Returns:
        The local path the object was downloaded to.
    """
    local_path = os.path.join(working_dir, uri.rsplit('/', 1)[-1])
    parsed = urlparse(uri)
    bucket_name, object_key = parsed.netloc, parsed.path.lstrip('/')
    s3.Bucket(bucket_name).download_file(object_key, local_path)
    return local_path

In [None]:

def upload_file_to_s3(image_local_path, s3_bucket, s3_prefix):
    """Upload a local file to ``s3_bucket`` under ``s3_prefix``.

    The object key is ``s3_prefix`` + "/" + the last '/'-separated segment of
    ``image_local_path``. The destination is logged after the upload.
    """
    file_name = image_local_path.rsplit('/', 1)[-1]
    object_key = "/".join((s3_prefix, file_name))
    bucket = s3.Bucket(s3_bucket)
    bucket.upload_file(image_local_path, object_key)
    logger.info("wrote to s3://{}/{}".format(s3_bucket, object_key))

In [None]:

from enum import Enum

class Transform(Enum):
    """Identifiers for the image augmentations dispatched by transform_and_upload."""
    # Dispatched to flip(..., x_axis=True).
    X_FLIP = 0
    # Dispatched to flip(..., x_axis=False).
    Y_FLIP = 1
    # Dispatched to rotate(..., cw=True).
    CW_ROTATE = 2
    # Dispatched to rotate(..., cw=False).
    CCW_ROTATE = 3
    

def flip(image_local_path, working_dir, x_axis=True):
    """Write a flipped copy of an image into ``working_dir``.

    Args:
        image_local_path: path of the image to read.
        working_dir: directory the flipped image is written to.
        x_axis: when True the image is flipped with cv2 flip code 1 and the
            output name is tagged "-x-flip"; otherwise flip code 0 is used
            and the tag is "-y-flip".

    Returns:
        Path of the flipped image that was written.
    """
    img = imageio.imread(image_local_path)
    flipped_image = cv2.flip(img, 1 if x_axis else 0)

    # If original file is "test-1.jpg", a x-flipped file will be named "test-1-x-flip.jpg".
    # The tag is inserted in front of the extension only: replacing every '.'
    # mangled names such as "a.b.jpg" and left extension-less names untouched,
    # so the output could collide with the input.
    stem, ext = os.path.splitext(os.path.basename(image_local_path))
    tag = '-x-flip' if x_axis else '-y-flip'
    flipped_fpath = os.path.join(working_dir, stem + tag + ext)

    imageio.imwrite(flipped_fpath, flipped_image)
    logger.info("wrote image to {}".format(flipped_fpath))
    return flipped_fpath


def rotate(image_local_path, working_dir, cw=True):
    """Write a quarter-turn rotated copy of an image into ``working_dir``.

    The rotation is built from a transpose followed by a flip: flip code 1
    when ``cw`` is True, flip code 0 otherwise.

    Args:
        image_local_path: path of the image to read.
        working_dir: directory the rotated image is written to.
        cw: True tags the output "-cw-rotate", False tags it "-ccw-rotate".

    Returns:
        Path of the rotated image that was written.
    """
    img = imageio.imread(image_local_path)
    rotated = cv2.transpose(img)
    rotated = cv2.flip(rotated, 1 if cw else 0)

    # If original file is "test-1.jpg", a cw rotated file will be named "test-1-cw-rotate.jpg".
    # The tag is inserted in front of the extension only: replacing every '.'
    # mangled names such as "a.b.jpg" and left extension-less names untouched,
    # so the output could collide with the input.
    stem, ext = os.path.splitext(os.path.basename(image_local_path))
    tag = '-cw-rotate' if cw else '-ccw-rotate'
    rotated_fpath = os.path.join(working_dir, stem + tag + ext)

    imageio.imwrite(rotated_fpath, rotated)
    logger.info("wrote image to {}".format(rotated_fpath))
    return rotated_fpath


def transform_and_upload(transformation, image_path, working_directory, s3_bucket, s3_prefix, cleanup):
    """Apply one augmentation to a local image and upload the result to S3.

    Args:
        transformation: a ``Transform`` member selecting the augmentation.
            Any value other than X_FLIP, Y_FLIP or CW_ROTATE is handled as a
            counter-clockwise rotation.
        image_path: local path of the source image.
        working_directory: directory the augmented image is written to.
        s3_bucket: destination bucket.
        s3_prefix: key prefix the augmented image is uploaded under.
        cleanup: when truthy, the local augmented file is deleted afterwards.
    """
    if transformation is Transform.X_FLIP:
        transformed = flip(image_path, working_directory, x_axis=True)
    elif transformation is Transform.Y_FLIP:
        transformed = flip(image_path, working_directory, x_axis=False)
    elif transformation is Transform.CW_ROTATE:
        transformed = rotate(image_path, working_directory, cw=True)
    else:
        transformed = rotate(image_path, working_directory, cw=False)

    try:
        # Use the notebook's own S3 helper; the former `utils.upload_file`
        # referred to a module that is not defined in the visible cells.
        upload_file_to_s3(transformed, s3_bucket, s3_prefix)
    finally:
        # Remove the temporary file even when the upload fails, so a failed
        # run does not leave augmented images behind in the working directory.
        if cleanup:
            os.remove(transformed)


def transform_img(img_s3_uri, working_directory, output_s3_bucket, cleanup=True):
    """Create and upload all four augmentations of one S3 image.

    The image is skipped when its counter-clockwise rotation already exists
    in ``output_s3_bucket``; that augmentation is uploaded last, so its
    presence is used as the marker that the image was processed.

    Args:
        img_s3_uri: S3 URI of the source image.
        working_directory: local directory for the download and the
            intermediate augmented files.
        output_s3_bucket: bucket the augmentations are uploaded to, under the
            prefixes 'frames/x-flipped', 'frames/y-flipped', 'frames/cw' and
            'frames/ccw'.
        cleanup: when truthy, local files are deleted once they are no
            longer needed.
    """
    s3_key = urlparse(img_s3_uri).path.lstrip('/')

    img_fname = os.path.split(s3_key)[1]
    img_id, img_ext = os.path.splitext(img_fname)
    logger.info("image: {}".format(img_id))

    # rotate() keeps the source file's extension, so the marker key has to use
    # it too; a hard-coded '.jpg' never matched augmentations of other formats.
    img_ccw = 'frames/ccw/' + img_id + '-ccw-rotate' + img_ext

    # The S3 helpers are the ones defined in this notebook; the former
    # `utils.*` calls referred to a module that is not defined in the visible cells.
    if file_exists_in_s3(output_s3_bucket, img_ccw):
        logger.info("augmentation already exists: s3://{}/{}".format(output_s3_bucket, img_ccw))
        return

    logger.info("augmentation does not exist: s3://{}/{}".format(output_s3_bucket, img_ccw))
    image_path = download_file_from_s3(img_s3_uri, working_directory)

    # Order matters: the ccw rotation doubles as the completion marker above,
    # so it must stay last.
    augmentations = (
        (Transform.X_FLIP, 'frames/x-flipped'),
        (Transform.Y_FLIP, 'frames/y-flipped'),
        (Transform.CW_ROTATE, 'frames/cw'),
        (Transform.CCW_ROTATE, 'frames/ccw'),
    )
    try:
        for transformation, s3_prefix in augmentations:
            transform_and_upload(transformation=transformation, image_path=image_path,
                                 working_directory=working_directory, s3_bucket=output_s3_bucket,
                                 s3_prefix=s3_prefix, cleanup=cleanup)
    finally:
        # Delete the downloaded source even when an augmentation fails.
        if cleanup:
            os.remove(image_path)