# Async IO

In [None]:
from load_url import *
import nest_asyncio
nest_asyncio.apply()

def load_urls(file_list, url_col, parent_directory):
    """Drive ``fetch_all`` to completion and return its first two results.

    The coroutine returned by ``fetch_all(file_list, url_col,
    parent_directory)`` is scheduled on the current event loop and this call
    blocks until it has finished.

    Returns:
        A ``(file_list, image_arr_list)`` tuple taken from elements 0 and 1
        of the coroutine's result.
    """
    event_loop = asyncio.get_event_loop()
    task = asyncio.ensure_future(fetch_all(file_list, url_col, parent_directory))
    event_loop.run_until_complete(task)

    outcome = task.result()
    return outcome[0], outcome[1]

In [None]:
# Forwarded to load_urls as `url_col`; presumably the df column that holds
# each URL -- confirm against fetch_all.
url_col = 'URL'
# Forwarded to load_urls as `parent_directory`; presumably where downloads
# are stored -- confirm against fetch_all.
parent_directory = 'images/'

total = df.shape[0]  # number of rows in df
batch_size = 200  # rows handed to load_urls per call
# Walk df in consecutive slices of at most batch_size rows.
for i in range(0, total, batch_size):
    # Each slice becomes a list of per-row dicts (orient='records').
    input_dict = df[i:i+batch_size].to_dict(orient='records')
    # The second return value is discarded and `files` is overwritten on
    # every iteration, so only the last batch's value survives the loop.
    files, _ = load_urls(input_dict, url_col, parent_directory)

# futures (ThreadPoolExecutor)

In [None]:
# load image from url
def load_image(im_url):
    """Download ``im_url`` and open the response body as a PIL image.

    The request uses a 3-second timeout, goes through ``proxyDict`` and has
    TLS certificate verification switched off (``verify=False``).

    Args:
        im_url: URL of the image to fetch.

    Returns:
        The image object produced by ``Image.open`` on the downloaded bytes.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        Exception: anything else raised by ``requests.get`` or ``Image.open``
            is propagated unchanged.
    """
    response = requests.get(im_url, timeout=3, verify=False, proxies=proxyDict)
    # Surface HTTP failures as such instead of handing an error page to PIL,
    # which would only report an unidentifiable image.
    response.raise_for_status()
    return Image.open(BytesIO(response.content))


# convert image to RGB mode if it's not
def change_image_mode(im, change_to='RGB'):
    """Return ``im`` converted to the ``change_to`` mode.

    RGBA images get special treatment: every fully transparent pixel
    (alpha == 0) is painted white, the alpha channel is dropped, and the
    resulting RGB image is converted to ``change_to`` when that is not
    already 'RGB'. Partially transparent pixels keep their colour values.
    Every other mode is handed to ``im.convert(change_to)``.

    Args:
        im: image object exposing ``mode`` and ``convert`` (a PIL image).
        change_to: target mode name.

    Returns:
        The converted image.
    """
    if im.mode != 'RGBA':
        return im.convert(change_to)

    pixels = np.array(im)  # writable copy; last axis is (r, g, b, a)
    fully_transparent = pixels[..., 3] == 0
    pixels[fully_transparent, :3] = 255

    # The remaining data is three-channel RGB, so build it as RGB first and
    # only then convert; labelling it directly with another mode would
    # misinterpret (or reject) the buffer.
    rgb_image = Image.fromarray(np.ascontiguousarray(pixels[..., :3]))
    if change_to == 'RGB':
        return rgb_image
    return rgb_image.convert(change_to)


# resize image to different sizes. Return a list
def resize_image(im, resizes, resample=None):
    """Resize ``im`` to every requested size and return the results as arrays.

    Args:
        im: image object exposing ``resize(size, resample)`` (a PIL image).
        resizes: iterable of ``(width, height)`` pairs.
        resample: resampling filter forwarded to ``im.resize``. Defaults to
            ``Image.LANCZOS``.

    Returns:
        A list with one entry per requested size, in order. Each entry is
        the resized image as an array of shape ``(height, width, 3)``, or
        ``None`` when resizing raised or produced a different shape.
    """
    if resample is None:
        # LANCZOS is the filter the removed ``Image.ANTIALIAS`` alias named.
        resample = Image.LANCZOS

    im_arrays = []
    for size in resizes:
        try:
            im_arr = np.asarray(im.resize(size, resample))
        except Exception as exc:
            # Best effort: one bad size must not abort the remaining ones.
            logging.warning(
                "could not resize to {}x{}: {}".format(size[0], size[1], exc))
            im_arr = None
        else:
            # Sizes are (width, height) while array shapes are
            # (rows, columns, channels), hence the swapped comparison.
            expected_shape = (size[1], size[0], 3)
            if im_arr.shape != expected_shape:
                logging.warning(
                    "could not resize to {}x{}: got shape {}".format(
                        size[0], size[1], im_arr.shape))
                im_arr = None
        im_arrays.append(im_arr)

    return im_arrays


# normalize image
def normalize_image(im_arrays):
    """Stack a list of image arrays and divide every value by 255.

    Args:
        im_arrays: list of equally shaped image arrays.

    Returns:
        A single float array holding all inputs, each value divided by 255.0.
    """
    stacked = np.array(im_arrays)
    return stacked.astype(float) / 255.0

In [None]:
# process image and catch all exceptions
# return a list of numpy arrays (None on failure) and an error string (None on success)
def preprocess_image(im, resizes, min_size=(128, 128), change_to='RGB'):
    """Convert ``im`` to the target mode, check its size and resize it.

    Images already in 'RGB' mode are used as they are (``change_to`` is not
    applied to them). 'RGBA', 'P', 'L' and 'CMYK' images go through
    ``change_image_mode``; any other mode is rejected.

    Args:
        im: image object exposing ``mode`` and ``size`` (a PIL image).
        resizes: list of sizes forwarded to ``resize_image``.
        min_size: ``(min_width, min_height)`` threshold.
        change_to: mode requested from ``change_image_mode``.

    Returns:
        ``(arrays, None)`` on success, where ``arrays`` is whatever
        ``resize_image`` returned, or ``(None, error_message)`` on failure.
    """
    im_mode = im.mode
    if im_mode != 'RGB':
        if im_mode not in ('RGBA', 'P', 'L', 'CMYK'):
            return None, "Image processing failed - invalid image mode: {}".format(im_mode)
        try:
            im = change_image_mode(im, change_to=change_to)
        except Exception:
            # Conversion problems become an error message for the caller;
            # KeyboardInterrupt/SystemExit are deliberately not trapped.
            return None, 'Image mode conversion failed from %s to %s' % (im_mode, change_to)

    w, h = im.size
    # NOTE(review): the image is rejected only when BOTH dimensions are below
    # the minimum -- confirm whether `or` was intended.
    if w < min_size[0] and h < min_size[1]:
        return None, "Image is too small: {}, {}".format(w, h)

    return resize_image(im, resizes), None

In [None]:
# load several images in parallel.
# return successfully loaded images only
def load_images_parallel_futures(product_imagedata, resize_to, min_size=(128, 128)):
    """Download and preprocess several images concurrently.

    One thread per entry fetches ``entry['url']`` with ``load_image``. Each
    image that loads is passed through ``preprocess_image`` with
    ``resize_to`` as the only target size, and the surviving arrays are
    normalised together with ``normalize_image``.

    Args:
        product_imagedata: sequence of dicts, each with a ``'url'`` key.
        resize_to: target size forwarded to ``preprocess_image``.
        min_size: minimum size forwarded to ``preprocess_image``.

    Returns:
        ``((im_metadata, norm_images_for_model), im_metadata_err)`` where
        ``im_metadata`` holds the entries that were processed successfully
        (in completion order), ``norm_images_for_model`` is the normalised
        array for those entries in the same order, and ``im_metadata_err``
        holds the entries whose download failed.
    """
    im_metadata = []
    images_for_model = []
    im_metadata_err = []

    # ThreadPoolExecutor rejects max_workers=0, so keep one worker even when
    # there is nothing to download.
    n_workers = max(len(product_imagedata), 1)

    # The with statement makes sure the threads are cleaned up promptly.
    with futures.ThreadPoolExecutor(max_workers=n_workers) as executor:
        # Remember which entry each future belongs to.
        future_to_imdata = {
            executor.submit(load_image, im_data['url']): im_data
            for im_data in product_imagedata
        }
        for future in futures.as_completed(future_to_imdata):
            next_data = future_to_imdata[future]
            try:
                loaded_image_data = future.result()
            except Exception as e:
                logging.warning('%r generated an exception: %s', next_data, e)
                im_metadata_err.append(next_data)
                continue

            if not loaded_image_data:
                logging.error("loaded image is invalid")
                im_metadata_err.append(next_data)
                continue

            image_arr, err = preprocess_image(
                loaded_image_data, resizes=[resize_to], min_size=min_size)
            if err:
                # NOTE(review): preprocessing failures are logged but not
                # added to im_metadata_err -- confirm this is intended.
                logging.warning(err)
                continue

            if image_arr[0] is None:
                # The resize step reports failure as a None entry; letting it
                # through would break the normalisation of the whole batch.
                logging.warning('could not resize image for %r', next_data)
                continue

            im_metadata.append(next_data)
            images_for_model.append(image_arr[0])

    norm_images_for_model = normalize_image(images_for_model)

    return (im_metadata, norm_images_for_model), im_metadata_err