In [3]:
import itertools
import numbers
import warnings

import dask
import dask.array
import dask.delayed
import numpy
import pims


def _read_frame(fn, i):
    """
    Load the frame(s) selected by ``i`` from ``fn`` as a NumPy array.

    ``fn`` is handed to ``pims.open`` and ``i`` is used to index the opened
    reader (``imread`` passes a ``slice``). The conversion to an array is
    done while the reader is still open.
    """
    with pims.open(fn) as reader:
        selection = reader[i]
        frames = numpy.asanyarray(selection)
    return frames

    
def imread(fname, nframes=1):
    """
    Lazily load a stack of image frames as a Dask Array.

    The source is opened up front with ``pims`` to discover the number of
    frames, the per-frame shape and the pixel type. The frames are then
    split into consecutive runs of ``nframes`` frames; every run becomes one
    delayed ``_read_frame`` call and one chunk along axis 0 of the result.

    Parameters
    ----------
    fname : str
        A glob like string that may match one or multiple filenames.
    nframes : int, optional
        Number of the frames to include in each chunk (default: 1).
        ``-1`` places every frame in a single chunk.

    Returns
    -------
    array : dask.array.Array
        A Dask Array representing the contents of all image files, with
        the frames concatenated along the first axis.

    Raises
    ------
    ValueError
        If ``nframes`` is not an integer, or is neither ``-1`` nor
        greater than zero.

    Warns
    -----
    RuntimeWarning
        If ``nframes`` exceeds the number of frames, or does not evenly
        divide it (the final chunk then holds the remainder).
    """

    # Validate the chunk size before touching the file system.
    if not isinstance(nframes, numbers.Integral):
        raise ValueError("`nframes` must be an integer.")
    if nframes != -1 and nframes <= 0:
        raise ValueError("`nframes` must be greater than zero.")

    # Probe the source for its overall shape and pixel type.
    with pims.open(fname) as imgs:
        full_shape = (len(imgs),) + imgs.frame_shape
        dtype = numpy.dtype(imgs.pixel_type)
    n_total = full_shape[0]
    frame_shape = full_shape[1:]

    # -1 is the sentinel for "all frames in one chunk".
    if nframes == -1:
        nframes = n_total

    if nframes > n_total:
        warnings.warn(
            "`nframes` larger than number of frames in file."
            " Will truncate to number of frames in file.",
            RuntimeWarning
        )
    elif n_total % nframes != 0:
        warnings.warn(
            "`nframes` does not nicely divide number of frames in file."
            " Last chunk will contain the remainder.",
            RuntimeWarning
        )

    # Chunk boundaries: every multiple of `nframes` below the total, plus the
    # total itself, so the final (possibly shorter) chunk ends at the last frame.
    bounds = list(range(0, n_total, nframes)) + [n_total]

    chunks = [
        dask.array.from_delayed(
            dask.delayed(_read_frame)(fname, slice(start, stop)),
            (stop - start,) + frame_shape,
            dtype
        )
        for start, stop in zip(bounds, bounds[1:])
    ]

    return dask.array.concatenate(chunks)

In [7]:
# Relative path of the local directory holding the bee images.
local_path = "../data/hymenoptera/train/bees/"

In [9]:
# Build a Dask array from every JPEG matched by the glob pattern.
# NOTE(review): imread reuses a single frame shape for every chunk, so this
# presumably needs all matched images to have the same dimensions - confirm.
local_array = imread(f"{local_path}*.jpg")

In [10]:
# S3 URL prefix for the remotely stored bee images.
remote_path = "s3://sofroniewn/image-data/bees/"

In [12]:
# Try the same glob-based read against the S3 URL. As the recorded output of
# this cell shows, pims resolves the string as a local path and raises
# UnknownFormatError ("No such file").
remote_array = imread(f"{remote_path}*.jpg")

  warn(message)
  warn(message)
  warn(message)
  warn(message)


UnknownFormatError: All handlers returned exceptions:
<class 'pims.image_reader.ImageReader'> errored: No such file: '/Users/nicholassofroniew/Github/image-demos/notebooks/s3:/sofroniewn/image-data/bees/*.jpg'
<class 'pims.image_reader.ImageReaderND'> errored: No such file: '/Users/nicholassofroniew/Github/image-demos/notebooks/s3:/sofroniewn/image-data/bees/*.jpg'
<class 'pims.imageio_reader.ImageIOReader'> errored: No such file: '/Users/nicholassofroniew/Github/image-demos/notebooks/s3:/sofroniewn/image-data/bees/*.jpg'
<class 'pims.bioformats.BioformatsReader'> errored: The file "s3://sofroniewn/image-data/bees/*.jpg" does not exist.


In [None]:
# Scratch example of reading an object through an s3fs file handle.
# The stray REPL continuation prompt ("...") was removed so the cell parses.
# NOTE(review): `fs` must already be an s3fs.S3FileSystem instance (it is only
# created in a later cell), and the bucket/key look like placeholders - replace
# them with a real object before running.
with fs.open('my-bucket/my-file.txt', 'rb') as f:
    print(f.read())

In [15]:
import imageio

In [17]:
# Read one local JPEG as raw bytes, then decode those bytes with imageio
# instead of handing imageio a filename.
jpeg_file = local_path + '17209602_fe5a5a746f.jpg'
with open(jpeg_file, 'rb') as f:
    raw_bytes = f.read()
array = imageio.imread(raw_bytes, format='jpg')
print(array.shape)

(412, 500, 3)


In [26]:
import s3fs

# Create an S3 filesystem object (no explicit arguments) and list the
# entries under the bees prefix.
fs = s3fs.S3FileSystem()
files = fs.ls('sofroniewn/image-data/bees/')

# Fetch the second listed entry as bytes and decode it in memory.
with fs.open(files[1], mode='rb') as f:
    raw_bytes = f.read()
array = imageio.imread(raw_bytes, format='jpg')
print(array.shape)

In [27]:
# Show the first entry returned by the S3 listing.
files[0]

'sofroniewn/image-data/bees/1092977343_cb42b38d62.jpg'

In [31]:
# Read the second listed S3 entry as bytes and decode it with imageio.
# Relies on `fs` and `files` from the earlier s3fs cell.
with fs.open(files[1], mode='rb') as f:
    raw_bytes = f.read()
array = imageio.imread(raw_bytes, format='jpg')
print(array.shape)

(333, 500, 3)
