In [1]:
from fastai import *
from fastai.vision import *
from PIL import *

import colorcet as cc
# Linear grey colormap from colorcet; passed as `cmap` when plotting the grayscale previews.
cmap_grey = cc.cm.linear_grey_0_100_c0

# Demonstration of  speed and correctness of conversion of PIL Images to tensors

The result is that the proposed method "allPil2tensor" (implemented below as `pil2tensor_new_argtype`) handles both RGB and 16-bit grayscale images correctly.

The existing method "Pil2tensor" can only handle 8-bit images such as JPEGs.

The proposed method is also slightly faster when the subsequent conversion to float, which takes place in open_image, is included in the timing.

In [2]:
# Generate a reproducible random RGB test image.
# `randint`'s upper bound is exclusive, so 2**8 is needed to cover the full uint8 range 0..255.
np.random.seed(0)
arr = np.random.randint(0, 2**8, (224, 224, 3), dtype=np.uint8)  # or np.ones etc.
img = Image.fromarray(arr)

# The pil2tensor methods

In [3]:
# the existing method
def Pil2tensor(image:NPImage)->TensorImage:
    "Baseline converter: reinterpret the raw bytes of a PIL-style `image` as a channel-first byte tensor."
    # Wrap the image's raw byte buffer in a flat ByteTensor (one element per byte).
    raw_bytes = image.tobytes()
    flat = ByteTensor(torch.ByteStorage.from_buffer(raw_bytes))
    # PIL reports `size` as (width, height); lay the bytes out as (height, width, bytes per pixel).
    height, width = image.size[1], image.size[0]
    pixels = flat.view(height, width, -1)
    # Move the last axis to the front: (height, width, c) -> (c, height, width).
    return pixels.permute(2,0,1)


In [4]:
from numpy import ndarray
NDArray = ndarray
def pil2tensor_new_argtype(image:Union[NPImage,NDArray], dtype)->TensorImage:
    """Convert a PIL image or numpy array to a channel-first torch tensor of the given dtype.

    Args:
        image: a PIL image or a numpy array laid out as (height, width) or
            (height, width, channels).
            Grayscale (single channel) input gives a tensor of shape (1, height, width).
            RGB input gives a tensor of shape (3, height, width).
        dtype: numpy dtype of the result, e.g. np.double, np.float32, np.float16,
            np.int64, np.int32 or np.uint8. It must be a dtype that
            `torch.from_numpy` accepts.

    Returns:
        A tensor of shape (channels, height, width). No rescaling is done here.
        When `image` is already a numpy array of the requested dtype no copy is
        made (`astype(copy=False)`), so the tensor shares memory with that array.

    Usage 1: pil2tensor_new_argtype(Image.open("dog.47.jpg").convert(fmt), dtype).div_(scale)
             where fmt = "RGB", "L" (8 bit, e.g. for masks) or "I" (integer, e.g. for 16 bit grayscale)
             where scale = 255 for rgb, 65535 for 16 bit grayscale
             if your data are already at the right scale then no .div_ is required
    Usage 2: if your image is a simple rgb or grayscale file you can skip the convert call.
             This is slightly faster but more risky:
             pil2tensor_new_argtype(Image.open("dog.47.jpg"), dtype).div_(scale) with scale as above.
    Usage 3: pil2tensor_new_argtype(numpy_array, dtype).div_(scale) if the data are already in a numpy array
    """
    a = np.asarray(image)
    # Give single-channel images an explicit trailing channel axis: (h, w) -> (h, w, 1).
    if a.ndim==2 : a = np.expand_dims(a,2)
    # Move channels first: (h, w, c) -> (c, h, w). This is a view; no pixel data is moved.
    a = np.transpose(a, (2, 0, 1))
    return torch.from_numpy( a.astype(dtype, copy=False) )


# Speed of pil2tensor

In [5]:
# Baseline: build the byte tensor, then cast to float and scale by 255 in place.
%timeit -r 20 Pil2tensor(img).float().div_(255)

285 µs ± 8.24 µs per loop (mean ± std. dev. of 20 runs, 1000 loops each)


In [6]:
# Proposed: convert straight to float32 inside the function, then scale by 255 in place.
%timeit -r 20 pil2tensor_new_argtype(img,np.float32).div_(255)

111 µs ± 1.93 µs per loop (mean ± std. dev. of 20 runs, 10000 loops each)


# Speed of reading and conversion with convert(RGB)

In [7]:
# Baseline, including reading the file and an explicit convert("RGB").
%timeit -r 50 Pil2tensor(Image.open("dog.47.jpg").convert("RGB")).float().div_(255)

4.04 ms ± 412 µs per loop (mean ± std. dev. of 50 runs, 100 loops each)


In [8]:
# Proposed, including reading the file and an explicit convert("RGB").
%timeit -r 50 pil2tensor_new_argtype(Image.open("dog.47.jpg").convert("RGB"), np.float32).div_(255)

2.94 ms ± 47.2 µs per loop (mean ± std. dev. of 50 runs, 100 loops each)


# Speed of reading and conversion with NO convert(RGB)

In [9]:
# Baseline, reading the file but passing the opened image on without convert().
%timeit -r 50 Pil2tensor(Image.open("dog.47.jpg")).float().div_(255)

3.6 ms ± 62.2 µs per loop (mean ± std. dev. of 50 runs, 100 loops each)


In [10]:
# Proposed, reading the file but passing the opened image on without convert().
%timeit -r 50 pil2tensor_new_argtype(Image.open("dog.47.jpg"), np.float32).div_(255)

2.86 ms ± 43.2 µs per loop (mean ± std. dev. of 50 runs, 100 loops each)


# Convert numpy to tensor assuming that the array is already at the right scale

In [11]:
# float32 array with values in [0, 1), i.e. already at the target dtype and scale.
arrfloat  = np.random.rand(224,224,3).astype(np.float32)

In [12]:
# Input dtype equals the requested dtype, so astype(copy=False) does not copy the data.
%timeit -r 50 pil2tensor_new_argtype(arrfloat,np.float32)

5.44 µs ± 524 ns per loop (mean ± std. dev. of 50 runs, 100000 loops each)


# Is the shape correct

In [13]:
# Shape of the test image when viewed as a numpy array; compare with the tensor shapes below.
np.asarray(img).shape

(224, 224, 3)

In [14]:
# Baseline converter: the last axis is permuted to the front of the tensor.
Pil2tensor(img).shape

torch.Size([3, 224, 224])

In [15]:
pil2tensor_new_argtype(img, np.float32).shape

torch.Size([3, 224, 224])

# Is the content correct for rgb image
Notice that the pixel values are preserved and the tensor size is correct for both conversion methods.

In [None]:
# Load gray.jpg and convert it to RGB; the bare name on the last line displays the image.
rbg16 = Image.open("gray.jpg").convert("RGB")
rbg16

In [None]:
# First channel of the image as a numpy array: the reference pixel values.
np.asarray(rbg16)[:,:,0]

In [None]:
# Baseline converter: tensor size, then the first channel as ints for comparison with the numpy values.
# Both lines call the local `Pil2tensor` so the cell checks the function defined in this notebook.
print(Pil2tensor(rbg16).size())
Pil2tensor(rbg16).int()[0]

In [None]:
# Proposed converter: tensor size, then the first channel.
# `np.int` (an alias of the builtin `int`) was removed in NumPy 1.24, so an explicit integer dtype is used.
print(pil2tensor_new_argtype(rbg16, np.int64).size())
pil2tensor_new_argtype(rbg16, np.int64)[0]

# Is the content correct for a 16-bit grayscale PNG
Notice that Pil2tensor truncates the 16-bit values to 8 bits and that the tensor size is wrong.

allPil2tensor (`pil2tensor_new_argtype`) preserves the pixel values and the tensor size is correct.

In [None]:
# Load the PNG and convert it to PIL mode "I" (integer pixels); the bare name displays the image.
im16bit = Image.open("gray_16bit.png").convert("I")
im16bit

In [None]:
# Reference pixel values of the mode "I" image as a numpy array.
np.asarray(im16bit)

In [None]:
# Baseline converter on the mode "I" image: print the tensor size, then show the values as ints.
print(Pil2tensor(im16bit).size())
Pil2tensor(im16bit).int()

In [None]:
# Proposed converter on the mode "I" image: tensor size, then the values.
# `np.int` (an alias of the builtin `int`) was removed in NumPy 1.24, so an explicit integer dtype is used.
print(pil2tensor_new_argtype(im16bit, np.int64).size())
pil2tensor_new_argtype(im16bit, np.int64)

# Show different test images in RGB and grayscale

In [None]:
# Collect the image files in the current working directory.
# Suffixes are compared in lower case so that e.g. ".JPG" and ".jpg" are both picked up,
# directories are skipped, and the result is sorted so the grid layout is the same on every run.
extensions = ('.bmp', '.gif', '.icns', '.ico', '.jpg', '.pgm', '.png', '.tga', '.tif')
files      = np.array(
    sorted(f for f in Path.cwd().glob("*.*") if f.is_file() and f.suffix.lower() in extensions),
    dtype=object,
)

ncols = 4
# At least one row, so plt.subplots still gets a valid grid when no image file is found.
nrows = max(1, int( math.ceil(len(files)/float(ncols)) ))

fig,axes = plt.subplots(nrows=nrows, ncols=ncols, figsize = (12,12), dpi=100 )
axes     = axes.flatten()
# Hide the frames and ticks of grid cells that have no image.
for ax in axes[len(files):]:
    ax.axis("off")
for ax,f in zip(axes, files):
    # Open inside a context manager so the file handle is released once the pixels are read.
    with Image.open(f) as src:
        im = np.asarray(src.convert("RGB"))
    ax.imshow(  im )
    ax.set_title(f.name)

In [None]:
# Grayscale version of the grid; reuses `files`, `nrows` and `ncols` from the previous cell.
# Every file is converted to PIL mode "I" and divided by 65535, the 16-bit maximum.
fig,axes = plt.subplots(nrows=nrows, ncols=ncols, figsize = (12,12), dpi=100 )
axes     = axes.flatten()
# Hide the frames and ticks of grid cells that have no image.
for ax in axes[len(files):]:
    ax.axis("off")
for ax,f in zip(axes, files):
    # Open inside a context manager so the file handle is released once the pixels are read.
    with Image.open(f) as src:
        im = np.asarray(src.convert("I")) / 65535.
    ax.imshow(  im, cmap=cmap_grey )
    ax.set_title(f.name)