Skip to content

Commit

Permalink
Profile loading png as np
Browse files Browse the repository at this point in the history
  • Loading branch information
Jonathan Tripp committed Nov 16, 2021
1 parent f791bed commit 6ac3048
Show file tree
Hide file tree
Showing 2 changed files with 171 additions and 2 deletions.
90 changes: 89 additions & 1 deletion docs/source/loading_images.md
@@ -1,4 +1,4 @@
# Loading Images
# Loading Images as Torch Tensors

There are many libraries available that can load png images. Simple examples were written using most of them, and their execution times were compared. The goal was to load a png file, either RGB or greyscale, into a torch.Tensor.

Expand Down Expand Up @@ -286,3 +286,91 @@ Similarly, with greyscale versions of the RGB images:
| read_image_torch2 | **7.73153** |

The recommendation therefore is to use matplotlib `mpimg.imread` to load the image and `TF.to_tensor` to transform the numpy array to a torch tensor. This is almost as fast as loading the data directly in a native numpy or torch format.

# Loading Images as Numpy Arrays

Alternatively, a numpy array may be required with an equivalent form to PIL:

* shape [Height, Width, 3] (for RGB images), in RGB order or [Height, Width] (for greyscale images);
* dtype float;
* range between 0.0 and 255.0.

## Pillow

If the image is known to be a png then a shortcut can be taken, which is quicker:

```python
from pathlib import Path

import numpy as np
import PIL.PngImagePlugin
from PIL import Image


def read_image_pillow2(input_filename: Path) -> np.ndarray:
    """
    Read an image file with pillow and return a numpy array.

    :param input_filename: Source image file path.
    :return: float64 numpy array of shape (H, W), (H, W, 3).
    """
    with Image.open(input_filename) as pil_png:
        # `np.float` was only an alias for the builtin `float` (i.e. float64); it was
        # deprecated in NumPy 1.20 and removed in 1.24, so the dtype is named explicitly.
        return np.asarray(pil_png, np.float64)


def read_image_pillow3(input_filename: Path) -> np.ndarray:
    """
    Read a png file with pillow's png plugin directly and return a numpy array.

    The file must already be known to be a png, because the png-specific image class is
    constructed here instead of going through `Image.open`.

    :param input_filename: Source image file path.
    :return: float64 numpy array of shape (H, W), (H, W, 3).
    """
    with PIL.PngImagePlugin.PngImageFile(input_filename) as pil_png:
        # `np.float` was only an alias for the builtin `float` (i.e. float64); it was
        # deprecated in NumPy 1.20 and removed in 1.24, so the dtype is named explicitly.
        return np.asarray(pil_png, np.float64)
```

## SciPy

Similarly, using imageio, which SciPy recommends in place of its removed `scipy.misc.imread`:

```python
from pathlib import Path

import imageio
import numpy as np


def read_image_scipy2(input_filename: Path) -> np.ndarray:
    """
    Read an image file with imageio (the "scipy" loader in this comparison) and return a numpy array.

    :param input_filename: Source image file path.
    :return: float64 numpy array of shape (H, W), (H, W, 3).
    """
    # `np.float` was only an alias for the builtin `float` (i.e. float64); it was
    # deprecated in NumPy 1.20 and removed in 1.24, so the dtype is named explicitly.
    return imageio.imread(input_filename).astype(np.float64)
```

# Results

The three methods above were tested against the same images that were used for the torch tensor comparison.

## RGB Images

For 61 RGB images of size 224 x 224 pixels and 61 of size 180 x 224 pixels, repeated 10 times, there are the following timings:

| Function | Total time (s) |
|------------------------|----------------|
| read_image_pillow2 | 44.8641 |
| read_image_pillow3 | 18.1665 |
| read_image_scipy2 | 51.8801 |

## Greyscale Images

Similarly, with greyscale versions of the RGB images:

| Function | Total time (s) |
|------------------------|----------------|
| read_image_pillow2 | 38.3468 |
| read_image_pillow3 | 14.664 |
| read_image_scipy2 | 39.6123 |
Expand Up @@ -10,6 +10,7 @@
import imageio
import matplotlib.image as mpimg
import numpy as np
import PIL.PngImagePlugin
import SimpleITK as sitk
import torch
import torchvision.transforms.functional as TF
Expand Down Expand Up @@ -146,6 +147,28 @@ def read_image_pillow(input_filename: Path) -> torch.Tensor:
return torch_tensor


def read_image_pillow2(input_filename: Path) -> np.ndarray:
    """
    Read an image file with pillow and return a numpy array.

    :param input_filename: Source image file path.
    :return: float64 numpy array of shape (H, W), (H, W, 3).
    """
    with Image.open(input_filename) as pil_png:
        # `np.float` was only an alias for the builtin `float` (i.e. float64); it was
        # deprecated in NumPy 1.20 and removed in 1.24, so the dtype is named explicitly.
        return np.asarray(pil_png, np.float64)


def read_image_pillow3(input_filename: Path) -> np.ndarray:
    """
    Read a png file with pillow's png plugin directly and return a numpy array.

    The file must already be known to be a png, because the png-specific image class is
    constructed here instead of going through `Image.open`.

    :param input_filename: Source image file path.
    :return: float64 numpy array of shape (H, W), (H, W, 3).
    """
    with PIL.PngImagePlugin.PngImageFile(input_filename) as pil_png:
        # `np.float` was only an alias for the builtin `float` (i.e. float64); it was
        # deprecated in NumPy 1.20 and removed in 1.24, so the dtype is named explicitly.
        return np.asarray(pil_png, np.float64)


def read_image_scipy(input_filename: Path) -> torch.Tensor:
"""
Read an image file with scipy and return a torch.Tensor.
Expand All @@ -158,6 +181,17 @@ def read_image_scipy(input_filename: Path) -> torch.Tensor:
return torch_tensor


def read_image_scipy2(input_filename: Path) -> np.ndarray:
    """
    Read an image file with imageio (the "scipy" loader in this comparison) and return a numpy array.

    :param input_filename: Source image file path.
    :return: float64 numpy array of shape (H, W), (H, W, 3).
    """
    # `np.float` was only an alias for the builtin `float` (i.e. float64); it was
    # deprecated in NumPy 1.20 and removed in 1.24, so the dtype is named explicitly.
    return imageio.imread(input_filename).astype(np.float64)


def read_image_sitk(input_filename: Path) -> torch.Tensor:
"""
Read an image file with SimpleITK and return a torch.Tensor.
Expand Down Expand Up @@ -263,6 +297,31 @@ def check_loaded_image(type: str, image_file: Path, tensor: torch.Tensor) -> Non
assert torch.equal(tensor, reference_tensor)


def check_loaded_image2(type: str, image_file: Path, im2: np.ndarray) -> None:
    """
    Check that an image loaded as a numpy array has the expected format, size, and value range.

    The reference is the same file re-read with pillow and converted to float64; the loaded
    array must match it exactly.

    :param type: Label for printing progress.
    :param image_file: Path to reference png.
    :param im2: Loaded numpy array.
    :return: None.
    :raises AssertionError: If the array type, dtype, shape, value range or contents do not match.
    """
    # Open the reference inside a context manager so the file handle is released even when
    # one of the checks below fails; the profiling loop calls this for every file and repeat.
    with Image.open(image_file) as im:
        source_greyscale = im.mode == 'L'
        # pillow reports (width, height); numpy arrays are indexed (height, width[, channel]).
        width, height = im.size
        print(f"Testing file: {image_file}, type: {type}, format: {im.format}, size: {im.size}, mode: {im.mode}")
        # `np.float` was removed in NumPy 1.24; it was an alias for the builtin float (float64).
        im_data = np.asarray(im, np.float64)

    assert isinstance(im2, np.ndarray)
    assert im2.dtype == np.float64
    if source_greyscale:
        assert im2.shape == (height, width)
    else:
        assert im2.shape == (height, width, 3)
    assert np.max(im2) <= 255.0
    assert np.min(im2) >= 0.0
    assert np.array_equal(im_data, im2)


def mount_and_convert_source_files(
dataset: FileDataset,
output_folder: Path,
Expand Down Expand Up @@ -306,6 +365,7 @@ def run_profiling(
output_folder: Path,
source_options: List[str],
png_libs: List[Tuple[str, Callable[[Path], torch.Tensor]]],
png2_libs: List[Tuple[str, Callable[[Path], np.array]]],
bin_libs: List[Tuple[str, str, Callable[[torch.Tensor, Path], None], Callable[[Path], torch.Tensor]]]) -> None:
"""
Loop through multiple repeats of each source type, loading the image file and processing it with each
Expand All @@ -329,6 +389,10 @@ def run_profiling(
tensor = op(image_file)
check_loaded_image(lib, image_file, tensor)

for lib, op in png2_libs:
nd = op(image_file)
check_loaded_image2(lib, image_file, nd)

for folder, suffix, _, op in bin_libs:
target_folder = output_folder / folder / source_option
native_file = target_folder / image_file.with_suffix(suffix).name
Expand All @@ -340,6 +404,7 @@ def wrap_run_profiling(
repeats: int,
output_folder: Path,
png_libs: List[Tuple[str, Callable[[Path], torch.Tensor]]],
png2_libs: List[Tuple[str, Callable[[Path], np.array]]],
bin_libs: List[Tuple[str, str, Callable[[torch.Tensor, Path], None], Callable[[Path], torch.Tensor]]],
profile_name: str,
profile_source_options: List[str]) -> None:
Expand All @@ -364,6 +429,7 @@ def curry_run_profiling() -> None:
output_folder,
profile_source_options,
png_libs,
png2_libs,
bin_libs)

"""
Expand All @@ -375,7 +441,10 @@ def curry_run_profiling() -> None:
lp.add_function(read_image_opencv)
lp.add_function(read_image_opencv2)
lp.add_function(read_image_pillow)
lp.add_function(read_image_pillow2)
lp.add_function(read_image_pillow3)
lp.add_function(read_image_scipy)
lp.add_function(read_image_scipy2)
lp.add_function(read_image_sitk)
lp.add_function(read_image_skimage)
lp.add_function(read_image_torch)
Expand Down Expand Up @@ -413,6 +482,12 @@ def main() -> None:
# ("torch", read_image_torch),
]

png2_libs: List[Tuple[str, Callable[[Path], np.array]]] = [
("pillow2", read_image_pillow2),
("pillow3", read_image_pillow3),
("scipy2", read_image_scipy2),
]

bin_libs: List[Tuple[str, str, Callable[[torch.Tensor, Path], None], Callable[[Path], torch.Tensor]]] = [
("pt", ".pt", write_image_torch2, read_image_torch2),
("npy", ".npy", write_image_numpy, read_image_numpy),
Expand All @@ -435,7 +510,13 @@ def main() -> None:
}

for profile_name, profile_source_options in profile_sets.items():
wrap_run_profiling(10, output_folder, png_libs, bin_libs, profile_name, profile_source_options)
wrap_run_profiling(10,
output_folder,
png_libs,
png2_libs,
bin_libs,
profile_name,
profile_source_options)


if __name__ == '__main__':
Expand Down

1 comment on commit 6ac3048

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Filename Stmts Miss Cover Missing
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_azure/init.py 4 0 100.00%
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_azure/datasets.py 128 0 100.00%
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_azure/himl.py 177 1 99.44% 530
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_azure/himl_download.py 25 9 64.00% 41-54
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_azure/himl_tensorboard.py 56 4 92.86% 87-89,101
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_azure/utils.py 505 28 94.46% 192,286,331,334,336,343,385,402,406,426,432,448,452,488,495,555,1001,1151,1198-1213
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_ml/init.py 0 0 100.00%
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_ml/data/init.py 0 0 100.00%
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_ml/losses/init.py 0 0 100.00%
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_ml/networks/init.py 0 0 100.00%
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_ml/networks/blocks/init.py 0 0 100.00%
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_ml/networks/layers/init.py 0 0 100.00%
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_ml/networks/nets/init.py 0 0 100.00%
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_ml/utils/init.py 3 0 100.00%
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_ml/utils/diagnostics.py 123 0 100.00%
/opt/hostedtoolcache/Python/3.7.12/x64/lib/python3.7/site-packages/health_ml/utils/logging.py 157 2 98.73% 70,205
TOTAL 1178 44 96.26%

Please sign in to comment.