Skip to content

Commit

Permalink
Merge branch 'main' into fer2013.py_issue
Browse files Browse the repository at this point in the history
  • Loading branch information
real-ojaswi committed May 20, 2024
2 parents dabe6e3 + 61d97f4 commit 3ef3c4b
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 37 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,7 @@ def run(self):
zip_safe=False,
install_requires=requirements,
extras_require={
"gdown": ["gdown>=4.7.3"],
"scipy": ["scipy"],
},
ext_modules=get_extensions(),
Expand Down
13 changes: 11 additions & 2 deletions test/test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,9 @@ def test_pathlib_support(tmpdir):
write_png(img, write_path)


@pytest.mark.parametrize("name", ("gifgrid", "fire", "porsche", "treescap", "treescap-interlaced", "solid2", "x-trans"))
@pytest.mark.parametrize(
"name", ("gifgrid", "fire", "porsche", "treescap", "treescap-interlaced", "solid2", "x-trans", "earth")
)
@pytest.mark.parametrize("scripted", (True, False))
def test_decode_gif(tmpdir, name, scripted):
# Using test images from GIFLIB
Expand All @@ -560,9 +562,16 @@ def test_decode_gif(tmpdir, name, scripted):
# We're not testing against "welcome2" because PIL and GIFLIB disagree on what
# the background color should be (likely a difference in the way they handle
# transparency?)
# 'earth' image is from wikipedia, licensed under CC BY-SA 3.0
# https://creativecommons.org/licenses/by-sa/3.0/
# it allows us to properly test for transparency, TOP-LEFT offsets, and
# disposal modes.

path = tmpdir / f"{name}.gif"
url = f"https://sourceforge.net/p/giflib/code/ci/master/tree/pic/{name}.gif?format=raw"
if name == "earth":
url = "https://upload.wikimedia.org/wikipedia/commons/2/2c/Rotating_earth_%28large%29.gif"
else:
url = f"https://sourceforge.net/p/giflib/code/ci/master/tree/pic/{name}.gif?format=raw"
with open(path, "wb") as f:
f.write(requests.get(url).content)

Expand Down
84 changes: 50 additions & 34 deletions torchvision/csrc/io/image/cpu/decode_gif.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,32 +86,19 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) {
// This check should already be done within DGifSlurp(), just to be safe
TORCH_CHECK(num_images > 0, "GIF file should contain at least one image!");

// Note:
// The GIF format has this notion of "canvas" and "canvas size", where each
// image could be displayed on the canvas at different offsets, forming a
// mosaic/picture wall like so:
//
// <--- canvas W --->
// ------------------------ ^
// | | | |
// | img1 | img3 | |
// | |------------| canvas H
// |---------- | |
// | img2 | img4 | |
// | | | |
// ------------------------ v
// The GifLib docs indicate that this is mostly vestigial
// (https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html), and
// modern viewers ignore the canvas size as well as image offsets. Hence,
// we're ignoring that too:
// - We're ignoring the canvas width and height and assume that the shape of
// the canvas and of all images is the shape of the first image.
// - We're enforcing that all images have the same shape.
// - Left and Top offsets of each image are ignored as well and assumed to be
// 0.

auto out_h = gifFile->SavedImages[0].ImageDesc.Height;
auto out_w = gifFile->SavedImages[0].ImageDesc.Width;
GifColorType bg = {0, 0, 0};
if (gifFile->SColorMap) {
bg = gifFile->SColorMap->Colors[gifFile->SBackGroundColor];
}

// The GIFLIB docs say that the canvas's height and width are potentially
// ignored by modern viewers, so to be on the safe side we set the output
// height to max(canvas_height, first_image_height). Same for width.
// https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html
auto out_h =
std::max(gifFile->SHeight, gifFile->SavedImages[0].ImageDesc.Height);
auto out_w =
std::max(gifFile->SWidth, gifFile->SavedImages[0].ImageDesc.Width);

// We output a channels-last tensor for consistency with other image decoders.
// Torchvision's resize tends to be faster on uint8 channels-last tensors.
Expand All @@ -121,30 +108,59 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) {
auto out = torch::empty(
{int64_t(num_images), 3, int64_t(out_h), int64_t(out_w)}, options);
auto out_a = out.accessor<uint8_t, 4>();

for (int i = 0; i < num_images; i++) {
const SavedImage& img = gifFile->SavedImages[i];
const GifImageDesc& desc = img.ImageDesc;
TORCH_CHECK(
desc.Width == out_w && desc.Height == out_h,
"All images in the gif should have the same dimensions.");

GraphicsControlBlock gcb;
DGifSavedExtensionToGCB(gifFile, i, &gcb);

const GifImageDesc& desc = img.ImageDesc;
const ColorMapObject* cmap =
desc.ColorMap ? desc.ColorMap : gifFile->SColorMap;
TORCH_CHECK(
cmap != nullptr,
"Global and local color maps are missing. This should never happen!");

// When going from one image to another, there is a "disposal method" which
// specifies how to handle the transition. E.g. DISPOSE_DO_NOT means that
// the current image should essentially be drawn on top of the previous
// canvas. The pixels of that previous canvas will appear on the new one if
// either:
// - a pixel is transparent in the current image
// - the current image is smaller than the canvas, hence exposing its pixels
// The "background" disposal method means that the current canvas should be
// set to the background color.
// We only support these 2 modes and default to "background" when the
// disposal method is unspecified, or when it's set to "DISPOSE_PREVIOUS"
// which according to GIFLIB is not widely supported.
// (https://giflib.sourceforge.net/whatsinagif/animation_and_transparency.html).
if (i > 0 && gcb.DisposalMode == DISPOSE_DO_NOT) {
out[i] = out[i - 1];
} else {
// Background. If bg wasn't defined, it will be (0, 0, 0)
for (int h = 0; h < gifFile->SHeight; h++) {
for (int w = 0; w < gifFile->SWidth; w++) {
out_a[i][0][h][w] = bg.Red;
out_a[i][1][h][w] = bg.Green;
out_a[i][2][h][w] = bg.Blue;
}
}
}

for (int h = 0; h < desc.Height; h++) {
for (int w = 0; w < desc.Width; w++) {
auto c = img.RasterBits[h * desc.Width + w];
if (c == gcb.TransparentColor) {
continue;
}
GifColorType rgb = cmap->Colors[c];
out_a[i][0][h][w] = rgb.Red;
out_a[i][1][h][w] = rgb.Green;
out_a[i][2][h][w] = rgb.Blue;
out_a[i][0][h + desc.Top][w + desc.Left] = rgb.Red;
out_a[i][1][h + desc.Top][w + desc.Left] = rgb.Green;
out_a[i][2][h + desc.Top][w + desc.Left] = rgb.Blue;
}
}
}

out = out.squeeze(0); // remove batch dim if there's only one image

DGifCloseFile(gifFile, &error);
Expand Down
2 changes: 1 addition & 1 deletion torchvision/transforms/v2/functional/_color.py
Original file line number Diff line number Diff line change
Expand Up @@ -687,7 +687,7 @@ def permute_channels(inpt: torch.Tensor, permutation: List[int]) -> torch.Tensor
Example:
>>> rgb_image = torch.rand(3, 256, 256)
>>> bgr_image = F.permutate_channels(rgb_image, permutation=[2, 1, 0])
>>> bgr_image = F.permute_channels(rgb_image, permutation=[2, 1, 0])
Args:
permutation (List[int]): Valid permutation of the input channel indices. The index of the element determines the
Expand Down

0 comments on commit 3ef3c4b

Please sign in to comment.