Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experimental: OpenMP #1013

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ install-coverage:
CFLAGS="-coverage" python setup.py build_ext install
python selftest.py --installed

# Build the extensions with the --enable-openmp option, install them, then
# run the self-test against the installed package.
install-openmp:
python setup.py build_ext --enable-openmp install
python selftest.py --installed

debug:
# make a debug version if we don't have a -dbg python. Leaves in symbols
# for our stuff, kills optimization, and redirects to dev null so we
Expand Down
Binary file added Tests/5k_image.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
36 changes: 36 additions & 0 deletions Tests/mpbench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from PIL import Image
import time
import math

def timeit(n, f, *args, **kwargs):
    """Call ``f(*args, **kwargs)`` ``n`` times and summarise the timings.

    :param n: number of timed calls; must be at least 1.
    :param f: callable to benchmark.
    :param args: positional arguments forwarded to ``f`` on every call.
    :param kwargs: keyword arguments forwarded to ``f`` on every call.
    :returns: dict with the keys ``mean``, ``median``, ``min``, ``max``,
        ``stddev`` (population standard deviation) and ``dev_pct``
        (``stddev`` as a percentage of ``mean``). All timings are in seconds.
    :raises ValueError: if ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("timeit() needs at least one run, got n=%r" % (n,))

    # Prefer the high-resolution clock meant for measuring short durations;
    # fall back to time.time() on interpreters that do not provide it.
    clock = getattr(time, 'perf_counter', time.time)

    def run():
        start = clock()
        f(*args, **kwargs)
        return clock() - start

    runs = [run() for _ in range(n)]
    ordered = sorted(runs)
    mean = sum(runs) / float(n)
    stddev = math.sqrt(sum((r - mean) ** 2 for r in runs) / float(n))

    # True median: for an even number of samples average the two middle
    # values instead of silently picking the upper one.
    mid = n // 2
    if n % 2:
        median = ordered[mid]
    else:
        median = (ordered[mid - 1] + ordered[mid]) / 2.0

    return {
        'mean': mean,
        'median': median,
        'min': ordered[0],
        'max': ordered[-1],
        'stddev': stddev,
        # A coarse clock can report 0.0 for every run; avoid dividing by it.
        'dev_pct': stddev / mean * 100.0 if mean else 0.0,
    }

#return min(run() for _ in range(n))

# Benchmark driver: times image.im.resize on a large JPEG for each resampling
# filter at two output sizes and prints the results as a pipe-separated table.
n = 400  # timed calls per table row; the warm-up pass uses n // 4
# NOTE(review): .copy() presumably forces the JPEG to be decoded up front so
# that decoding is not included in the timings -- confirm.
image = Image.open('5k_image.jpg').copy()

# print(...) with a single argument behaves the same on Python 2 and 3,
# whereas the print statement is a SyntaxError on Python 3.
print('warmup {mean:.4}'.format(**timeit(n // 4, image.im.resize, (2048, 1152), Image.ANTIALIAS)))
print("%s runs" % n)
print("Interpolation | Size | min | max | mean | median| stddev | Dev %")
print("--------- | --------- | ----- | ----- | ----- | ----- | ----- | ----")

# One format string for every row keeps the columns consistent.
_row_format = ('{label} | {width}x{height} | {min:5.3f} | {max:5.3f} | '
               '{mean:5.3f} | {median:5.3f} | {stddev:5.4f} | {dev_pct:4.1f}%')
_filters = (
    ('Antialias', Image.ANTIALIAS),
    ('Bicubic', Image.BICUBIC),
    ('Bilinear', Image.BILINEAR),
)
_sizes = ((2048, 1152), (320, 240))

# Same row order as before: each filter at the large size, then the small one.
for _label, _resample in _filters:
    for _size in _sizes:
        _stats = timeit(n, image.im.resize, _size, _resample)
        print(_row_format.format(label=_label, width=_size[0],
                                 height=_size[1], **_stats))

9 changes: 6 additions & 3 deletions libImaging/Geometry.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
Rotating in chunks that fit in the cache can speed up rotation
8x on a modern CPU. A chunk size of 128 requires only 65k and is large enough
that the overhead from the extra loops is not apparent. */

#define ROTATE_CHUNK 512
#define ROTATE_SMALL_CHUNK 8

Expand Down Expand Up @@ -163,11 +164,13 @@ ImagingTranspose(Imaging imOut, Imaging imIn)

ImagingSectionEnter(&cookie);

if (imIn->image8)
if (imIn->image8){
#pragma omp parallel for private(x,y,xx,yy,xxx,yyy,yysize,xxsize,xxxsize,yyysize) shared(imIn,imOut) default(none) collapse(2)
TRANSPOSE(UINT8, image8)
else
} else {
#pragma omp parallel for private(x,y,xx,yy,xxx,yyy,yysize,xxsize,xxxsize,yyysize) shared(imIn,imOut) default(none) collapse(2)
TRANSPOSE(INT32, image32)

}
ImagingSectionLeave(&cookie);

#undef TRANSPOSE
Expand Down
16 changes: 12 additions & 4 deletions libImaging/Resample.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,10 @@ ImagingResampleHorizontal_8bpc(Imaging imOut, Imaging imIn, int offset,

ImagingSectionEnter(&cookie);
if (imIn->image8) {
#pragma omp parallel for private(k,x,xx,yy,xmin,xmax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds,offset) default(none) collapse(2)
for (yy = 0; yy < imOut->ysize; yy++) {
for (xx = 0; xx < imOut->xsize; xx++) {
xmin = bounds[xx * 2 + 0];
xmin = bounds[xx * 2 + 0];
xmax = bounds[xx * 2 + 1];
k = &kk[xx * ksize];
ss0 = 1 << (PRECISION_BITS -1);
Expand All @@ -254,7 +255,8 @@ ImagingResampleHorizontal_8bpc(Imaging imOut, Imaging imIn, int offset,
}
} else if (imIn->type == IMAGING_TYPE_UINT8) {
if (imIn->bands == 2) {
for (yy = 0; yy < imOut->ysize; yy++) {
#pragma omp parallel for private(k,x,xx,yy,xmin,xmax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds,offset) default(none) collapse(2)
for (yy = 0; yy < imOut->ysize; yy++) {
for (xx = 0; xx < imOut->xsize; xx++) {
xmin = bounds[xx * 2 + 0];
xmax = bounds[xx * 2 + 1];
Expand All @@ -269,7 +271,8 @@ ImagingResampleHorizontal_8bpc(Imaging imOut, Imaging imIn, int offset,
}
}
} else if (imIn->bands == 3) {
for (yy = 0; yy < imOut->ysize; yy++) {
#pragma omp parallel for private(k,x,xx,yy,xmin,xmax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds,offset) default(none) collapse(2)
for (yy = 0; yy < imOut->ysize; yy++) {
for (xx = 0; xx < imOut->xsize; xx++) {
xmin = bounds[xx * 2 + 0];
xmax = bounds[xx * 2 + 1];
Expand All @@ -285,6 +288,7 @@ ImagingResampleHorizontal_8bpc(Imaging imOut, Imaging imIn, int offset,
}
}
} else {
#pragma omp parallel for private(k,x,xx,yy,xmin,xmax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds,offset) default(none) collapse(2)
for (yy = 0; yy < imOut->ysize; yy++) {
for (xx = 0; xx < imOut->xsize; xx++) {
xmin = bounds[xx * 2 + 0];
Expand Down Expand Up @@ -322,7 +326,8 @@ ImagingResampleVertical_8bpc(Imaging imOut, Imaging imIn, int offset,

ImagingSectionEnter(&cookie);
if (imIn->image8) {
for (yy = 0; yy < imOut->ysize; yy++) {
#pragma omp parallel for private(k,y,xx,yy,ymin,ymax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds) default(none)
for (yy = 0; yy < imOut->ysize; yy++) {
k = &kk[yy * ksize];
ymin = bounds[yy * 2 + 0];
ymax = bounds[yy * 2 + 1];
Expand All @@ -335,6 +340,7 @@ ImagingResampleVertical_8bpc(Imaging imOut, Imaging imIn, int offset,
}
} else if (imIn->type == IMAGING_TYPE_UINT8) {
if (imIn->bands == 2) {
#pragma omp parallel for private(k,y,xx,yy,ymin,ymax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds) default(none)
for (yy = 0; yy < imOut->ysize; yy++) {
k = &kk[yy * ksize];
ymin = bounds[yy * 2 + 0];
Expand All @@ -350,6 +356,7 @@ ImagingResampleVertical_8bpc(Imaging imOut, Imaging imIn, int offset,
}
}
} else if (imIn->bands == 3) {
#pragma omp parallel for private(k,y,xx,yy,ymin,ymax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds) default(none)
for (yy = 0; yy < imOut->ysize; yy++) {
k = &kk[yy * ksize];
ymin = bounds[yy * 2 + 0];
Expand All @@ -366,6 +373,7 @@ ImagingResampleVertical_8bpc(Imaging imOut, Imaging imIn, int offset,
}
}
} else {
#pragma omp parallel for private(k,y,xx,yy,ymin,ymax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds) default(none)
for (yy = 0; yy < imOut->ysize; yy++) {
k = &kk[yy * ksize];
ymin = bounds[yy * 2 + 0];
Expand Down
20 changes: 13 additions & 7 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def _pkg_config(name):
class pil_build_ext(build_ext):
class feature:
features = ['zlib', 'jpeg', 'tiff', 'freetype', 'raqm', 'lcms', 'webp',
'webpmux', 'jpeg2000', 'imagequant']
'webpmux', 'jpeg2000', 'imagequant', 'openmp']

required = {'jpeg', 'zlib'}

Expand Down Expand Up @@ -179,9 +179,6 @@ def initialize_options(self):

def finalize_options(self):
build_ext.finalize_options(self)
if self.debug:
global DEBUG
DEBUG = True
for x in self.feature:
if getattr(self, 'disable_%s' % x):
setattr(self.feature, x, False)
Expand All @@ -199,7 +196,8 @@ def build_extensions(self):

library_dirs = []
include_dirs = []

extra_compile_args = []

_add_directory(include_dirs, "libImaging")

pkg_config = None
Expand Down Expand Up @@ -571,6 +569,10 @@ def build_extensions(self):
_find_library_file(self, "libwebpdemux")):
feature.webpmux = "libwebpmux"

if feature.require('openmp'):
extra_compile_args.append('-fopenmp')
feature.openmp = 'gomp'

for f in feature:
if not getattr(feature, f) and feature.require(f):
if f in ('jpeg', 'zlib'):
Expand Down Expand Up @@ -609,6 +611,8 @@ def build_extensions(self):
libs.extend(["kernel32", "user32", "gdi32"])
if struct.unpack("h", "\0\1".encode('ascii'))[0] == 1:
defs.append(("WORDS_BIGENDIAN", None))
if feature.openmp:
libs.append(feature.openmp)

if sys.platform == "win32" and not (PLATFORM_PYPY or PLATFORM_MINGW):
defs.append(("PILLOW_VERSION", '"\\"%s\\""'%PILLOW_VERSION))
Expand All @@ -618,7 +622,8 @@ def build_extensions(self):
exts = [(Extension("PIL._imaging",
files,
libraries=libs,
define_macros=defs))]
define_macros=defs,
extra_compile_args=extra_compile_args))]

#
# additional libraries
Expand Down Expand Up @@ -697,7 +702,8 @@ def summary_report(self, feature):
(feature.lcms, "LITTLECMS2"),
(feature.webp, "WEBP"),
(feature.webpmux, "WEBPMUX"),
]
(feature.openmp, "OpenMP"),
]

all = 1
for option in options:
Expand Down